bugfix: api doc bugfix
@@ -1,7 +1,7 @@
 mindformers.core.CrossEntropyLoss
 =================================

-.. py:class:: mindformers.core.CrossEntropyLoss(parallel_config=default_dpmp_config, check_for_nan_in_loss_and_grad=False, monitor_device_local_loss=False, calculate_per_token_loss=False, seq_split_num=1, **kwargs)
+.. py:class:: mindformers.core.CrossEntropyLoss(parallel_config=default_dpmp_config, calculate_per_token_loss=False, seq_split_num=1, **kwargs)

     Computes the cross-entropy loss between the predicted values and the target values.

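This hunk drops the NaN-check and device-local-loss monitoring flags from the documented signature. A minimal usage sketch of the updated class, assuming the usual (logits, labels, input_mask) call convention of MindFormers loss cells; the vocabulary size and batch shapes here are illustrative only:

    import numpy as np
    import mindspore as ms
    from mindformers.core import CrossEntropyLoss

    # Defaults: parallel_config=default_dpmp_config, calculate_per_token_loss=False
    loss_fn = CrossEntropyLoss(seq_split_num=1)

    logits = ms.Tensor(np.random.randn(8, 1000), ms.float32)        # (tokens, vocab)
    labels = ms.Tensor(np.random.randint(0, 1000, (8,)), ms.int32)  # (tokens,)
    mask = ms.Tensor(np.ones((8,)), ms.float32)                     # 1 = token counted in the loss

    loss = loss_fn(logits, labels, mask)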
@@ -1,7 +1,7 @@
 mindformers.core.MFLossMonitor
 ==============================

-.. py:class:: mindformers.core.MFLossMonitor(learning_rate=None, per_print_times=1, micro_batch_num=1, micro_batch_interleave_num=1, origin_epochs=None, dataset_size=None, initial_epoch=0, initial_step=0, global_batch_size=0, gradient_accumulation_steps=1, check_for_nan_in_loss_and_grad=False, calculate_per_token_loss=False)
+.. py:class:: mindformers.core.MFLossMonitor(learning_rate=None, per_print_times=1, micro_batch_num=1, micro_batch_interleave_num=1, origin_epochs=None, dataset_size=None, initial_epoch=0, initial_step=0, global_batch_size=0, gradient_accumulation_steps=1, check_for_nan_in_loss_and_grad=False, calculate_per_token_loss=False, print_separate_loss=False)

     A callback that monitors the loss and other related parameters during training.

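The new print_separate_loss flag is appended to the documented signature. A hedged sketch of wiring the monitor into a training run; the flag's exact logging behavior is inferred from its name, and the training call at the end is a placeholder:

    from mindformers.core import MFLossMonitor

    monitor = MFLossMonitor(
        learning_rate=1e-5,
        per_print_times=1,
        origin_epochs=3,
        dataset_size=1000,
        global_batch_size=32,
        print_separate_loss=True,  # new flag; presumably logs each loss component separately
    )

    # model.train(3, dataset, callbacks=[monitor])  # placeholder training call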
@@ -1,7 +1,7 @@
 mindformers.wrapper.MFPipelineWithLossScaleCell
 ===============================================

-.. py:class:: mindformers.wrapper.MFPipelineWithLossScaleCell(network, optimizer, use_clip_grad=True, max_grad_norm=1.0, scale_sense=1.0, micro_batch_num=1, local_norm=False, calculate_per_token_loss=False, global_norm_spike_threshold=1.0, use_skip_data_by_global_norm=False, **kwargs)
+.. py:class:: mindformers.wrapper.MFPipelineWithLossScaleCell(network, optimizer, use_clip_grad=True, max_grad_norm=1.0, scale_sense=1.0, micro_batch_num=1, local_norm=False, calculate_per_token_loss=False, global_norm_spike_threshold=1.0, use_skip_data_by_global_norm=False, print_separate_loss=False, **kwargs)

     Extends the MindFormers single-step training cell with loss-scaling support for pipeline parallelism.

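This wrapper only runs meaningfully under a distributed pipeline-parallel setup, so a fully runnable example is out of scope here. A construction sketch under stated assumptions, where pipeline_net and optimizer are placeholders for a micro-batch-wrapped pipeline network and a MindSpore optimizer:

    import mindspore as ms
    from mindspore import Tensor
    from mindformers.wrapper import MFPipelineWithLossScaleCell

    # pipeline_net, optimizer: placeholders, not defined in this sketch
    train_cell = MFPipelineWithLossScaleCell(
        pipeline_net,
        optimizer,
        use_clip_grad=True,
        max_grad_norm=1.0,
        scale_sense=Tensor(1024.0, ms.float32),  # static loss-scale value
        micro_batch_num=4,                       # micro-batches per pipeline step
        print_separate_loss=False,               # new flag from this commit
    )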
@@ -1,7 +1,7 @@
 mindformers.wrapper.MFTrainOneStepCell
 ======================================

-.. py:class:: mindformers.wrapper.MFTrainOneStepCell(network, optimizer, use_clip_grad=False, max_grad_norm=1.0, scale_sense=1.0, local_norm=False, calculate_per_token_loss=False, global_norm_spike_threshold=1.0, use_skip_data_by_global_norm=False, **kwargs)
+.. py:class:: mindformers.wrapper.MFTrainOneStepCell(network, optimizer, use_clip_grad=False, max_grad_norm=1.0, scale_sense=1.0, local_norm=False, calculate_per_token_loss=False, global_norm_spike_threshold=1.0, use_skip_data_by_global_norm=False, print_separate_loss=False, **kwargs)

     A single-step training wrapper interface for MindFormers.
     Trains the network using strategies such as loss scaling, gradient clipping, gradient accumulation, and exponential moving average.

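A minimal sketch of one training step with the wrapper, assuming the wrapped network returns a scalar loss from its construct; the toy net and data below stand in for a real MindFormers model, and the exact step outputs (loss plus status values) may vary by version:

    import numpy as np
    import mindspore as ms
    from mindspore import nn
    from mindformers.wrapper import MFTrainOneStepCell

    # Tiny stand-in network that returns a scalar loss for the wrapper to differentiate.
    class ToyNetWithLoss(nn.Cell):
        def __init__(self):
            super().__init__()
            self.dense = nn.Dense(4, 1)
            self.loss = nn.MSELoss()

        def construct(self, x, y):
            return self.loss(self.dense(x), y)

    net = ToyNetWithLoss()
    opt = nn.Momentum(net.trainable_params(), learning_rate=0.01, momentum=0.9)
    train_cell = MFTrainOneStepCell(net, opt, use_clip_grad=True,
                                    print_separate_loss=False)  # new flag from this commit
    train_cell.set_train()

    x = ms.Tensor(np.random.randn(8, 4), ms.float32)
    y = ms.Tensor(np.random.randn(8, 1), ms.float32)
    out = train_cell(x, y)   # one optimizer step; returns the loss and step status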