!3113 [pytorch][bugfix] fix some bugs for icsl

Merge pull request !3113 from jzh/210_uicsl
jzh
2025-08-08 07:13:20 +00:00
committed by i-robot
parent a4567e5b65
commit 6785f325f3
49 changed files with 225 additions and 102 deletions

View File

@@ -46,7 +46,9 @@
1. Users are advised to write training scripts that match the available runtime resources. If a script does not match the resources, for example the dataset loaded into memory exceeds the memory limit or the data the script generates locally exceeds the available disk space, errors may occur and the process may exit unexpectedly.
2. MindSpeed-LLM uses pytorch internally; version mismatches may cause runtime errors. See the pytorch [security statement](https://gitee.com/ascend/pytorch#%E5%AE%89%E5%85%A8%E5%A3%B0%E6%98%8E) for details.
3. For security reasons, all torch.load calls currently use weights_only=True; adjust this setting as needed.
4. HumanEval uses subprocess.run, which carries security risks. To keep the feature usable, some safety checks were added as a mitigation; users should build and extend the blacklist themselves as needed to harden security.
5. For security reasons, trust_remote_code=False is set by default, so open-source models not supported by the official Transformers repository cannot be loaded remotely; pass the --trust-remote-code argument manually if needed (see the sketch below).
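
A minimal sketch of how items 3 and 5 look in practice, assuming a local checkpoint and tokenizer (paths are placeholders):

```python
import torch
from transformers import AutoTokenizer

# item 3: torch.load defaults to weights_only=True in this repository
state_dict = torch.load("/path/to/model_optim_rng.pt", map_location="cpu", weights_only=True)

# item 5: remote code stays disabled unless --trust-remote-code is passed explicitly
tokenizer = AutoTokenizer.from_pretrained(
    "/path/to/tokenizer",
    trust_remote_code=False,   # set True only for models whose custom code you trust
    local_files_only=True,
)
```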
## Public Network Address Declaration

View File

@@ -0,0 +1,9 @@
[
"os\\.(system|popen|exec|setuid|setgid|chroot)\\s*\\(",
"subprocess\\.(run|Popen|call)\\s*\\(",
"pty\\.spawn\\s*\\(",
"(requests|urllib|socket|httpx)\\.(get|post|urlopen|connect)\\s*\\(",
"open\\s*\\(",
"os\\.(remove|rename|chmod|chown|mkdir)\\s*\\(",
"(eval|exec|__import__|globals|locals)\\s*\\("
]
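
A minimal sketch of how a blacklist like this could be applied before executing generated code; only the file name configs/dangerous_shell.json comes from this commit, the helper and test snippets are illustrative:

```python
import json
import re

with open("configs/dangerous_shell.json", "r", encoding="utf-8") as f:
    dangerous_patterns = json.load(f)

def looks_dangerous(code: str) -> bool:
    # flag the snippet if any blacklist regex matches
    return any(re.search(pattern, code) for pattern in dangerous_patterns)

print(looks_dangerous('subprocess.run(["ls"])'))            # True
print(looks_dangerous("def add(a, b):\n    return a + b"))  # False
```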

View File

@@ -76,6 +76,10 @@ def main():
help='Enable only save lora-checkpoint to hf')
parser.add_argument('--load-checkpoint-loosely', action='store_true', default=False,
help='Enable loading checkpoint not strictly.')
parser.add_argument('--trust-remote-code',
action='store_true',
default=False,
help='enable trust-remote-code for transformer to load model')
known_args, _ = parser.parse_known_args()
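
A hedged usage sketch of the new flag; the argument definition mirrors the hunk above, while the model path and loader call are placeholders:

```python
import argparse
from transformers import AutoModelForCausalLM

parser = argparse.ArgumentParser()
parser.add_argument('--trust-remote-code', action='store_true', default=False,
                    help='enable trust-remote-code for transformer to load model')
known_args, _ = parser.parse_known_args()

# the parsed flag is forwarded to the Hugging Face loader instead of hard-coding True
model = AutoModelForCausalLM.from_pretrained(
    "/path/to/hf_model",
    trust_remote_code=known_args.trust_remote_code,
)
```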

View File

@@ -380,7 +380,7 @@ def main():
model_provider=model_provider,
pretrained_model_name_or_path=args.load
)
tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name_or_path, trust_remote_code=True, local_files_only=True)
tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name_or_path, trust_remote_code=False, local_files_only=True)
rank = dist.get_rank()
if 'cmmlu' in args.task:

View File

@@ -12,7 +12,7 @@ import safetensors
import torch
import safetensors.torch
import bitsandbytes as bnb
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
logger.basicConfig(format="")
logger.getLogger().setLevel(logger.INFO)
@@ -75,8 +75,8 @@ class CkptConvert(object):
self.vpp_stage = vpp_stage
if vpp_stage is not None:
self.vpp_size = self.num_layers // self.pp_size // self.vpp_stage
self.hf_model_path = hf_model_path
self.mg_save_path = mg_save_path
self.hf_model_path = standardize_path(hf_model_path, check_read=True)
self.mg_save_path = standardize_path(mg_save_path, check_write=True)
self.num_layer_list = num_layer_list
self.noop_layers = noop_layers
self.moe_grouped_gemm = moe_grouped_gemm
@@ -138,7 +138,7 @@ class CkptConvert(object):
"""megatron model path"""
iter_mg_path = os.path.join(mg_path, "iter_0000001")
if not os.path.exists(mg_path):
os.makedirs(mg_path, exist_ok=True)
os.makedirs(mg_path, mode=0o750, exist_ok=True)
with open(os.path.join(mg_path, "latest_checkpointed_iteration.txt"), 'w') as f:
f.write("1")
@@ -786,7 +786,7 @@ class CkptConvert(object):
for tp_rank in range(self.tp_size):
save_prefix = self.generate_mg_weights_dir(tp_rank=tp_rank, pp_rank=pp_rank, ep_rank=ep_rank)
parallel_save_path = os.path.join(save_model_path, save_prefix)
os.makedirs(parallel_save_path)
os.makedirs(parallel_save_path, mode=0o750, exist_ok=True)
save_file_name = os.path.join(parallel_save_path, "model_optim_rng.pt")
logger.info(f"Saving to {save_file_name}")
@@ -845,7 +845,7 @@ class CkptConvert(object):
for tp_rank in range(self.tp_size):
save_prefix = self.generate_mg_weights_dir(tp_rank=tp_rank, pp_rank=pp_rank, ep_rank=ep_rank)
parallel_save_path = os.path.join(save_model_path, save_prefix)
os.makedirs(parallel_save_path, exist_ok=True)
os.makedirs(parallel_save_path, mode=0o750, exist_ok=True)
save_file_name = os.path.join(parallel_save_path, "model_optim_rng.pt")
logger.info(f"Saving to {save_file_name}")
model_dict = {"checkpoint_version": 3.0, "iteration": 1}
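
The save directories above are now created with an explicit 0o750 mode. A small standalone sketch of the pattern, assuming the process umask may mask the bits further; the path is a placeholder:

```python
import os
import stat

save_dir = "/tmp/mg_ckpt/mp_rank_00"                 # placeholder path
# rwxr-x--- for owner and group, nothing for others; exist_ok tolerates re-runs
os.makedirs(save_dir, mode=0o750, exist_ok=True)
print(oct(stat.S_IMODE(os.stat(save_dir).st_mode)))  # typically 0o750, or less after umask
```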

View File

@@ -14,7 +14,7 @@ import tqdm
import torch
import torch_npu
import safetensors.torch
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
logger.basicConfig(format="")
logger.getLogger().setLevel(logger.INFO)
@@ -33,7 +33,7 @@ GLOBAL_LM_HEAD_WEIGHTS = None
def load_data(file_path):
logger.info(f"Loading the checkpoint from {file_path}.")
return torch.load(file_path, map_location='cpu', weights_only=False)
return torch.load(file_path, map_location='cpu', weights_only=True)
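
weights_only=True restricts unpickling to tensors and plain containers. A sketch, assuming PyTorch 2.4+ where torch.serialization.add_safe_globals is available, of loading a checkpoint that also stores a custom class; the class here is hypothetical:

```python
import torch
import torch.serialization

class TrainingMeta:  # hypothetical class embedded in the checkpoint
    def __init__(self, step):
        self.step = step

# allow-list the class so torch.load(..., weights_only=True) can rebuild it
torch.serialization.add_safe_globals([TrainingMeta])
ckpt = torch.load("/path/to/model_optim_rng.pt", map_location="cpu", weights_only=True)
```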
def tensor_memory_size(tensor):
@@ -73,15 +73,15 @@ class MgCkptConvert(object):
self.ep_size = ep_size
self.vpp_stage = vpp_stage
self.mg_model_path = mg_model_path
self.hf_save_path = hf_save_path
self.mg_model_path = standardize_path(mg_model_path, check_read=True)
self.hf_save_path = standardize_path(hf_save_path, check_write=True)
self.lora_model_path = lora_model_path
self.iter_path = self.get_iter_path(self.mg_model_path)
if self.lora_model_path is not None:
self.lora_iter_path = self.get_iter_path(self.lora_model_path)
if not os.path.exists(self.hf_save_path):
os.makedirs(self.hf_save_path)
os.makedirs(self.hf_save_path, mode=0o750, exist_ok=True)
self.num_layers = num_layers
self.noop_layers = noop_layers
@@ -194,7 +194,7 @@ class MgCkptConvert(object):
directory = os.path.join(ckpt_path, f'iter_{iteration:07d}')
os.makedirs(directory, exist_ok=True)
os.makedirs(directory, mode=0o750, exist_ok=True)
return directory

View File

@@ -16,6 +16,7 @@ from megatron.core.datasets.gpt_dataset import (_build_document_index,
_build_shuffle_index
)
from mindspeed_llm.tasks.utils.error_utils import GPTDatasetSampleIndexError
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
from .blended_megatron_dataset_builder import need_to_build_dataset
logger = logging.getLogger(__name__)
@@ -65,11 +66,13 @@ def _build_document_sample_shuffle_indices(
Returns:
Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]: The document index, the sample index, and the shuffle index
"""
path_to_cache = self.config.path_to_cache
if path_to_cache is None and not self.config.mock:
path_to_cache = os.path.join(
self.dataset.path_prefix, "cache", f"{type(self).__name__}_indices"
)
path_to_cache = standardize_path(path_to_cache, check_write=True)
# start of megatron_adaptation,
# here we change from (class)GPTDataset._build_document_sample_shuffle_indices
@@ -196,8 +199,7 @@ def _build_document_sample_shuffle_indices(
)
if any(sample_index[:, 0] < 0):
_url = "https://gitee.com/ascend/MindSpeed-LLM/wikis/megatron%20data%20helpers%E5%8F%AF%E8%83%BD%E5%BC%95%E5%85%A5%E7%9A%84%E9%97%AE%E9%A2%98"
raise GPTDatasetSampleIndexError(f"Bad sample index. Visit {_url} for more information")
raise GPTDatasetSampleIndexError(f"Bad sample index.")
# Build the shuffle index
if separate_final_epoch:
@@ -210,7 +212,7 @@ def _build_document_sample_shuffle_indices(
)
if path_to_cache:
os.makedirs(path_to_cache, exist_ok=True)
os.makedirs(path_to_cache, mode=0o750, exist_ok=True)
# Write the description
with open(path_to_description, "wt") as writer:
writer.write(self.unique_description)
@@ -256,8 +258,7 @@ def _build_document_sample_shuffle_indices(
sample_index = numpy.load(path_to_sample_index, allow_pickle=True, mmap_mode='r')
if any(sample_index[:, 0] < 0):
_url = "https://gitee.com/ascend/MindSpeed-LLM/wikis/megatron%20data%20helpers%E5%8F%AF%E8%83%BD%E5%BC%95%E5%85%A5%E7%9A%84%E9%97%AE%E9%A2%98"
raise GPTDatasetSampleIndexError(f"Bad sample index. Visit {_url} for more information")
raise GPTDatasetSampleIndexError(f"Bad sample index.")
t_end = time.time()
log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

View File

@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Expert parallel groups."""
import os
import sys
from functools import wraps
from typing import Optional
@@ -24,7 +24,7 @@ import torch_npu
import megatron
from megatron.core.parallel_state import get_context_parallel_world_size, get_nccl_options
from mindspeed.core.parallel_state import hccl_buffer_auto_adaptive, parse_hccl_buffer_string
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
_EXPERT_PARALLEL_GROUP = None
_MPU_EXPERT_MODEL_PARALLEL_RANK = None
_MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE = None
@@ -48,6 +48,8 @@ def initialize_model_parallel_decorator(initialize_model_parallel):
from megatron.training.utils import print_rank_0
timeout = timedelta(minutes=distributed_timeout_minutes)
nccl_communicator_config_path = standardize_path(nccl_communicator_config_path, check_read=True)
if pipeline_model_parallel_size == 2 and virtual_pipeline_model_parallel_size is not None:
megatron.core.parallel_state._VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK = 0
megatron.core.parallel_state._VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = virtual_pipeline_model_parallel_size
@@ -261,6 +263,8 @@ def initialize_model_parallel_wrapper(initialize_model_parallel):
if args.hccl_group_buffer is not None:
parse_hccl_buffer_string(args.hccl_group_buffer)
nccl_communicator_config_path = standardize_path(nccl_communicator_config_path, check_read=True)
data_parallel_size = 1 # dp 1
rank = torch.distributed.get_rank()
all_ep_groups = []

View File

@@ -15,7 +15,7 @@ try:
from mindspeed_llm.core.pipeline_parallel.dualpipe.gpt_model import gpt_model_forward_backward_overlaping
from mindspeed_llm.core.pipeline_parallel.dualpipe.MTP_overlap import forward_overlap
except ImportError:
pass
print("[warning] failed import dualpipe modules, not support dualpipe")
from mindspeed_llm.core.transformer.multi_token_prediction import MultiTokenPredictionLayer, MTPLossAutoScaler

View File

@@ -44,4 +44,8 @@ def _add_moba_args(parser):
group.add_argument('--moba-calc-method', type=int, default=1,
help='moba calculation method. 1: naive attention with naive attention operations; 2: use flash'
'attention. default: 1')
group.add_argument('--trust-remote-code',
action='store_true',
default=False,
help='enable trust-remote-code for transformer to load model')
return parser

View File

@@ -8,6 +8,7 @@ import logging as logger
import argparse
import torch
import safetensors.torch
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
logger.basicConfig(format="")
logger.getLogger().setLevel(logger.INFO)
@@ -67,7 +68,7 @@ class CheckpointConverter:
try:
if filename.endswith(".bin"):
cur_weights = torch.load(file_path, map_location=torch.device('cpu'))
cur_weights = torch.load(file_path, map_location=torch.device('cpu'), weights_only=True)
model_dict.update(cur_weights)
print(f"Successfully loaded: {filename}")
loaded = True
@@ -349,7 +350,7 @@ class CheckpointConverter:
out_iteration, input_model_dir, src_model_file = self.get_latest_checkpoint_model_file(self.args.load_dir)
src_model = torch.load(src_model_file, map_location='cpu', weights_only=False)
src_model = torch.load(src_model_file, map_location='cpu', weights_only=True)
logger.info(f"Sample model {src_model_file} is loaded.\n")
return out_iteration, input_model_dir, src_model
@@ -381,7 +382,7 @@ class CheckpointConverter:
input_pp_rank
)
tp_models.append(torch.load(model_file, map_location='cpu', weights_only=False))
tp_models.append(torch.load(model_file, map_location='cpu', weights_only=True))
logger.info(f"Model {model_file} is loaded.")
if input_tp_rank > 1:
@@ -475,7 +476,7 @@ class CheckpointConverter:
dir_name += f"_{pp_idx:03d}"
save_path = os.path.join(args.save_dir, f"iter_{out_iteration:07d}", dir_name)
os.makedirs(save_path, exist_ok=True)
os.makedirs(save_path, mode=0o750, exist_ok=True)
return os.path.join(save_path, filename)
@@ -589,6 +590,8 @@ def run():
args, _ = parser.parse_known_args()
args.load_dir = standardize_path(args.load_dir, check_read=True)
converter = CheckpointConverter(args)
converter.main()

View File

@@ -18,8 +18,8 @@ import json
import os
import stat
import time
import torch
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
def get_json_from_file(json_file):
@@ -123,10 +123,11 @@ class ConvertBase:
self.mg_latest_ckpt_file_name = "latest_checkpointed_iteration.txt"
# hf model index_file
self.model_index_file = os.path.join(
self.args_cmd.hf_dir,
"pytorch_model.bin.index.json") if self.args_cmd.model_index_file is None \
index_file = os.path.join(self.args_cmd.hf_dir, "pytorch_model.bin.index.json")
self.model_index_file = index_file if self.args_cmd.model_index_file is None \
else self.args_cmd.model_index_file
self.model_index_file = standardize_path(self.model_index_file, check_read=True)
self.model_index_map = get_json_from_file(self.model_index_file)
# hf model config_file
self.config_file = os.path.join(
@@ -217,7 +218,7 @@ class ConvertBase:
hf_model[k] = f.get_tensor(k)
elif str(model_files).endswith(".bin"):
print(f"load file : {file_path}")
hf_model = torch.load(file_path, map_location='cpu', weights_only=False)
hf_model = torch.load(file_path, map_location='cpu', weights_only=True)
else:
raise ValueError(f"unsupported model file format. {os.path.splitext(hf_model)[-1]} ")
return hf_model
@@ -550,17 +551,18 @@ class ConvertHf2Mg(ConvertBase):
ep_rank=ep_rank)
save_dir = self.get_mg_model_save_dir(tp_rank=tp_rank, pp_rank=pp_rank, ep_rank=ep_rank,
iteration=iteration)
os.makedirs(save_dir, exist_ok=True)
os.makedirs(save_dir, mode=0o750, exist_ok=True)
torch.save(model_dict, os.path.join(save_dir, self.mg_model_file_name))
else: # Dense Model
model_dict = self._set_dense_mg_model(hf_model=hf_model, tp_rank=tp_rank, pp_rank=pp_rank)
save_dir = self.get_mg_model_save_dir(tp_rank=tp_rank, pp_rank=pp_rank, ep_rank=None,
iteration=iteration)
os.makedirs(save_dir, exist_ok=True)
os.makedirs(save_dir, mode=0o750, exist_ok=True)
torch.save(model_dict, os.path.join(save_dir, self.mg_model_file_name))
# write latest_checkpointed_iteration.txt
latest_ckpt_file_path = os.path.join(self.args_cmd.mg_dir, self.mg_latest_ckpt_file_name)
latest_ckpt_file_path = standardize_path(latest_ckpt_file_path, check_write=True)
modes = stat.S_IWUSR | stat.S_IRUSR | stat.S_IWGRP | stat.S_IRGRP
with os.fdopen(os.open(latest_ckpt_file_path, flags=os.O_RDWR | os.O_CREAT, mode=modes), 'w') as fout:
fout.write(iteration)
@@ -577,7 +579,7 @@ class ConvertMg2Hf(ConvertBase):
for tp_rank in range(self.tp_size):
mg_save_dir = self.get_mg_model_save_dir(tp_rank=tp_rank, pp_rank=pp_rank, ep_rank=None,
iteration=self.args_cmd.iteration)
mg_tp_model = torch.load(os.path.join(mg_save_dir, self.mg_model_file_name), map_location='cpu', weights_only=False)
mg_tp_model = torch.load(os.path.join(mg_save_dir, self.mg_model_file_name), map_location='cpu', weights_only=True)
mg_tp_models.append(mg_tp_model)
hf_model = {}
@@ -751,13 +753,13 @@ class ConvertMg2Hf(ConvertBase):
def _update_hf_model_file(self, hf_model, model_file):
file_path = os.path.join(self.args_cmd.hf_dir, model_file)
exist_model = torch.load(file_path, map_location='cpu', weights_only=False) if os.path.exists(file_path) else {}
exist_model = torch.load(file_path, map_location='cpu', weights_only=True) if os.path.exists(file_path) else {}
for param_key in hf_model.keys():
if self.get_hf_model_file_based_param_key(param_key) == model_file:
exist_model[param_key] = hf_model[param_key]
os.makedirs(os.path.dirname(file_path), exist_ok=True)
os.makedirs(os.path.dirname(file_path), mode=0o750, exist_ok=True)
torch.save(exist_model, file_path)
def run(self):

View File

@@ -373,7 +373,9 @@ def _load_checkpoint(model_provider, queue, args):
md = build_metadata(args, margs)
queue.put(md)
model_hf.get_modules_from_pretrained()
print(f"args.trust_remote_code:{args.trust_remote_code}")
model_hf.get_modules_from_pretrained(trust_remote_code=args.trust_remote_code)
model_mg.get_modules_from_config()
model_mg.update_module(model_hf)

View File

@@ -1,6 +1,7 @@
# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved.
import abc
import os
import ast
import sys
import re
import json
@@ -23,7 +24,7 @@ from megatron.core import tensor_parallel
from mindspeed_llm.training.utils import parse_args
from mindspeed_llm.training import model_provider_func_wrapper
from mindspeed_llm.training.checkpointing import load_checkpoint_wrapper
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
logger.basicConfig(format="")
logger.getLogger().setLevel(logger.INFO)
@@ -465,9 +466,9 @@ class HuggingfaceModel(ModelBase):
def initialize_args(self):
# Read huggingface args.
if self.args_cmd.save_model_type == 'hf':
cfg_dir = self.args_cmd.save_dir
cfg_dir = standardize_path(self.args_cmd.save_dir, check_write=True)
else:
cfg_dir = self.args_cmd.load_dir
cfg_dir = standardize_path(self.args_cmd.load_dir, check_read=True)
llama_args_path = os.path.join(cfg_dir, "config.json")
with open(llama_args_path) as f:
self.args = json.load(f)
@@ -501,7 +502,7 @@ class HuggingfaceModel(ModelBase):
self.args.save_lora_to_hf = self.args_cmd.save_lora_to_hf
self.args.noop_layers = self.args_cmd.noop_layers
def get_modules_from_config(self, device_map="cpu", trust_remote_code=True):
def get_modules_from_config(self, device_map="cpu", trust_remote_code=False):
# Load Huggingface model.
if self.args_cmd.save_model_type == "hf":
load_dir = self.args_cmd.save_dir
@@ -513,9 +514,9 @@ class HuggingfaceModel(ModelBase):
hf_model.to_empty(device=device_map)
self.module = [hf_model]
if hasattr(self.args, "torch_dtype") and self.args.torch_dtype in ["float16", "bfloat16"]:
self.module[0] = self.module[0].to(eval(f'torch.{self.args.torch_dtype}'))
self.module[0] = self.module[0].to(getattr(torch, self.args.torch_dtype))
def get_modules_from_pretrained(self, device_map="cpu", trust_remote_code=True):
def get_modules_from_pretrained(self, device_map="cpu", trust_remote_code=False):
# Load Huggingface model.
if self.args_cmd.save_model_type == "hf":
load_dir = self.args_cmd.save_dir
@@ -542,7 +543,8 @@ class HuggingfaceModel(ModelBase):
)
self.module = [get_peft_model(self.module[0], lora_config)]
if hasattr(self.args, "torch_dtype") and self.args.torch_dtype in ["float16", "bfloat16"]:
self.module[0] = self.module[0].to(eval(f'torch.{self.args.torch_dtype}'))
dtype = getattr(torch, self.args.torch_dtype)
self.module[0] = self.module[0].to(dtype)
def get_lora_key(self, layer_name, prefix):
return f"{layer_name}.{prefix}"

View File

@@ -16,7 +16,7 @@ logger.getLogger().setLevel(logger.INFO)
def load_data(file_path):
try:
data = torch.load(file_path, map_location='cpu', weights_only=False)
data = torch.load(file_path, map_location='cpu', weights_only=True)
return data
except Exception as e:
logger.info(f"Error while loading file '{file_path}': {e}")
@@ -74,7 +74,7 @@ class OptimBaseProcessor(abc.ABC):
@staticmethod
def check_mkdir(dir_path):
if not os.path.exists(dir_path):
os.makedirs(dir_path)
os.makedirs(dir_path, mode=0o750, exist_ok=True)
def get_ckpt_path(self, tp_rank, pp_rank, ep_rank=None, suffix=""):
"""
@@ -317,7 +317,7 @@ class OptimSourceProcessor(OptimBaseProcessor):
@staticmethod
def make_param_index_map(model_path):
weights = torch.load(model_path, map_location=torch.device('cpu'), weights_only=False)
weights = torch.load(model_path, map_location=torch.device('cpu'), weights_only=True)
# Count the number of models in the checkpoint
model_num = sum([1 if key.startswith("model") else 0 for key in weights.keys()])
@@ -478,7 +478,7 @@ class OptimSourceProcessor(OptimBaseProcessor):
optim_path = self.optimizer_paths[tp_rank][pp_rank][ep_rank]
logger.info(f"Splitting from {optim_path} ...")
merged_ckpt = torch.load(optim_path, map_location="cpu", weights_only=False)
merged_ckpt = torch.load(optim_path, map_location="cpu", weights_only=True)
if isinstance(merged_ckpt, dict):
merged_ckpt = [merged_ckpt]
@@ -643,7 +643,7 @@ class OptimTargetProcessor(OptimBaseProcessor):
for key in ["param", "exp_avg", "exp_avg_sq"]:
load_path = f"{ckpt_name}_{key}{ckpt_ext}"
logger.info(f" {key} is loaded from {load_path}.")
optim_ckpt = torch.load(load_path, map_location="cpu", weights_only=False)
optim_ckpt = torch.load(load_path, map_location="cpu", weights_only=True)
flatten_ckpt = self.flatten_optimizer_ckpt(optim_ckpt, pp_rank, key)

View File

@@ -9,7 +9,7 @@ from collections import defaultdict, OrderedDict
from tqdm import tqdm
import torch
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
logger.basicConfig(format="")
logger.getLogger().setLevel(logger.INFO)
@@ -39,7 +39,7 @@ class OptimConverter(abc.ABC):
def get_optim_param_from_src_model_ckpt(self):
ckpt_path = self.src_optim.model_paths[0][0][0]
model_ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)
model_ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=True)
self.optim_param = model_ckpt['optimizer']
self.opt_param_scheduler = model_ckpt['opt_param_scheduler']
@@ -57,7 +57,7 @@ class OptimConverter(abc.ABC):
bool: True if successful, False otherwise.
"""
try:
model_ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)
model_ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=True)
# Apply modifications
for key, value in modifications.items():

View File

@@ -539,9 +539,9 @@ def save_huggingface(args, model):
from .models import get_huggingface_model
model_hf = get_huggingface_model(args)
if args.load_hf_from_config:
model_hf.get_modules_from_config()
model_hf.get_modules_from_config(trust_remote_code=args.trust_remote_code)
else:
model_hf.get_modules_from_pretrained()
model_hf.get_modules_from_pretrained(trust_remote_code=args.trust_remote_code)
args_cmd = model_hf.get_args_cmd()
model_hf.update_module(model)

View File

@@ -34,7 +34,7 @@ from mindspeed_llm.tasks.evaluation.eval_utils.agi_utils import (
get_default_instruction,
get_pred_postprocess_func
)
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
logger = logging.getLogger(__name__)
@@ -42,7 +42,7 @@ class AGIEvalExam(DatasetEval):
def __init__(self, test_dir, eval_args,
instruction_template="{fewshot_template} {question}\n{question_template}\n{options}"
"\n{answer_template}"):
self.test_dir = test_dir
self.test_dir = standardize_path(test_dir, check_read=True)
self.instruction_template = instruction_template
self.batch_size = eval_args.evaluation_batch_size
self.rank = dist.get_rank()

View File

@@ -32,7 +32,7 @@ from mindspeed_llm.tasks.evaluation.eval_api.dataset_eval import DatasetEval
from mindspeed_llm.tasks.evaluation.eval_impl.template import BBH_TEMPLATE_DIR, BBH_COT_TEMPLATE_DIR, get_eval_template
from mindspeed_llm.tasks.evaluation.eval_utils.bbh_utils import bbh_mcq_postprocess, bbh_freeform_postprocess, bbh_true_or_false_questions
from mindspeed_llm.tasks.evaluation.utils import get_final_list_dataset
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
logger = logging.getLogger(__name__)
@@ -75,7 +75,7 @@ bbh_free_form_sets = [
class BBHEval(DatasetEval):
def __init__(self, test_dir, eval_args,
instruction_template="{fewshot_template}Q: {question}\nA:"):
self.test_dir = test_dir
self.test_dir = standardize_path(test_dir, check_read=True)
self.instruction_template = instruction_template
self.batch_size = eval_args.evaluation_batch_size
self.rank = dist.get_rank()

View File

@@ -28,7 +28,7 @@ from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat
from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
from mindspeed_llm.tasks.evaluation.utils import get_final_list_dataset
from mindspeed_llm.tasks.evaluation.eval_utils.boolq_utils import first_capital_postprocess
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
logger = logging.getLogger(__name__)
@@ -36,7 +36,7 @@ logger = logging.getLogger(__name__)
class BoolqEval(DatasetEval):
def __init__(self, test_dir, eval_args,
instruction_template="{passage}\nQuestion: {question}?\nAnswer:"):
self.test_dir = test_dir
self.test_dir = standardize_path(test_dir, check_read=True)
self.instruction_template = instruction_template
self.alternative_prompt = "{title} -- {passage}\nQuestion: {question}\nA. Yes\nB. No\nAnswer:"
self.answer_reference = {'True': 'A', 'False': 'B', 'Yes': 'A', 'No': 'B', 'Y': 'A', 'N': 'B', 'T': 'A', 'F': 'B'}

View File

@@ -31,7 +31,7 @@ from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
from mindspeed_llm.tasks.evaluation.eval_utils.ceval_utils import format_ceval_templates, first_capital_postprocess
from mindspeed_llm.tasks.evaluation.utils import get_final_dataset
from mindspeed_llm.tasks.evaluation.eval_impl.template import CEVAL_TEMPLATE_DIR, get_eval_template
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
logger = logging.getLogger(__name__)
@@ -39,7 +39,7 @@ logger = logging.getLogger(__name__)
class CEvalExam(DatasetEval):
def __init__(self, test_dir, eval_args,
instruction_template="{fewshot_template}\n\n问:{question}\n答:"):
self.test_dir = test_dir
self.test_dir = standardize_path(test_dir, check_read=True)
self.instruction_template = instruction_template
self.batch_size = eval_args.evaluation_batch_size
self.rank = dist.get_rank()

View File

@@ -31,9 +31,8 @@ from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat
from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
from mindspeed_llm.tasks.evaluation.eval_utils.cmmlu_utils import cmmlu_subject_mapping, first_option_postprocess, cmmlu_format_example
from mindspeed_llm.tasks.evaluation.utils import get_final_dataset
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
from .template import CMMLU_TEMPLATE_DIR, get_eval_template
logger = logging.getLogger(__name__)
@@ -43,7 +42,7 @@ class CmmluEval(DatasetEval):
"{question}\n答案: ",
output_template1=r".*(?P<答案>[A|B|C|D])\..*",
output_template2=r"(?P<答案>[A|B|C|D])"):
self.test_dir = test_dir
self.test_dir = standardize_path(test_dir, check_read=True)
self.instruction_template = instruction_template
self.output_template = [output_template1, output_template2]
self.batch_size = eval_args.evaluation_batch_size

View File

@@ -30,7 +30,7 @@ from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
from mindspeed_llm.tasks.evaluation.eval_utils.gsm8k_utils import four_shots_prompt, gsm8k_postprocess
from mindspeed_llm.tasks.evaluation.utils import get_final_list_dataset
from mindspeed_llm.tasks.evaluation.eval_impl.template import GSM8K_TEMPLATE_DIR
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
logger = logging.getLogger(__name__)
@@ -38,7 +38,7 @@ class Gsm8kEval(DatasetEval):
def __init__(self, test_dir, eval_args,
instruction_template="{fewshot_template}\n\n{question}",
output_template=r'The answer is (.*?) '):
self.test_dir = test_dir
self.test_dir = standardize_path(test_dir, check_read=True)
self.instruction_template = instruction_template
self.output_template = output_template
self.batch_size = eval_args.evaluation_batch_size

View File

@@ -18,7 +18,7 @@ from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat
from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
from mindspeed_llm.tasks.evaluation.eval_utils.mmlu_utils import postprocess
from mindspeed_llm.tasks.evaluation.utils import get_final_list_dataset
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
logger = logging.getLogger(__name__)
@@ -27,7 +27,7 @@ class HellaswagEval(DatasetEval):
def __init__(self, test_dir, eval_args,
output_template1=r".*(?P<answer>[A|B|C|D])\..*",
output_template2=r"(?P<answer>[A|B|C|D])"):
self.test_dir = test_dir
self.test_dir = standardize_path(test_dir, check_read=True)
self.output_template = [output_template1, output_template2]
self.instruction_template = ('{ctx}\nQuestion: Which ending makes the most sense?\n'
'A. {A}\nB. {B}\nC. {C}\nD. {D}\n'

View File

@@ -19,6 +19,7 @@ import logging
import re
import sys
import subprocess
import ast
from typing import Iterable, Dict
import pandas as pd
import tqdm
@@ -32,17 +33,57 @@ from mindspeed_llm.tasks.evaluation.eval_api.dataset_eval import DatasetEval
from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat
from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
from mindspeed_llm.training.utils import WRITE_FILE_DEFAULT_FLAGS, WRITE_FILE_DEFAULT_MODES
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
from mindspeed_llm.tasks.evaluation.eval_utils.human_utils import humaneval_postprocess, get_score
logger = logging.getLogger(__name__)
def is_code_dangerous(code: str, dangerous_patterns) -> bool:
"""AST 检测提权、外联、文件篡改"""
# 正则检测(快速过滤)
for pattern in dangerous_patterns:
if re.search(pattern, code):
return True
# AST semantic analysis (to prevent bypasses)
try:
tree = ast.parse(code)
for node in ast.walk(tree):
if isinstance(node, ast.Call):
if isinstance(node.func, ast.Name):
if node.func.id in ("exec", "eval", "open", "os", "subprocess"):
return True
elif isinstance(node, (ast.Import, ast.ImportFrom)):
for alias in node.names:
if alias.name in ("os", "sys", "subprocess"):
return True
# Detect os.system("sudo ...")
if isinstance(node, ast.Call) and isinstance(node.func, ast.Attribute) and node.func.attr == "system":
return True
if any(isinstance(arg, ast.Str) and ("sudo" in arg.s or "curl" in arg.s) for arg in node.args):
return True
# Detect dynamic imports, e.g. __import__("os").system(...)
if isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == "__import__":
return True
return False
except SyntaxError:
return True  # treat syntax errors as dangerous
def extract_answer_code(answer, task: dict):
"""
:param answer:
:param task:
:return:
"""
"""安全生成测试文件"""
if is_code_dangerous(answer, self.dangerous_patterns) or is_code_dangerous(task["test"], self.dangerous_patterns):
raise ValueError("Unsafe code detected")
task_id = task['task_id']
target_func = task['entry_point']
test_case = task['test']
@@ -51,7 +92,7 @@ def extract_answer_code(answer, task: dict):
code_lines = code.split("\n")
target_func_flag = False
if not os.path.exists(CODE_TEST_LOG_DIR):
os.makedirs(CODE_TEST_LOG_DIR)
os.makedirs(CODE_TEST_LOG_DIR, mode=0o750, exist_ok=True)
test_code_path = "{}/{}".format(CODE_TEST_LOG_DIR, save_file)
with os.fdopen(os.open(test_code_path, WRITE_FILE_DEFAULT_FLAGS, WRITE_FILE_DEFAULT_MODES), 'w') as f:
f.write("from typing import List\n")
@@ -85,7 +126,7 @@ def extract_answer_code(answer, task: dict):
class HumanEval(DatasetEval):
def __init__(self, test_dir, eval_args):
self.test_dir = test_dir
self.test_dir = standardize_path(test_dir, check_read=True)
instruction_template = eval_args.instruction_template
if instruction_template:
self.instruction_template = instruction_template
@@ -96,6 +137,11 @@ class HumanEval(DatasetEval):
self.file_pbar = None
self.task_pbar = None
self.prompt = 'Complete the following python code:\n{prompt}'
self.dangerous_patterns = []
with open("configs/dangerous_shell.json", "r", encoding="utf-8") as f:
self.dangerous_patterns = json.load(f)
print(self.dangerous_patterns)
def read_problems(self) -> Dict[str, Dict]:
return {task["task_id"]: task for task in self.stream_jsonl(self.test_dir)}

View File

@@ -30,6 +30,7 @@ from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat
from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
from mindspeed_llm.tasks.evaluation.eval_utils.mmlu_utils import _format_example, postprocess
from mindspeed_llm.tasks.evaluation.utils import get_final_dataset
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
from .template import MMLU_TEMPLATE_DIR, get_eval_template
@@ -42,7 +43,7 @@ class MmluEval(DatasetEval):
"{question}\nAnswer:",
output_template1=r".*(?P<answer>[A|B|C|D])\..*",
output_template2=r"(?P<answer>[A|B|C|D])"):
self.test_dir = test_dir
self.test_dir = standardize_path(test_dir, check_read=True)
self.instruction_template = instruction_template
self.output_template = [output_template1, output_template2]
self.batch_size = eval_args.evaluation_batch_size

View File

@@ -157,7 +157,7 @@ def load_datasets(config: NeedleBenchConfig):
dataset = []
needle_file_path = os.path.join(config.dataset_dir, "needles.jsonl")
cache_dir = os.path.join(tempfile.gettempdir(), "data-gym-cache")
os.makedirs(cache_dir, exist_ok=True)
os.makedirs(cache_dir, mode=0o750, exist_ok=True)
cache_file = "9b5ad71b2ce5302211f9c61530b329a4922fc6a4"
dest_filepath = os.path.join(cache_dir, cache_file)
# Refer to needlebench-evaluation.md in docs to download cl100k_base.tiktoken and place it in this dataset folder

View File

@@ -1,5 +1,6 @@
# Copyright Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
from functools import reduce
import re
import os
import stat
@@ -52,17 +53,34 @@ def safe_open(file_path: str, mode='r', encoding=None, permission_mode=0o600, is
mode, encoding=encoding)
def standardize_path(path: str, max_path_length=MAX_PATH_LENGTH, check_link=True):
def standardize_path(path: str, max_path_length=MAX_PATH_LENGTH, check_link=True, check_read=True, check_write=True):
"""
check path
param: path
return: data real path after check
"""
check_path_is_none(path)
if path:
path = os.path.realpath(path)
else:
return None
if os.path.exists(path):
if check_read and not os.access(path, os.R_OK):
raise RuntimeError(f"File {path} not readable")
if check_write and not os.access(path, os.W_OK):
raise RuntimeError(f"File {path} not writable")
else:
print(f"Path: {path} not exists")
check_path_length_lt(path, max_path_length)
if check_link:
check_path_is_link(path)
path = os.path.realpath(path)
pattern = r'(\.|/|_|-|\s|[~0-9a-zA-Z]|[\u4e00-\u9fa5])+'
if not re.fullmatch(pattern, path):
raise RuntimeError(f"Invalid input path: {path}")
return path
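
A short usage sketch of the extended helper under the checks shown above; the import path matches this commit, the directories are placeholders:

```python
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path

# input directory: RuntimeError if it exists but is not readable
load_dir = standardize_path("/data/hf_model", check_read=True, check_write=False)

# output directory: RuntimeError if it exists but is not writable
save_dir = standardize_path("/data/mg_ckpt", check_write=True, check_read=False)
```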

View File

@@ -7,7 +7,7 @@ try:
from mindspeed.core.pipeline_parallel.fb_overlap.modules.attention import launch_async_all2all
from mindspeed.core.pipeline_parallel.dualpipev.dualpipev_schedules import get_post_process_flag
except ImportError:
pass
print("[warning] failed import dualpipe modules, not support dualpipe")
from mindspeed.core.transformer.moe.comm_utils import async_all_gather
from mindspeed.core.tensor_parallel.random import CheckpointWithoutOutput
from megatron.training.utils import get_args

View File

@@ -2,7 +2,7 @@ import argparse
import gc
import json
import re
import os
import jsonlines
import pandas as pd
import torch
@@ -13,6 +13,7 @@ from vllm.distributed.parallel_state import (destroy_distributed_environment, de
from utils import blending_datasets, PromptGtAnswerDataset, apply_GenRM_template, rejection_sampling_processor
from mindspeed_llm.tasks.posttrain.verifier.rule_verifier import preprocess_box_response_for_qwen_prompt
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
def clean_up():
@@ -36,13 +37,13 @@ def batch_generate_vllm(args):
dummy_strategy.args = args
# configure tokenizer
tokenizer = AutoTokenizer.from_pretrained(args.pretrain, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(args.pretrain, trust_remote_code=args.trust_remote_code)
# configure model
llm = LLM(
model=args.pretrain,
tensor_parallel_size=args.tp_size,
trust_remote_code=True,
trust_remote_code=args.trust_remote_code,
seed=args.seed,
max_num_seqs=args.max_num_seqs,
enable_prefix_caching=args.enable_prefix_caching,
@@ -107,7 +108,7 @@ def batch_GenRM_rejection_sampling(args):
llm = LLM(
model=args.pretrain,
tensor_parallel_size=args.tp_size,
trust_remote_code=True,
trust_remote_code=args.trust_remote_code,
seed=args.seed,
max_num_seqs=args.max_num_seqs,
enable_prefix_caching=args.enable_prefix_caching,
@@ -215,9 +216,15 @@ if __name__ == "__main__":
parser.add_argument("--iter", type=int, default=None,
help="Used to slice the datasets in range iter * rollout_batch_size: (iter + 1) * rollout_batch_size", )
parser.add_argument("--rollout-batch-size", type=int, default=2048, help="Number of samples to generate")
parser.add_argument('--trust-remote-code',
action='store_true',
default=False,
help='enable trust-remote-code for transformer to load model')
args = parser.parse_args()
args.output_path = standardize_path(args.output_path, check_write=True)
if args.task and args.task == "generate_vllm":
batch_generate_vllm(args)
elif args.task and args.task == "rejection_sampling":

View File

@@ -31,7 +31,7 @@ def blending_datasets(
ext = os.path.splitext(dataset)[-1]
# local python script
if ext == ".py" or (os.path.isdir(dataset) and os.path.exists(os.path.join(dataset, f"{dataset_basename}.py"))):
data = load_dataset(dataset, trust_remote_code=True)
data = load_dataset(dataset, trust_remote_code=False)
strategy.print(f"loaded {dataset} with python script")
# local text file
elif ext in [".json", ".jsonl", ".csv"]:

View File

@@ -13,7 +13,7 @@ from megatron.training import get_timers
try:
from mindspeed.core.pipeline_parallel.dualpipev.dualpipev_schedules import set_post_process_flag
except ImportError:
pass
print("[warning] failed import dualpipe modules, not support dualpipe")
from mindspeed_llm.training.utils import get_tune_attention_mask, get_finetune_data_on_this_tp_rank, generate_actual_seq_len
from mindspeed_llm.tasks.posttrain.base import BaseTrainer

View File

@@ -1,5 +1,5 @@
from typing import Union
import os
import torch
from megatron.core import mpu, dist_checkpointing
@@ -19,6 +19,7 @@ from megatron.training.training import compute_throughputs_and_append_to_progres
from megatron.training.utils import unwrap_model, print_rank_0, append_to_progress_log
from megatron.training.yaml_arguments import core_transformer_config_from_yaml
from mindspeed_llm.tasks.posttrain.orm.orm_model import GPTRewardModel
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
def model_provider(is_reward_model=False, pre_process=True, post_process=True) -> Union[GPTModel]:
@@ -137,6 +138,8 @@ def save_checkpoint(iteration, model, optimizer, opt_param_scheduler,
if save_model_type:
save_path = args.save + '/' + save_model_type
save_path = standardize_path(save_path, check_write=True)
ckpt_format = args.dist_ckpt_format if args.use_dist_ckpt else 'torch'
print_rank_0('saving checkpoint at iteration {:7d} to {} in {} format'.format(
iteration, save_path, ckpt_format))

View File

@@ -38,7 +38,7 @@ def parse_digits(num):
try:
return float(num) / 100
except:
pass
return None
return None

View File

@@ -1,7 +1,7 @@
import random
import re
from typing import TypeVar, Iterable, List, Union, Any, Dict
import ast
import regex
import sympy
from latex2sympy2 import latex2sympy
@@ -75,7 +75,7 @@ def convert_word_number(text: str) -> str:
try:
text = str(w2n.word_to_num(text))
except ValueError:
pass
return None
return text
@@ -468,7 +468,7 @@ def extract_theoremqa_answer(pred: str, answer_flag: bool = True):
pred = clean_units(pred)
try:
tmp = str(latex2sympy(pred))
pred = str(eval(tmp))
pred = str(ast.literal_eval(tmp))
except Exception:
if re.match(r"-?[\d\.]+\s\D+$", pred):
pred = pred.split(" ")[0]

View File

@@ -21,16 +21,14 @@ def load_jsonl(file: Union[str, Path]) -> Iterable[Any]:
yield json.loads(line)
except json.JSONDecodeError as e:
print("Error in loading JSON:", line, "Error:", e)
pass
except Exception as e:
print("Unexpected error in loading:", line, "Error:", e)
pass
def save_jsonl(samples, save_path):
# ensure path
folder = os.path.dirname(save_path)
os.makedirs(folder, exist_ok=True)
os.makedirs(folder, mode=0o750, exist_ok=True)
with open(save_path, "w", encoding="utf-8") as f:
for sample in samples:

View File

@@ -1034,6 +1034,7 @@ def build_dataset(args):
# for MOSS, streaming is needed.
args.streaming = True
if args.hf_datasets_params:
args.hf_datasets_params = standardize_path(args.hf_datasets_params, check_read=True)
with open(args.hf_datasets_params, 'r') as fin:
param_dict = json.load(fin)
return load_dataset(**param_dict)
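
A hedged illustration of what the file behind --hf-datasets-params could contain, expressed as the dict that json.load would produce; the keys are just datasets.load_dataset keyword arguments and the values are placeholders:

```python
from datasets import load_dataset

param_dict = {
    "path": "json",                         # loader name
    "data_files": "/path/to/train.jsonl",   # placeholder path
    "split": "train",
}
dataset = load_dataset(**param_dict)
```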

View File

@@ -190,7 +190,8 @@ class ToolFormatter(Formatter):
return [default_tool_formatter(tools)]
else:
raise NotImplementedError
except Exception:
except Exception as e:
print(f"[warning] Unexpected error processing content: {content}. Error: {e}")
return [""]
def extract(self, content: str) -> Union[str, Tuple[str, str]]:

View File

@@ -23,7 +23,7 @@ from datasets import load_dataset, concatenate_datasets, interleave_datasets
from mindspeed_llm.tasks.preprocess.templates import Role
from mindspeed_llm.tasks.preprocess.parser import InstructionDatasetAttr
from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -103,6 +103,7 @@ def get_dataset_list(data_args) -> List["InstructionDatasetAttr"]:
else:
dataset_names = []
data_args.dataset_dir = standardize_path(data_args.dataset_dir, check_read=True)
try:
with open(os.path.join(data_args.dataset_dir, DATA_CONFIG), "r") as f:
dataset_info = json.load(f)

View File

@@ -820,6 +820,10 @@ def _add_training_args(parser):
action='store_true',
default=False,
help='enable deterministic computing for npu')
group.add_argument('--trust-remote-code',
action='store_true',
default=False,
help='enable trust-remote-code for transformer to load model')
group.add_argument('--jit-compile', action='store_true', default=False,
help='Setting jit compile mode to True')
group.add_argument('--prompt-type', type=str, default=None,

View File

@@ -123,7 +123,7 @@ def _load_base_checkpoint(load_dir, rank0=False, sharded_state_dict=None,
return state_dict, checkpoint_name, release
try:
state_dict = torch.load(checkpoint_name, map_location='cpu', weights_only=False)
state_dict = torch.load(checkpoint_name, map_location='cpu', weights_only=True)
except ModuleNotFoundError:
from megatron.legacy.fp16_deprecated import loss_scaler
# For backward compatibility.
@@ -134,7 +134,7 @@ def _load_base_checkpoint(load_dir, rank0=False, sharded_state_dict=None,
sys.modules['megatron.fp16.loss_scaler'] = sys.modules[
'megatron.legacy.fp16_deprecated.loss_scaler']
sys.modules['megatron.model'] = sys.modules['megatron.legacy.model']
state_dict = torch.load(checkpoint_name, map_location='cpu', weights_only=False)
state_dict = torch.load(checkpoint_name, map_location='cpu', weights_only=True)
sys.modules.pop('fp16.loss_scaler', None)
sys.modules.pop('megatron.fp16.loss_scaler', None)
sys.modules.pop('megatron.model', None)

View File

@@ -49,6 +49,7 @@ def build_tokenizer(args):
model_max_length=args.seq_length,
use_fast=args.tokenizer_not_use_fast,
prompt_type=args.prompt_type,
trust_remote_code=args.trust_remote_code,
**hf_tokenizer_kwargs
)
@@ -108,7 +109,7 @@ class TokenizerAdaptor:
class _AutoTokenizer(MegatronTokenizer):
"""AutoTokenizer for Hf Pretrained model loading."""
def __init__(self, tokenizer_name_or_path, vocab_extra_ids, model_max_length, use_fast, prompt_type=None, **kwargs):
def __init__(self, tokenizer_name_or_path, vocab_extra_ids, model_max_length, use_fast, prompt_type=None, trust_remote_code=False, **kwargs):
name = tokenizer_name_or_path
super().__init__(name)
hf_tokenizer_kwargs = kwargs
@@ -117,7 +118,7 @@ class _AutoTokenizer(MegatronTokenizer):
hf_tokenizer_kwargs["model_max_length"] = model_max_length
hf_tokenizer_kwargs["use_fast"] = use_fast
hf_tokenizer_kwargs["trust_remote_code"] = True
hf_tokenizer_kwargs["trust_remote_code"] = trust_remote_code
self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, **hf_tokenizer_kwargs, local_files_only=True)
if (prompt_type is None) and (self.tokenizer.pad_token_id is None):
self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
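
A sketch of how the tokenizer kwargs are assembled once the flag reaches _AutoTokenizer; the values and tokenizer path are placeholders:

```python
from transformers import AutoTokenizer

hf_tokenizer_kwargs = {
    "model_max_length": 4096,        # placeholder value
    "use_fast": True,
    "trust_remote_code": False,      # now taken from --trust-remote-code instead of hard-coded True
}
tokenizer = AutoTokenizer.from_pretrained(
    "/path/to/tokenizer",
    **hf_tokenizer_kwargs,
    local_files_only=True,
)
```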

View File

@@ -195,6 +195,10 @@ def add_tokenizer_args(parser):
default=[],
help="The labels represent the correctness of each reasoning step in the entire reasoning process.",
)
parser.add_argument('--trust-remote-code',
action='store_true',
default=False,
help='enable trust-remote-code for transformer to load model')
def add_output_args(parser):

View File

@@ -100,6 +100,7 @@ ROPE_ARGS="
GPT_ARGS="
--trust-remote-code \
--no-gradient-accumulation-fusion \
--spec mindspeed_llm.tasks.models.spec.deepseek_spec layer_spec \
--reset-position-ids \

View File

@@ -46,6 +46,7 @@ MLA_ARGS="
MOE_ARGS="
--trust-remote-code \
--moe-grouped-gemm \
--moe-permutation-async-comm \
--use-fused-moe-token-permute-and-unpermute \

View File

@@ -34,6 +34,7 @@ DISTRIBUTED_ARGS="
"
GPT_ARGS="
--trust-remote-code \
--tensor-model-parallel-size ${TP} \
--pipeline-model-parallel-size ${PP} \
--sequence-parallel \

View File

@@ -31,6 +31,7 @@ DISTRIBUTED_ARGS="
"
GPT_ARGS="
--trust-remote-code \
--use-mcore-models \
--transformer-impl local \
--tensor-model-parallel-size ${TP} \

View File

@@ -97,7 +97,7 @@ def compare_state_dicts(state_dict1, state_dict2):
def process_file(file_path):
data = torch.load(file_path, map_location='cpu', weights_only=False)
data = torch.load(file_path, map_location='cpu', weights_only=True)
layer_ckpt = {}
# compatible with weights that include vpp
for key in data.keys():
@@ -164,8 +164,8 @@ def weight_compare(dir_1, dir_2, suffix="pt", use_md5=False):
if use_md5:
are_equal = (get_md5sum(path_1) == get_md5sum(path_2))
else:
state_dict1 = torch.load(path_1, weights_only=False)
state_dict2 = torch.load(path_2, weights_only=False)
state_dict1 = torch.load(path_1, weights_only=True)
state_dict2 = torch.load(path_2, weights_only=True)
are_equal = compare_state_dicts(state_dict1, state_dict2)
if not are_equal:
return False
@@ -192,8 +192,8 @@ def weight_compare_optim(dir_1, dir_2, suffix="pt", use_md5=False):
if use_md5:
are_equal = (get_md5sum(path_1) == get_md5sum(path_2))
else:
state_dict1 = torch.load(path_1, weights_only=False)
state_dict2 = torch.load(path_2, weights_only=False)
state_dict1 = torch.load(path_1, weights_only=True)
state_dict2 = torch.load(path_2, weights_only=True)
are_equal = compare_state_dicts(state_dict1, state_dict2)
if not are_equal:

View File

@@ -2,6 +2,7 @@
"test_deepseek2_hf2mcore_tp1pp4ep8": [
{
"param": {
"trust-remote-code":null,
"model-type":"GPT",
"load-model-type":"hf",
"save-model-type":"mg",
@@ -23,6 +24,7 @@
"test_deepseek2_mcore2hf_tp1pp4ep8": [
{
"param": {
"trust-remote-code":null,
"model-type":"GPT",
"load-model-type":"mg",
"save-model-type": "hf",