!6842 【master】增加路径校验逻辑

Merge pull request !6842 from zyw_hw/add_check_path_logic_master
This commit is contained in:
i-robot
2025-07-26 02:06:25 +00:00
committed by Gitee
7 changed files with 83 additions and 38 deletions

View File

@@ -18,7 +18,7 @@ AutoConfig, AutoModel, AutoProcessor, AutoTokenizer
import os
import shutil
from mindformers.tools.utils import try_sync_file
from mindformers.tools.utils import try_sync_file, check_path_is_valid
from mindformers.mindformer_book import MindFormerBook, print_dict
from mindformers.models.build_processor import build_processor
@@ -110,9 +110,9 @@ class AutoConfig:
if pretrained_model_name_or_path is not None:
yaml_name_or_path = pretrained_model_name_or_path
if not isinstance(yaml_name_or_path, str):
raise TypeError(f"yaml_name_or_path should be a str,"
f" but got {type(yaml_name_or_path)}.")
if not check_path_is_valid(yaml_name_or_path):
raise ValueError(f"The value of yaml_name_or_path is {yaml_name_or_path}, and it is not valid, "
f"please check and reset it.")
if os.path.exists(yaml_name_or_path):
if not yaml_name_or_path.endswith(".yaml"):
@@ -425,9 +425,10 @@ class AutoModel:
if pretrained_model_name_or_path is not None:
pretrained_model_name_or_dir = pretrained_model_name_or_path
if not isinstance(pretrained_model_name_or_dir, str):
raise TypeError(f"pretrained_model_name_or_dir should be a str,"
f" but got {type(pretrained_model_name_or_dir)}")
if not check_path_is_valid(pretrained_model_name_or_path):
raise ValueError(f"The value of pretrained_model_name_or_path is {pretrained_model_name_or_path}, "
f"and it is not valid, please check and reset it.")
pretrained_model_name_or_dir = os.path.realpath(pretrained_model_name_or_dir)
config_args = cls._get_config_args(pretrained_model_name_or_dir, **kwargs)
if not download_checkpoint:
@@ -526,8 +527,9 @@ class AutoProcessor:
if pretrained_model_name_or_path is not None:
yaml_name_or_path = pretrained_model_name_or_path
if not isinstance(yaml_name_or_path, str):
raise TypeError(f"yaml_name_or_path should be a str, but got {type(yaml_name_or_path)}")
if not check_path_is_valid(yaml_name_or_path):
raise ValueError(f"The value of yaml_name_or_path is {yaml_name_or_path}, and it is not valid, "
f"please check and reset it.")
is_exist = os.path.exists(yaml_name_or_path)
model_name = yaml_name_or_path.split('/')[cls._model_name].split("_")[cls._model_type] \
@@ -716,9 +718,10 @@ class AutoTokenizer:
if pretrained_model_name_or_path is not None:
yaml_name_or_path = pretrained_model_name_or_path
from . import MindFormerRegister
if not isinstance(yaml_name_or_path, str):
raise TypeError(f"yaml_name_or_path should be a str, but got {type(yaml_name_or_path)}")
from mindformers import MindFormerRegister
if not check_path_is_valid(yaml_name_or_path):
raise ValueError(f"The value of yaml_name_or_path is {yaml_name_or_path}, and it is not valid, "
f"please check and reset it.")
# Try to load from the remote
if not cls.invalid_yaml_name(yaml_name_or_path):

View File

@@ -20,22 +20,22 @@ import importlib
import inspect
import json
from collections import OrderedDict
from .tokenization_auto import AutoTokenizer
from ..configuration_utils import PretrainedConfig
from ..tokenization_utils_base import TOKENIZER_CONFIG_FILE
from .image_processing_auto import AutoImageProcessor
from ..image_processing_utils import ImageProcessingMixin
from ..processing_utils import ProcessorMixin
from ..utils import FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME
from .auto_factory import _LazyAutoMapping
from .configuration_auto import CONFIG_MAPPING_NAMES, AutoConfig
from ...tools.hub.hub import get_file_from_repo
from ...tools.generic import experimental_mode_func_checker
from ...tools import get_class_from_dynamic_module, resolve_trust_remote_code, logger
from ...tools.register.config import MindFormerConfig
from ...mindformer_book import MindFormerBook, print_dict
from ..build_processor import build_processor
from mindformers.tools.utils import check_path_is_valid
from mindformers.models.auto.tokenization_auto import AutoTokenizer
from mindformers.models.configuration_utils import PretrainedConfig
from mindformers.models.tokenization_utils_base import TOKENIZER_CONFIG_FILE
from mindformers.models.auto.image_processing_auto import AutoImageProcessor
from mindformers.models.image_processing_utils import ImageProcessingMixin
from mindformers.models.processing_utils import ProcessorMixin
from mindformers.models.utils import FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME
from mindformers.models.auto.auto_factory import _LazyAutoMapping
from mindformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES, AutoConfig
from mindformers.tools.hub.hub import get_file_from_repo
from mindformers.tools.generic import experimental_mode_func_checker
from mindformers.tools import get_class_from_dynamic_module, resolve_trust_remote_code, logger
from mindformers.tools.register.config import MindFormerConfig
from mindformers.mindformer_book import MindFormerBook, print_dict
from mindformers.models.build_processor import build_processor
EXP_ERROR_MSG = "The input yaml_name_or_path should be a path to yaml file, or a " \
@@ -59,6 +59,9 @@ def is_experimental_mode(path):
is_exists = os.path.exists(path)
is_dir = os.path.isdir(path)
if is_exists:
if not check_path_is_valid(path):
raise ValueError(f"The value of path in AutoProcessor.from_pretrained() is {path}, "
f"it is not valid, please check and reset it.")
if is_dir:
yaml_list = [file
for file in os.listdir(path)

View File

@@ -31,6 +31,7 @@ from mindformers.tools import (
extract_commit_hash,
)
from mindformers.tools.hub import get_class_from_dynamic_module, resolve_trust_remote_code
from mindformers.tools.utils import check_path_is_valid
from mindformers.utils.import_utils import is_sentencepiece_available, is_tokenizers_available
from mindformers.tools.logger import logger
from mindformers.models.auto.configuration_auto import (
@@ -57,6 +58,9 @@ def is_experimental_mode(path):
experimental_mode = False
is_exist = os.path.exists(path)
if is_exist and not check_path_is_valid(path):
raise ValueError(f"The value of path in AutoTokenizer.from_pretrained() is {path}, "
f"it is not valid, please check and reset it.")
is_dir = os.path.isdir(path)
if is_dir:
yaml_list = [file for file in os.listdir(path) if file.endswith(".yaml")]
@@ -286,7 +290,7 @@ class AutoTokenizer:
Returns:
The class name of the tokenizer in the config yaml.
"""
from ...tools import MindFormerConfig
from mindformers.tools import MindFormerConfig
is_exist = os.path.exists(yaml_name_or_path)
is_dir = os.path.isdir(yaml_name_or_path)
is_file = os.path.isfile(yaml_name_or_path)
@@ -346,7 +350,7 @@ class AutoTokenizer:
@classmethod
def get_class_from_origin_mode(cls, yaml_name_or_path, **kwargs):
"""original logic: from yaml."""
from ...tools import MindFormerRegister
from mindformers import MindFormerRegister
if not isinstance(yaml_name_or_path, str):
raise TypeError(f"yaml_name_or_path should be a str,"

View File

@@ -22,7 +22,7 @@ import shutil
import yaml
from mindformers.tools.check_rules import check_yaml_depth_before_loading
from mindformers.tools.utils import FILE_PERMISSION
from mindformers.tools.utils import FILE_PERMISSION, check_path_is_valid
from ..mindformer_book import MindFormerBook
from ..mindformer_book import print_path_or_list
from ..tools.logger import logger
@@ -107,9 +107,9 @@ class BaseConfig(dict):
if pretrained_model_name_or_path is not None:
yaml_name_or_path = pretrained_model_name_or_path
if not isinstance(yaml_name_or_path, str):
raise TypeError(f"yaml_name_or_path should be a str,"
f" but got {type(yaml_name_or_path)}.")
if not check_path_is_valid(yaml_name_or_path):
raise ValueError(f"The value of yaml_name_or_path is {yaml_name_or_path}, and it is not valid, "
f"please check and reset it.")
if os.path.exists(yaml_name_or_path):
if not yaml_name_or_path.endswith(".yaml"):
@@ -177,9 +177,12 @@ class BaseConfig(dict):
if save_directory is None:
save_directory = MindFormerBook.get_default_checkpoint_save_folder()
if not isinstance(save_directory, str) or not isinstance(save_name, str):
raise TypeError(f"save_directory and save_name should be a str,"
f" but got {type(save_directory)} and {type(save_name)}.")
if not check_path_is_valid(save_directory):
raise ValueError(f"The value of save_directory is {save_directory}, "
f"it is not valid, please check and reset it.")
if not isinstance(save_name, str):
raise TypeError(f"save_name should be a str, but got {type(save_name)}.")
if not os.path.exists(save_directory):
os.makedirs(save_directory, exist_ok=True)

View File

@@ -24,7 +24,7 @@ from typing import Optional, Union
import json
import yaml
from mindformers.tools.check_rules import check_yaml_depth_before_loading
from mindformers.tools.utils import FILE_PERMISSION
from mindformers.tools.utils import FILE_PERMISSION, check_path_is_valid
from ..mindformer_book import print_path_or_list, MindFormerBook
from .build_processor import build_processor
from .tokenization_utils import PreTrainedTokenizer
@@ -57,6 +57,9 @@ def is_experimental_mode(path):
is_exists = os.path.exists(path)
is_dir = os.path.isdir(path)
if is_exists:
if not check_path_is_valid(path):
raise ValueError(f"The value of path in ProcessorMixin.from_pretrained() is {path}, "
f"it is not valid, please check and reset it.")
if is_dir:
experimental_mode = True
else: # file

View File

@@ -19,6 +19,7 @@ import os
import tempfile
from contextlib import contextmanager
from mindformers.tools.utils import check_path_is_valid
@contextmanager
@@ -86,6 +87,9 @@ def is_experimental_mode(path):
if not os.path.exists(path) and "/" in path and path.split("/")[0] != "mindspore":
experimental_mode = True
elif os.path.isdir(path) or is_json_file(path):
if not check_path_is_valid(path):
raise ValueError(f"The value of path in AutoConfig.from_pretrained() is {path}, "
f"it is not valid, please check and reset it.")
experimental_mode = True
return experimental_mode

View File

@@ -838,3 +838,28 @@ def get_ascend_log_path():
if ascend_log_path:
return os.path.join(ascend_log_path, 'log')
return os.path.join(os.path.expanduser("~"), 'ascend', 'log')
def check_path_is_valid(path):
    """Check whether ``path`` is a usable path string without symbolic links.

    The path is rejected when it is not a ``str``, when it is empty, when it
    contains the substring ``..``, or when ``os.path.realpath(path)`` differs
    from ``os.path.abspath(path)`` (the check used here to detect a symbolic
    link somewhere in the path). Each rejection is reported as a warning
    through the package logger; invalid input never raises.

    Args:
        path: The value to validate.

    Returns:
        bool: True if ``path`` passed every check, False otherwise.
    """
    # Function-scope import of the package logger.
    # NOTE(review): presumably deferred to avoid a circular import - confirm.
    from .logger import logger

    if not isinstance(path, str):
        logger.warning(f"The input path is not valid because it is {type(path)}")
        return False
    if not path:
        # Reported separately so the message does not blame the type of a
        # perfectly ordinary (but empty) string.
        logger.warning("The input path is not valid because it is an empty string.")
        return False

    # NOTE(review): this is a substring test, so a name such as "a..b" is
    # rejected as well as a real parent-directory component.
    if ".." in path:
        logger.warning("The input path should not include '..'.")
        return False

    # pylint: disable=W0703
    # Check whether the path goes through a soft link: resolving links must
    # not change the absolute path. If it does, the path is rejected.
    try:
        real_path = os.path.realpath(path)
        absolute_path = os.path.abspath(path)
    except Exception as e:
        # Best effort: any failure while normalizing means "not valid".
        # Exception (not BaseException) is caught so that KeyboardInterrupt
        # and SystemExit still propagate to the caller.
        logger.warning(f"Error occurs when normalize the input path: {e}.")
        return False

    if real_path != absolute_path:
        logger.warning("The input path should not include symbolic link.")
        return False
    return True