mirror of
https://gitee.com/mindspore/mindformers.git
synced 2025-12-06 19:42:57 +08:00
!6842 【master】增加路径校验逻辑
Merge pull request !6842 from zyw_hw/add_check_path_logic_master
This commit is contained in:
@@ -18,7 +18,7 @@ AutoConfig, AutoModel, AutoProcessor, AutoTokenizer
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from mindformers.tools.utils import try_sync_file
|
||||
from mindformers.tools.utils import try_sync_file, check_path_is_valid
|
||||
|
||||
from mindformers.mindformer_book import MindFormerBook, print_dict
|
||||
from mindformers.models.build_processor import build_processor
|
||||
@@ -110,9 +110,9 @@ class AutoConfig:
|
||||
if pretrained_model_name_or_path is not None:
|
||||
yaml_name_or_path = pretrained_model_name_or_path
|
||||
|
||||
if not isinstance(yaml_name_or_path, str):
|
||||
raise TypeError(f"yaml_name_or_path should be a str,"
|
||||
f" but got {type(yaml_name_or_path)}.")
|
||||
if not check_path_is_valid(yaml_name_or_path):
|
||||
raise ValueError(f"The value of yaml_name_or_path is {yaml_name_or_path}, and it is not valid, "
|
||||
f"please check and reset it.")
|
||||
|
||||
if os.path.exists(yaml_name_or_path):
|
||||
if not yaml_name_or_path.endswith(".yaml"):
|
||||
@@ -425,9 +425,10 @@ class AutoModel:
|
||||
if pretrained_model_name_or_path is not None:
|
||||
pretrained_model_name_or_dir = pretrained_model_name_or_path
|
||||
|
||||
if not isinstance(pretrained_model_name_or_dir, str):
|
||||
raise TypeError(f"pretrained_model_name_or_dir should be a str,"
|
||||
f" but got {type(pretrained_model_name_or_dir)}")
|
||||
if not check_path_is_valid(pretrained_model_name_or_path):
|
||||
raise ValueError(f"The value of pretrained_model_name_or_path is {pretrained_model_name_or_path}, "
|
||||
f"and it is not valid, please check and reset it.")
|
||||
|
||||
pretrained_model_name_or_dir = os.path.realpath(pretrained_model_name_or_dir)
|
||||
config_args = cls._get_config_args(pretrained_model_name_or_dir, **kwargs)
|
||||
if not download_checkpoint:
|
||||
@@ -526,8 +527,9 @@ class AutoProcessor:
|
||||
if pretrained_model_name_or_path is not None:
|
||||
yaml_name_or_path = pretrained_model_name_or_path
|
||||
|
||||
if not isinstance(yaml_name_or_path, str):
|
||||
raise TypeError(f"yaml_name_or_path should be a str, but got {type(yaml_name_or_path)}")
|
||||
if not check_path_is_valid(yaml_name_or_path):
|
||||
raise ValueError(f"The value of yaml_name_or_path is {yaml_name_or_path}, and it is not valid, "
|
||||
f"please check and reset it.")
|
||||
|
||||
is_exist = os.path.exists(yaml_name_or_path)
|
||||
model_name = yaml_name_or_path.split('/')[cls._model_name].split("_")[cls._model_type] \
|
||||
@@ -716,9 +718,10 @@ class AutoTokenizer:
|
||||
if pretrained_model_name_or_path is not None:
|
||||
yaml_name_or_path = pretrained_model_name_or_path
|
||||
|
||||
from . import MindFormerRegister
|
||||
if not isinstance(yaml_name_or_path, str):
|
||||
raise TypeError(f"yaml_name_or_path should be a str, but got {type(yaml_name_or_path)}")
|
||||
from mindformers import MindFormerRegister
|
||||
if not check_path_is_valid(yaml_name_or_path):
|
||||
raise ValueError(f"The value of yaml_name_or_path is {yaml_name_or_path}, and it is not valid, "
|
||||
f"please check and reset it.")
|
||||
|
||||
# Try to load from the remote
|
||||
if not cls.invalid_yaml_name(yaml_name_or_path):
|
||||
|
||||
@@ -20,22 +20,22 @@ import importlib
|
||||
import inspect
|
||||
import json
|
||||
from collections import OrderedDict
|
||||
|
||||
from .tokenization_auto import AutoTokenizer
|
||||
from ..configuration_utils import PretrainedConfig
|
||||
from ..tokenization_utils_base import TOKENIZER_CONFIG_FILE
|
||||
from .image_processing_auto import AutoImageProcessor
|
||||
from ..image_processing_utils import ImageProcessingMixin
|
||||
from ..processing_utils import ProcessorMixin
|
||||
from ..utils import FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME
|
||||
from .auto_factory import _LazyAutoMapping
|
||||
from .configuration_auto import CONFIG_MAPPING_NAMES, AutoConfig
|
||||
from ...tools.hub.hub import get_file_from_repo
|
||||
from ...tools.generic import experimental_mode_func_checker
|
||||
from ...tools import get_class_from_dynamic_module, resolve_trust_remote_code, logger
|
||||
from ...tools.register.config import MindFormerConfig
|
||||
from ...mindformer_book import MindFormerBook, print_dict
|
||||
from ..build_processor import build_processor
|
||||
from mindformers.tools.utils import check_path_is_valid
|
||||
from mindformers.models.auto.tokenization_auto import AutoTokenizer
|
||||
from mindformers.models.configuration_utils import PretrainedConfig
|
||||
from mindformers.models.tokenization_utils_base import TOKENIZER_CONFIG_FILE
|
||||
from mindformers.models.auto.image_processing_auto import AutoImageProcessor
|
||||
from mindformers.models.image_processing_utils import ImageProcessingMixin
|
||||
from mindformers.models.processing_utils import ProcessorMixin
|
||||
from mindformers.models.utils import FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME
|
||||
from mindformers.models.auto.auto_factory import _LazyAutoMapping
|
||||
from mindformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES, AutoConfig
|
||||
from mindformers.tools.hub.hub import get_file_from_repo
|
||||
from mindformers.tools.generic import experimental_mode_func_checker
|
||||
from mindformers.tools import get_class_from_dynamic_module, resolve_trust_remote_code, logger
|
||||
from mindformers.tools.register.config import MindFormerConfig
|
||||
from mindformers.mindformer_book import MindFormerBook, print_dict
|
||||
from mindformers.models.build_processor import build_processor
|
||||
|
||||
|
||||
EXP_ERROR_MSG = "The input yaml_name_or_path should be a path to yaml file, or a " \
|
||||
@@ -59,6 +59,9 @@ def is_experimental_mode(path):
|
||||
is_exists = os.path.exists(path)
|
||||
is_dir = os.path.isdir(path)
|
||||
if is_exists:
|
||||
if not check_path_is_valid(path):
|
||||
raise ValueError(f"The value of path in AutoProcessor.from_pretrained() is {path}, "
|
||||
f"it is not valid, please check and reset it.")
|
||||
if is_dir:
|
||||
yaml_list = [file
|
||||
for file in os.listdir(path)
|
||||
|
||||
@@ -31,6 +31,7 @@ from mindformers.tools import (
|
||||
extract_commit_hash,
|
||||
)
|
||||
from mindformers.tools.hub import get_class_from_dynamic_module, resolve_trust_remote_code
|
||||
from mindformers.tools.utils import check_path_is_valid
|
||||
from mindformers.utils.import_utils import is_sentencepiece_available, is_tokenizers_available
|
||||
from mindformers.tools.logger import logger
|
||||
from mindformers.models.auto.configuration_auto import (
|
||||
@@ -57,6 +58,9 @@ def is_experimental_mode(path):
|
||||
experimental_mode = False
|
||||
|
||||
is_exist = os.path.exists(path)
|
||||
if is_exist and not check_path_is_valid(path):
|
||||
raise ValueError(f"The value of path in AutoTokenizer.from_pretrained() is {path}, "
|
||||
f"it is not valid, please check and reset it.")
|
||||
is_dir = os.path.isdir(path)
|
||||
if is_dir:
|
||||
yaml_list = [file for file in os.listdir(path) if file.endswith(".yaml")]
|
||||
@@ -286,7 +290,7 @@ class AutoTokenizer:
|
||||
Returns:
|
||||
The class name of the tokenizer in the config yaml.
|
||||
"""
|
||||
from ...tools import MindFormerConfig
|
||||
from mindformers.tools import MindFormerConfig
|
||||
is_exist = os.path.exists(yaml_name_or_path)
|
||||
is_dir = os.path.isdir(yaml_name_or_path)
|
||||
is_file = os.path.isfile(yaml_name_or_path)
|
||||
@@ -346,7 +350,7 @@ class AutoTokenizer:
|
||||
@classmethod
|
||||
def get_class_from_origin_mode(cls, yaml_name_or_path, **kwargs):
|
||||
"""original logic: from yaml."""
|
||||
from ...tools import MindFormerRegister
|
||||
from mindformers import MindFormerRegister
|
||||
|
||||
if not isinstance(yaml_name_or_path, str):
|
||||
raise TypeError(f"yaml_name_or_path should be a str,"
|
||||
|
||||
@@ -22,7 +22,7 @@ import shutil
|
||||
|
||||
import yaml
|
||||
from mindformers.tools.check_rules import check_yaml_depth_before_loading
|
||||
from mindformers.tools.utils import FILE_PERMISSION
|
||||
from mindformers.tools.utils import FILE_PERMISSION, check_path_is_valid
|
||||
from ..mindformer_book import MindFormerBook
|
||||
from ..mindformer_book import print_path_or_list
|
||||
from ..tools.logger import logger
|
||||
@@ -107,9 +107,9 @@ class BaseConfig(dict):
|
||||
if pretrained_model_name_or_path is not None:
|
||||
yaml_name_or_path = pretrained_model_name_or_path
|
||||
|
||||
if not isinstance(yaml_name_or_path, str):
|
||||
raise TypeError(f"yaml_name_or_path should be a str,"
|
||||
f" but got {type(yaml_name_or_path)}.")
|
||||
if not check_path_is_valid(yaml_name_or_path):
|
||||
raise ValueError(f"The value of yaml_name_or_path is {yaml_name_or_path}, and it is not valid, "
|
||||
f"please check and reset it.")
|
||||
|
||||
if os.path.exists(yaml_name_or_path):
|
||||
if not yaml_name_or_path.endswith(".yaml"):
|
||||
@@ -177,9 +177,12 @@ class BaseConfig(dict):
|
||||
if save_directory is None:
|
||||
save_directory = MindFormerBook.get_default_checkpoint_save_folder()
|
||||
|
||||
if not isinstance(save_directory, str) or not isinstance(save_name, str):
|
||||
raise TypeError(f"save_directory and save_name should be a str,"
|
||||
f" but got {type(save_directory)} and {type(save_name)}.")
|
||||
if not check_path_is_valid(save_directory):
|
||||
raise ValueError(f"The value of save_directory is {save_directory}, "
|
||||
f"it is not valid, please check and reset it.")
|
||||
|
||||
if not isinstance(save_name, str):
|
||||
raise TypeError(f"save_name should be a str, but got {type(save_name)}.")
|
||||
|
||||
if not os.path.exists(save_directory):
|
||||
os.makedirs(save_directory, exist_ok=True)
|
||||
|
||||
@@ -24,7 +24,7 @@ from typing import Optional, Union
|
||||
import json
|
||||
import yaml
|
||||
from mindformers.tools.check_rules import check_yaml_depth_before_loading
|
||||
from mindformers.tools.utils import FILE_PERMISSION
|
||||
from mindformers.tools.utils import FILE_PERMISSION, check_path_is_valid
|
||||
from ..mindformer_book import print_path_or_list, MindFormerBook
|
||||
from .build_processor import build_processor
|
||||
from .tokenization_utils import PreTrainedTokenizer
|
||||
@@ -57,6 +57,9 @@ def is_experimental_mode(path):
|
||||
is_exists = os.path.exists(path)
|
||||
is_dir = os.path.isdir(path)
|
||||
if is_exists:
|
||||
if not check_path_is_valid(path):
|
||||
raise ValueError(f"The value of path in ProcessorMixin.from_pretrained() is {path}, "
|
||||
f"it is not valid, please check and reset it.")
|
||||
if is_dir:
|
||||
experimental_mode = True
|
||||
else: # file
|
||||
|
||||
@@ -19,6 +19,7 @@ import os
|
||||
import tempfile
|
||||
|
||||
from contextlib import contextmanager
|
||||
from mindformers.tools.utils import check_path_is_valid
|
||||
|
||||
|
||||
@contextmanager
|
||||
@@ -86,6 +87,9 @@ def is_experimental_mode(path):
|
||||
if not os.path.exists(path) and "/" in path and path.split("/")[0] != "mindspore":
|
||||
experimental_mode = True
|
||||
elif os.path.isdir(path) or is_json_file(path):
|
||||
if not check_path_is_valid(path):
|
||||
raise ValueError(f"The value of path in AutoConfig.from_pretrained() is {path}, "
|
||||
f"it is not valid, please check and reset it.")
|
||||
experimental_mode = True
|
||||
|
||||
return experimental_mode
|
||||
|
||||
@@ -838,3 +838,28 @@ def get_ascend_log_path():
|
||||
if ascend_log_path:
|
||||
return os.path.join(ascend_log_path, 'log')
|
||||
return os.path.join(os.path.expanduser("~"), 'ascend', 'log')
|
||||
|
||||
|
||||
def check_path_is_valid(path):
    """Check whether *path* is a safe, usable filesystem path.

    A path is considered valid when it is a non-empty ``str``, contains no
    parent-directory traversal (``..``), and does not go through a symbolic
    link (fail-closed: any error while normalizing also counts as invalid).

    Args:
        path: Candidate path value to validate; any non-``str`` (or empty)
            value is rejected.

    Returns:
        bool: ``True`` if the path passes all checks, ``False`` otherwise.
    """
    # Imported lazily to avoid a circular import between tools.utils and
    # tools.logger at module load time.
    from .logger import logger
    if not path or not isinstance(path, str):
        # Lazy %-style args: the message is only formatted if the record is emitted.
        logger.warning("The input path is not valid because it is %s", type(path))
        return False

    if ".." in path:
        logger.warning("The input path should not include '..'.")
        return False

    # pylint: disable=W0703
    # Reject paths that resolve through a symbolic link: realpath() follows
    # links while abspath() does not, so any difference means a link is present.
    try:
        real_path = os.path.realpath(path)
        abs_path = os.path.abspath(path)
    except BaseException as e:
        # Deliberately broad (see pylint disable above): this is a security
        # gate, so any failure to normalize the path is treated as invalid.
        logger.warning("Error occurs when normalize the input path: %s.", e)
        return False

    if real_path != abs_path:
        logger.warning("The input path should not include symbolic link.")
        return False

    return True
|
||||
|
||||
Reference in New Issue
Block a user