Mirror of https://gitee.com/mindspore/mindformers.git (synced 2025-12-06 11:29:59 +08:00)
fix file permission
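Note on the pattern: every hunk below follows the same recipe — write the artifact (json.dump, numpy.save, csv, safetensors index), then immediately clamp its permissions with set_safe_mode_for_file_or_dir, so the file never keeps the looser mode inherited from the process umask. A minimal sketch of the pattern, assuming FILE_PERMISSION is a restrictive mode such as 0o640 (the real constant lives in mindformers.tools.utils and is not shown in this diff):

import json
from pathlib import Path

FILE_PERMISSION = 0o640  # assumed value; the actual constant is defined in mindformers.tools.utils

def dump_json_safely(path, payload):
    # Write first, then restrict the mode in the same code path, mirroring
    # the write-then-chmod ordering used throughout this commit.
    with open(path, 'w') as f:
        json.dump(payload, f, indent=4)
    Path(path).chmod(FILE_PERMISSION)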
@@ -777,6 +777,7 @@ class TrainingStateMonitor(Callback):
                 os.makedirs(parent_dirs)
             with open(self.global_norm_record_path, 'w') as file:
                 json.dump(self.abnormal_global_norms, file)
+            set_safe_mode_for_file_or_dir(self.global_norm_record_path)
             logger.info(f"Current global norm {global_norm} is greater equal than "
                         f"threshold {self.global_norm_spike_threshold}, stop training...")
             barrier_world()
@@ -1393,6 +1394,7 @@ class CheckpointMonitor(ModelCheckpoint):
             all_step_health_data.append(health_step_data)
             with open(dump_health_json_path, 'w') as file:
                 json.dump(all_step_health_data, file, indent=4)
+            set_safe_mode_for_file_or_dir(dump_health_json_path)

         if "epoch_num" in self._append_dict:
             self._append_dict["epoch_num"] = cb_params.cur_epoch_num
@@ -13,7 +13,7 @@ from mindformers.dataset.blended_datasets.blended_megatron_dataset_config import
 from mindformers.dataset.blended_datasets.megatron_dataset import MegatronDataset
 from mindformers.dataset.blended_datasets.utils import normalize
 from mindformers.tools.logger import logger
-from mindformers.tools.utils import get_rank_info
+from mindformers.tools.utils import get_rank_info, set_safe_mode_for_file_or_dir


 _VERBOSE = False
@@ -159,6 +159,9 @@ class BlendedDataset():
             # Save the indexes
             numpy.save(path_to_dataset_index, dataset_index, allow_pickle=False)
             numpy.save(path_to_dataset_sample_index, dataset_sample_index, allow_pickle=False)
+            set_safe_mode_for_file_or_dir(
+                [path_to_description, path_to_dataset_index, path_to_dataset_sample_index]
+            )
         else:
             logger.info(f"Unable to save the {type(self).__name__} indexes because path_to_cache is None")
@@ -14,7 +14,7 @@ from mindformers.dataset.blended_datasets.utils import Split
 from mindformers.dataset.blended_datasets.utils_s3 import S3Config, is_s3_path
 from mindformers.models.tokenization_utils_base import PreTrainedTokenizerBase
 from mindformers.tools.logger import logger
-from mindformers.tools.utils import get_rank_info
+from mindformers.tools.utils import get_rank_info, set_safe_mode_for_file_or_dir


 _PAD_TOKEN_ID = -1
@@ -468,6 +468,12 @@ class GPTDataset(MegatronDataset):
             numpy.save(path_to_document_index, document_index, allow_pickle=False)
             numpy.save(path_to_sample_index, sample_index, allow_pickle=False)
             numpy.save(path_to_shuffle_index, shuffle_index, allow_pickle=False)
+            set_safe_mode_for_file_or_dir([
+                path_to_description,
+                path_to_document_index,
+                path_to_sample_index,
+                path_to_shuffle_index
+            ])
         else:
             logger.warning(
                 f"Unable to save the {type(self).__name__} indexes because path_to_cache is None",
@@ -29,6 +29,7 @@ from mindspore.communication import get_rank
 from mindformers.tools.register.register import MindFormerRegister, MindFormerModuleType
 from mindformers.tools.logger import logger
 from mindformers.version_control import get_dataset_map
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir

 from .dataloader.build_dataloader import build_dataset_loader
 from .base_dataset import BaseDataset
@@ -480,6 +481,7 @@ class TokenCounter:
         # Clear existing file content
         with open(filename, 'w', newline='') as csvfile:
             _ = csv.writer(csvfile)
+        set_safe_mode_for_file_or_dir(filename)

         self.initialized = True
         self.token_count_pairs_header_written = False
@@ -33,13 +33,14 @@ import numpy as np
 import mindspore as ms
 from mindformers.tools.check_rules import check_yaml_depth_before_loading
 from mindformers.tools.utils import FILE_PERMISSION
-from ..tools.logger import logger
-from ..tools.generic import add_model_info_to_auto_map
-from ..utils.import_utils import is_tokenizers_available
-from ..tools.register import MindFormerConfig
-from .build_tokenizer import build_tokenizer
-from ..mindformer_book import MindFormerBook, print_path_or_list
-from ..tools.hub import is_offline_mode, cached_file, extract_commit_hash, custom_object_save, PushToHubMixin
+from mindformers.tools.logger import logger
+from mindformers.tools.generic import add_model_info_to_auto_map
+from mindformers.utils.import_utils import is_tokenizers_available
+from mindformers.tools.register import MindFormerConfig
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir
+from mindformers.models.build_tokenizer import build_tokenizer
+from mindformers.mindformer_book import MindFormerBook, print_path_or_list
+from mindformers.tools.hub import is_offline_mode, cached_file, extract_commit_hash, custom_object_save, PushToHubMixin


 TOKENIZER_URL_SUPPORT_LIST = MindFormerBook.get_tokenizer_url_support_list()
@@ -2583,6 +2584,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
         with open(tokenizer_config_file, "w", encoding="utf-8") as f:
             out_str = json.dumps(tokenizer_config, indent=2, sort_keys=True, ensure_ascii=False) + "\n"
             f.write(out_str)
+        set_safe_mode_for_file_or_dir(tokenizer_config_file)
         logger.info(f"tokenizer config file saved in {tokenizer_config_file}")

         # Sanitize AddedTokens in special_tokens_map
@@ -617,11 +617,14 @@ def remove_folder(folder_path, rank_id=None):


 def set_safe_mode_for_file_or_dir(path):
-    path = Path(path)
-    if path.is_dir():
-        path.chmod(DIRECTORY_PERMISSION)
-    if path.is_file():
-        path.chmod(FILE_PERMISSION)
+    if isinstance(path, str):
+        path = [path]
+    for item in path:
+        item = Path(item)
+        if item.is_dir():
+            item.chmod(DIRECTORY_PERMISSION)
+        if item.is_file():
+            item.chmod(FILE_PERMISSION)


 def get_epoch_and_step_from_ckpt_name(ckpt_file, ckpt_fmt='ckpt'):
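Taken out of diff context, the reworked helper above now accepts either a single path string or a list of paths, which is what lets call sites such as BlendedDataset and GPTDataset pass several cache files in one call. A self-contained sketch with assumed permission constants (the real DIRECTORY_PERMISSION and FILE_PERMISSION are defined elsewhere in mindformers.tools.utils; the paths in the usage lines are illustrative):

from pathlib import Path

DIRECTORY_PERMISSION = 0o750  # assumed; actual value defined in mindformers.tools.utils
FILE_PERMISSION = 0o640       # assumed; actual value defined in mindformers.tools.utils

def set_safe_mode_for_file_or_dir(path):
    # A bare string is wrapped into a one-element list so old call sites keep working.
    if isinstance(path, str):
        path = [path]
    for item in path:
        item = Path(item)
        if item.is_dir():
            item.chmod(DIRECTORY_PERMISSION)
        if item.is_file():
            item.chmod(FILE_PERMISSION)

# Both call forms seen in this commit (hypothetical paths; nonexistent paths are a no-op):
set_safe_mode_for_file_or_dir("output/param_name_map.json")
set_safe_mode_for_file_or_dir(["cache/index.npy", "cache/sample_index.npy"])

Note that only str is special-cased: the call sites in this commit pass either a string or a list, so Path objects or tuples are not handled.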
@@ -26,6 +26,7 @@ from dataclasses import dataclass, field
 from mindspore import dtype as msdtype
 from mindspore_gs.ptq import PTQConfig, PTQMode, OutliersSuppressionType, QuantGranularity, PrecisionRecovery
 from mindspore_gs.common import BackendTarget
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir

 dtype_map = {"None": None,
              "bool": msdtype.bool_,
@@ -121,6 +122,7 @@ class QuantizationConfigMixin:
             json_string = json.dumps(config_dict, indent=2, sort_keys=True) + "\n"

             writer.write(json_string)
+        set_safe_mode_for_file_or_dir(json_file_path)

     def to_dict(self) -> Dict[str, Any]:
         """
@@ -28,6 +28,7 @@ from safetensors.torch import save_file

 import mindspore as ms
 from mindspore.ops.operations import Cast
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir

 ms.set_context(device_target='CPU')
 cpu_cast = Cast().set_device('CPU')
@@ -469,6 +470,7 @@ def ms_ckpt_convertor(input_path, output_path, config):
     with open(converted_model_index_file, "w") as f:
         json_string = json.dumps(converted_st_map, default=lambda x: x.__dict__, sort_keys=False, indent=2)
         f.write(json_string)
+    set_safe_mode_for_file_or_dir(converted_model_index_file)


 def ms_safetensors_convertor(input_path, output_path, config):
@@ -515,6 +517,7 @@ def ms_safetensors_convertor(input_path, output_path, config):
     with open(converted_model_index_file, "w") as f:
         json_string = json.dumps(converted_st_map, default=lambda x: x.__dict__, sort_keys=False, indent=2)
         f.write(json_string)
+    set_safe_mode_for_file_or_dir(converted_model_index_file)


 def convert_ms_to_pt(input_path, output_path, config=None):
@@ -29,6 +29,7 @@ import torch
 from safetensors.torch import load_file

 import mindspore as ms
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir


 dtype_map = {
@@ -562,6 +563,7 @@ def ms_safetensors_convertor(input_path, output_path, config):
     with open(converted_model_index_file, "w") as f:
         json_string = json.dumps(converted_st_map, default=lambda x: x.__dict__, sort_keys=False, indent=2)
         f.write(json_string)
+    set_safe_mode_for_file_or_dir(converted_model_index_file)


 def convert_pt_to_ms(input_path, output_path, config=None):
@@ -869,8 +871,10 @@ def infer_trans_ckpt_pt_to_ms(src_hf_dir, dst_ms_dir, worker_num, arg):
     ms_meta = {}
     os.makedirs(dst_ms_dir, exist_ok=True)
     infer_convert_weight(src_hf_dir, dst_ms_dir, worker_num, ms_meta, arg)
-    with open(f"{dst_ms_dir}/param_name_map.json", "w") as fp:
+    path = f"{dst_ms_dir}/param_name_map.json"
+    with open(path, "w") as fp:
         json.dump(ms_meta, fp, indent=4)
+    set_safe_mode_for_file_or_dir(path)


 if __name__ == "__main__":
@@ -64,9 +64,8 @@ from multiprocessing import Pool
 import numpy as np
 from safetensors import safe_open
 from safetensors.numpy import save_file
-# pylint: disable=W0611
-import mindspore
 from mindformers.tools.logger import logger
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir


 def process_attention_weights(mode, layer_id, mapping, num_heads, qk_nope_head_dim, qk_rope_head_dim):
@@ -242,8 +241,10 @@ if __name__ == "__main__":
         index_json.update(m)

     logger.info('Saving param_name_map.json')
-    with open(f'{args.output_path}/param_name_map.json', 'w') as f:
+    res_path = f'{args.output_path}/param_name_map.json'
+    with open(res_path, 'w') as f:
         json.dump(index_json, f, indent=4)
+    set_safe_mode_for_file_or_dir(res_path)
     logger.info('param_name_map.json is saved')

     end = time()
@@ -26,6 +26,7 @@ from safetensors.torch import load_file, save_file
 from tqdm import tqdm

 import torch
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir


 def weight_dequant(weight: torch.Tensor, scale: torch.Tensor, block_size: int = 128) -> torch.Tensor:
@@ -123,6 +124,7 @@ def update_model_index(bf16_path, weight_map, fp8_weight_names):
     new_model_index_file = os.path.join(bf16_path, "model.safetensors.index.json")
     with open(new_model_index_file, "w") as f:
         json.dump({"metadata": {}, "weight_map": weight_map}, f, indent=2)
+    set_safe_mode_for_file_or_dir(new_model_index_file)


 def main(fp8_path, bf16_path):
@@ -26,7 +26,7 @@ from mindformers.models.llama.llama_tokenizer import LlamaTokenizer
 from mindformers.core.context import build_context
 from mindformers.trainer.utils import load_ckpt
 from mindformers.tools import get_output_root_path
-from mindformers.tools.utils import str2bool
+from mindformers.tools.utils import str2bool, set_safe_mode_for_file_or_dir
 from research.llm_boost.llm_boost import LlmBoostForCausalLM
 from research.llm_boost.llm_boost import LlmBoostConfig
 from research.qwen2.qwen2_tokenizer import Qwen2Tokenizer
@@ -127,6 +127,7 @@ def main(
             print(tokenizer.decode(output))
             file.write(tokenizer.decode(output) + '\n')
         file.close()
+        set_safe_mode_for_file_or_dir(save_file)

     else:
         _framework_profiler_step_start()
@@ -27,6 +27,7 @@ from mindspore.ops.operations import Cast
 from safetensors.torch import save_file
 from mindformers import MindFormerConfig
 from mindformers.tools.logger import logger
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir

 ms.set_context(device_target='CPU')
 cpu_cast = Cast().set_device('CPU')
@@ -192,6 +193,7 @@ def layers_model_file_map(file_path):
         param_name_map = {key: "model.safetensors" for key in weight.keys()}
         with open(weight_map_file, 'w') as f:
             json.dump(param_name_map, f, indent=4)
+        set_safe_mode_for_file_or_dir(weight_map_file)
     else:
         raise ValueError(f"Cannot find weight map file in path {file_path}")
@@ -242,6 +244,7 @@ def ms_ckpt_convertor(input_path, output_path, config):
     with open(converted_model_index_file, "w") as f:
         json_string = json.dumps(converted_st_map, default=lambda x: x.__dict__, sort_keys=False, indent=2)
         f.write(json_string)
+    set_safe_mode_for_file_or_dir(converted_model_index_file)


 def ms_safetensors_convertor(input_path, output_path, config):
@@ -284,6 +287,7 @@ def ms_safetensors_convertor(input_path, output_path, config):
     with open(converted_model_index_file, "w") as f:
         json_string = json.dumps(converted_st_map, default=lambda x: x.__dict__, sort_keys=False, indent=2)
         f.write(json_string)
+    set_safe_mode_for_file_or_dir(converted_model_index_file)


 def convert_ms_to_pt(input_path, output_path, config_path):
@@ -26,7 +26,7 @@ import numpy as np
 import mindspore as ms

 from mindformers import MindFormerConfig, MindFormerRegister, MindFormerModuleType
-from mindformers.tools.utils import str2bool
+from mindformers.tools.utils import str2bool, set_safe_mode_for_file_or_dir
 from mindformers.utils.convert_utils import qkv_concat_hf2mg, ffn_concat_hf2mg

 dtype_map = {
@@ -174,6 +174,7 @@ def convert_lora_config(input_path):

         with open(config_path, 'w', encoding='utf-8') as file:
             json.dump(data, file, indent=4)
+        set_safe_mode_for_file_or_dir(config_path)
         print(f"JSON file modified successfully!")

     except FileNotFoundError:
@@ -27,6 +27,7 @@ from tqdm import tqdm
 from safetensors.torch import load_file

 import mindspore as ms
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir


 DTYPE_MAP = {
@@ -632,6 +633,7 @@ def ms_safetensors_convertor(input_path, output_path, config):
     with open(converted_model_index_file, "w") as f:
         json_string = json.dumps(converted_param_name_map, default=lambda x: x.__dict__, sort_keys=False, indent=2)
         f.write(json_string)
+    set_safe_mode_for_file_or_dir(converted_model_index_file)
     tqdm.write(f"Param name map is saved into file '{converted_model_index_file}' successfully!")