fix file permission

zyw_hw committed 2025-07-21 17:02:50 +08:00
parent e6a2e190d5
commit 3704bc1c6a
15 changed files with 58 additions and 20 deletions
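
Every hunk below applies the same pattern: immediately after an artifact is
written (json.dump, numpy.save, a CSV file, a safetensors index file), the new
helper set_safe_mode_for_file_or_dir() is called on the written path to
restrict its permissions. A minimal sketch of that pattern, assuming a mode of
0o640 (the real values live in mindformers.tools.utils as FILE_PERMISSION and
DIRECTORY_PERMISSION; the names below are hypothetical):

    import json
    from pathlib import Path

    record_path = Path("abnormal_global_norm.json")  # hypothetical path
    with open(record_path, "w") as f:
        json.dump({"step": 100, "global_norm": 3.2}, f)  # hypothetical payload
    # What set_safe_mode_for_file_or_dir does for a regular file:
    record_path.chmod(0o640)  # assumed FILE_PERMISSION value (rw-r-----)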

View File

@@ -777,6 +777,7 @@ class TrainingStateMonitor(Callback):
    os.makedirs(parent_dirs)
with open(self.global_norm_record_path, 'w') as file:
    json.dump(self.abnormal_global_norms, file)
+set_safe_mode_for_file_or_dir(self.global_norm_record_path)
logger.info(f"Current global norm {global_norm} is greater equal than "
            f"threshold {self.global_norm_spike_threshold}, stop training...")
barrier_world()
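
A side note on the pattern in this hunk: calling chmod after open(path, 'w')
leaves a brief window in which the file exists with the umask-derived default
mode. A tighter variant (not what this commit does) creates the file with the
final mode up front; a sketch with hypothetical names and an assumed 0o640 mode:

    import json
    import os

    path = "record.json"  # hypothetical path
    # The file is created with at most mode 0o640 (further masked by umask),
    # so it is never group/other-writable, even transiently.
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o640)
    with os.fdopen(fd, "w") as f:
        json.dump({"ok": True}, f)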
@@ -1393,6 +1394,7 @@ class CheckpointMonitor(ModelCheckpoint):
all_step_health_data.append(health_step_data)
with open(dump_health_json_path, 'w') as file:
    json.dump(all_step_health_data, file, indent=4)
+set_safe_mode_for_file_or_dir(dump_health_json_path)
if "epoch_num" in self._append_dict:
    self._append_dict["epoch_num"] = cb_params.cur_epoch_num

View File

@@ -13,7 +13,7 @@ from mindformers.dataset.blended_datasets.blended_megatron_dataset_config import
from mindformers.dataset.blended_datasets.megatron_dataset import MegatronDataset
from mindformers.dataset.blended_datasets.utils import normalize
from mindformers.tools.logger import logger
-from mindformers.tools.utils import get_rank_info
+from mindformers.tools.utils import get_rank_info, set_safe_mode_for_file_or_dir
_VERBOSE = False
@@ -159,6 +159,9 @@ class BlendedDataset():
    # Save the indexes
    numpy.save(path_to_dataset_index, dataset_index, allow_pickle=False)
    numpy.save(path_to_dataset_sample_index, dataset_sample_index, allow_pickle=False)
+    set_safe_mode_for_file_or_dir(
+        [path_to_description, path_to_dataset_index, path_to_dataset_sample_index]
+    )
else:
    logger.info(f"Unable to save the {type(self).__name__} indexes because path_to_cache is None")

View File

@@ -14,7 +14,7 @@ from mindformers.dataset.blended_datasets.utils import Split
from mindformers.dataset.blended_datasets.utils_s3 import S3Config, is_s3_path
from mindformers.models.tokenization_utils_base import PreTrainedTokenizerBase
from mindformers.tools.logger import logger
-from mindformers.tools.utils import get_rank_info
+from mindformers.tools.utils import get_rank_info, set_safe_mode_for_file_or_dir
_PAD_TOKEN_ID = -1
@@ -468,6 +468,12 @@ class GPTDataset(MegatronDataset):
    numpy.save(path_to_document_index, document_index, allow_pickle=False)
    numpy.save(path_to_sample_index, sample_index, allow_pickle=False)
    numpy.save(path_to_shuffle_index, shuffle_index, allow_pickle=False)
+    set_safe_mode_for_file_or_dir([
+        path_to_description,
+        path_to_document_index,
+        path_to_sample_index,
+        path_to_shuffle_index
+    ])
else:
    logger.warning(
        f"Unable to save the {type(self).__name__} indexes because path_to_cache is None",

View File

@@ -29,6 +29,7 @@ from mindspore.communication import get_rank
from mindformers.tools.register.register import MindFormerRegister, MindFormerModuleType
from mindformers.tools.logger import logger
from mindformers.version_control import get_dataset_map
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir
from .dataloader.build_dataloader import build_dataset_loader
from .base_dataset import BaseDataset
@@ -480,6 +481,7 @@ class TokenCounter:
# Clear existing file content
with open(filename, 'w', newline='') as csvfile:
    _ = csv.writer(csvfile)
+set_safe_mode_for_file_or_dir(filename)
self.initialized = True
self.token_count_pairs_header_written = False

View File

@@ -33,13 +33,14 @@ import numpy as np
import mindspore as ms
from mindformers.tools.check_rules import check_yaml_depth_before_loading
from mindformers.tools.utils import FILE_PERMISSION
-from ..tools.logger import logger
-from ..tools.generic import add_model_info_to_auto_map
-from ..utils.import_utils import is_tokenizers_available
-from ..tools.register import MindFormerConfig
-from .build_tokenizer import build_tokenizer
-from ..mindformer_book import MindFormerBook, print_path_or_list
-from ..tools.hub import is_offline_mode, cached_file, extract_commit_hash, custom_object_save, PushToHubMixin
+from mindformers.tools.logger import logger
+from mindformers.tools.generic import add_model_info_to_auto_map
+from mindformers.utils.import_utils import is_tokenizers_available
+from mindformers.tools.register import MindFormerConfig
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir
+from mindformers.models.build_tokenizer import build_tokenizer
+from mindformers.mindformer_book import MindFormerBook, print_path_or_list
+from mindformers.tools.hub import is_offline_mode, cached_file, extract_commit_hash, custom_object_save, PushToHubMixin
TOKENIZER_URL_SUPPORT_LIST = MindFormerBook.get_tokenizer_url_support_list()
@@ -2583,6 +2584,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
with open(tokenizer_config_file, "w", encoding="utf-8") as f:
    out_str = json.dumps(tokenizer_config, indent=2, sort_keys=True, ensure_ascii=False) + "\n"
    f.write(out_str)
+set_safe_mode_for_file_or_dir(tokenizer_config_file)
logger.info(f"tokenizer config file saved in {tokenizer_config_file}")

# Sanitize AddedTokens in special_tokens_map

View File

@@ -617,11 +617,14 @@ def remove_folder(folder_path, rank_id=None):
def set_safe_mode_for_file_or_dir(path):
-    path = Path(path)
-    if path.is_dir():
-        path.chmod(DIRECTORY_PERMISSION)
-    if path.is_file():
-        path.chmod(FILE_PERMISSION)
+    if isinstance(path, str):
+        path = [path]
+    for item in path:
+        item = Path(item)
+        if item.is_dir():
+            item.chmod(DIRECTORY_PERMISSION)
+        if item.is_file():
+            item.chmod(FILE_PERMISSION)

def get_epoch_and_step_from_ckpt_name(ckpt_file, ckpt_fmt='ckpt'):
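
After this rewrite the helper accepts either a single path string or a list of
paths; a usage sketch with hypothetical paths:

    set_safe_mode_for_file_or_dir("output/param_name_map.json")
    set_safe_mode_for_file_or_dir([
        "cache/description.txt",
        "cache/dataset_index.npy",
    ])
    # Nonexistent paths are skipped silently: Path.is_dir() and
    # Path.is_file() both return False for them.

One caveat of the str-only isinstance check: a bare pathlib.Path argument is no
longer wrapped into a list and would fail when iterated; every call site in
this commit passes a string or a list of strings.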

View File

@@ -26,6 +26,7 @@ from dataclasses import dataclass, field
from mindspore import dtype as msdtype
from mindspore_gs.ptq import PTQConfig, PTQMode, OutliersSuppressionType, QuantGranularity, PrecisionRecovery
from mindspore_gs.common import BackendTarget
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir
dtype_map = {"None": None,
"bool": msdtype.bool_,
@@ -121,6 +122,7 @@ class QuantizationConfigMixin:
        json_string = json.dumps(config_dict, indent=2, sort_keys=True) + "\n"
        writer.write(json_string)
+    set_safe_mode_for_file_or_dir(json_file_path)

def to_dict(self) -> Dict[str, Any]:
    """

View File

@@ -28,6 +28,7 @@ from safetensors.torch import save_file
import mindspore as ms
from mindspore.ops.operations import Cast
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir
ms.set_context(device_target='CPU')
cpu_cast = Cast().set_device('CPU')
@@ -469,6 +470,7 @@ def ms_ckpt_convertor(input_path, output_path, config):
    with open(converted_model_index_file, "w") as f:
        json_string = json.dumps(converted_st_map, default=lambda x: x.__dict__, sort_keys=False, indent=2)
        f.write(json_string)
+    set_safe_mode_for_file_or_dir(converted_model_index_file)

def ms_safetensors_convertor(input_path, output_path, config):
@@ -515,6 +517,7 @@ def ms_safetensors_convertor(input_path, output_path, config):
    with open(converted_model_index_file, "w") as f:
        json_string = json.dumps(converted_st_map, default=lambda x: x.__dict__, sort_keys=False, indent=2)
        f.write(json_string)
+    set_safe_mode_for_file_or_dir(converted_model_index_file)

def convert_ms_to_pt(input_path, output_path, config=None):

View File

@@ -29,6 +29,7 @@ import torch
from safetensors.torch import load_file
import mindspore as ms
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir
dtype_map = {
@@ -562,6 +563,7 @@ def ms_safetensors_convertor(input_path, output_path, config):
    with open(converted_model_index_file, "w") as f:
        json_string = json.dumps(converted_st_map, default=lambda x: x.__dict__, sort_keys=False, indent=2)
        f.write(json_string)
+    set_safe_mode_for_file_or_dir(converted_model_index_file)

def convert_pt_to_ms(input_path, output_path, config=None):
@@ -869,8 +871,10 @@ def infer_trans_ckpt_pt_to_ms(src_hf_dir, dst_ms_dir, worker_num, arg):
    ms_meta = {}
    os.makedirs(dst_ms_dir, exist_ok=True)
    infer_convert_weight(src_hf_dir, dst_ms_dir, worker_num, ms_meta, arg)
-    with open(f"{dst_ms_dir}/param_name_map.json", "w") as fp:
+    path = f"{dst_ms_dir}/param_name_map.json"
+    with open(path, "w") as fp:
        json.dump(ms_meta, fp, indent=4)
+    set_safe_mode_for_file_or_dir(path)

if __name__ == "__main__":

View File

@@ -64,9 +64,8 @@ from multiprocessing import Pool
import numpy as np
from safetensors import safe_open
from safetensors.numpy import save_file
-# pylint: disable=W0611
-import mindspore
from mindformers.tools.logger import logger
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir
def process_attention_weights(mode, layer_id, mapping, num_heads, qk_nope_head_dim, qk_rope_head_dim):
@@ -242,8 +241,10 @@ if __name__ == "__main__":
        index_json.update(m)
    logger.info('Saving param_name_map.json')
-    with open(f'{args.output_path}/param_name_map.json', 'w') as f:
+    res_path = f'{args.output_path}/param_name_map.json'
+    with open(res_path, 'w') as f:
        json.dump(index_json, f, indent=4)
+    set_safe_mode_for_file_or_dir(res_path)
    logger.info('param_name_map.json is saved')
    end = time()
end = time()

View File

@@ -26,6 +26,7 @@ from safetensors.torch import load_file, save_file
from tqdm import tqdm
import torch
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir
def weight_dequant(weight: torch.Tensor, scale: torch.Tensor, block_size: int = 128) -> torch.Tensor:
@@ -123,6 +124,7 @@ def update_model_index(bf16_path, weight_map, fp8_weight_names):
    new_model_index_file = os.path.join(bf16_path, "model.safetensors.index.json")
    with open(new_model_index_file, "w") as f:
        json.dump({"metadata": {}, "weight_map": weight_map}, f, indent=2)
+    set_safe_mode_for_file_or_dir(new_model_index_file)
def main(fp8_path, bf16_path):

View File

@@ -26,7 +26,7 @@ from mindformers.models.llama.llama_tokenizer import LlamaTokenizer
from mindformers.core.context import build_context
from mindformers.trainer.utils import load_ckpt
from mindformers.tools import get_output_root_path
-from mindformers.tools.utils import str2bool
+from mindformers.tools.utils import str2bool, set_safe_mode_for_file_or_dir
from research.llm_boost.llm_boost import LlmBoostForCausalLM
from research.llm_boost.llm_boost import LlmBoostConfig
from research.qwen2.qwen2_tokenizer import Qwen2Tokenizer
@@ -127,6 +127,7 @@ def main(
        print(tokenizer.decode(output))
        file.write(tokenizer.decode(output) + '\n')
    file.close()
+    set_safe_mode_for_file_or_dir(save_file)
else:
    _framework_profiler_step_start()

View File

@@ -27,6 +27,7 @@ from mindspore.ops.operations import Cast
from safetensors.torch import save_file
from mindformers import MindFormerConfig
from mindformers.tools.logger import logger
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir
ms.set_context(device_target='CPU')
cpu_cast = Cast().set_device('CPU')
@@ -192,6 +193,7 @@ def layers_model_file_map(file_path):
    param_name_map = {key: "model.safetensors" for key in weight.keys()}
    with open(weight_map_file, 'w') as f:
        json.dump(param_name_map, f, indent=4)
+    set_safe_mode_for_file_or_dir(weight_map_file)
else:
    raise ValueError(f"Cannot find weight map file in path {file_path}")
@@ -242,6 +244,7 @@ def ms_ckpt_convertor(input_path, output_path, config):
    with open(converted_model_index_file, "w") as f:
        json_string = json.dumps(converted_st_map, default=lambda x: x.__dict__, sort_keys=False, indent=2)
        f.write(json_string)
+    set_safe_mode_for_file_or_dir(converted_model_index_file)

def ms_safetensors_convertor(input_path, output_path, config):
@@ -284,6 +287,7 @@ def ms_safetensors_convertor(input_path, output_path, config):
    with open(converted_model_index_file, "w") as f:
        json_string = json.dumps(converted_st_map, default=lambda x: x.__dict__, sort_keys=False, indent=2)
        f.write(json_string)
+    set_safe_mode_for_file_or_dir(converted_model_index_file)

def convert_ms_to_pt(input_path, output_path, config_path):

View File

@@ -26,7 +26,7 @@ import numpy as np
import mindspore as ms
from mindformers import MindFormerConfig, MindFormerRegister, MindFormerModuleType
-from mindformers.tools.utils import str2bool
+from mindformers.tools.utils import str2bool, set_safe_mode_for_file_or_dir
from mindformers.utils.convert_utils import qkv_concat_hf2mg, ffn_concat_hf2mg
dtype_map = {
@@ -174,6 +174,7 @@ def convert_lora_config(input_path):
        with open(config_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, indent=4)
+        set_safe_mode_for_file_or_dir(config_path)
        print(f"JSON file modified successfully!")
    except FileNotFoundError:

View File

@@ -27,6 +27,7 @@ from tqdm import tqdm
from safetensors.torch import load_file
import mindspore as ms
+from mindformers.tools.utils import set_safe_mode_for_file_or_dir
DTYPE_MAP = {
@@ -632,6 +633,7 @@ def ms_safetensors_convertor(input_path, output_path, config):
    with open(converted_model_index_file, "w") as f:
        json_string = json.dumps(converted_param_name_map, default=lambda x: x.__dict__, sort_keys=False, indent=2)
        f.write(json_string)
+    set_safe_mode_for_file_or_dir(converted_model_index_file)
    tqdm.write(f"Param name map is saved into file '{converted_model_index_file}' successfully!")