mirror of
https://gitee.com/mindspore/mindformers.git
synced 2025-12-06 11:29:59 +08:00
Compare commits
10 Commits
3d81fa1cb1 ... f271c58cd2
f271c58cd2
cb3083aab9
656b143f71
4e95ab6f51
1738940a3d
0d55e8959d
695e7a7e2e
a5baae49c0
c7937097f1
550c063fd8
@@ -25,7 +25,7 @@ from mindformers.tools.logger import logger

 _LEVEL_0_TASK_TIME = 0
-_LEVEL_1_TASK_TIME = 124
+_LEVEL_1_TASK_TIME = 436
 _TASK_TYPE = TaskType.FOUR_CARDS_TASK

@@ -63,30 +63,6 @@ class TestHelperFunctions(unittest.TestCase):
         self.assertEqual(loss, 0.8)
         self.assertFalse(overflow)

-    @pytest.mark.level1
-    @pytest.mark.platform_x86_cpu
-    @pytest.mark.env_onecard
-    def test_get_weight_norm(self):
-        """Test _get_weight_norm function"""
-        # Create mock network
-        mock_network = Mock()
-        param1 = Mock()
-        param1.to.return_value = param1
-        param1.norm.return_value = Tensor(np.array([2.0]))
-        param2 = Mock()
-        param2.to.return_value = param2
-        param2.norm.return_value = Tensor(np.array([3.0]))
-
-        mock_network.trainable_params.return_value = [param1, param2]
-
-        with patch('mindspore.ops.functional.stack') as mock_stack:
-            mock_stack.return_value = Tensor(np.array([3.605551]))
-
-            # pylint: disable=W0212
-            norm = callback_module._get_weight_norm(mock_network)
-
-            self.assertAlmostEqual(norm, 3.605551, places=5)
-
     @pytest.mark.level1
     @pytest.mark.platform_x86_cpu
     @pytest.mark.env_onecard
@@ -23,7 +23,7 @@ from mindformers.dataset.blended_datasets.blended_megatron_dataset_builder impor
     _get_size_per_split_per_dataset
 )
 from mindformers.dataset.blended_datasets.blended_megatron_dataset_config import BlendedMegatronDatasetConfig
-from mindformers.dataset.blended_datasets.utils import Split
+from mindformers.dataset.blended_datasets.utils import Split, compile_helpers


 class DummyTokenizer:
@@ -522,6 +522,7 @@ class TestBlendedMegatronDatasetBuilder:
         Description: Test build method works with blend configuration having weights and size
         Expectation: Method builds datasets correctly with weights processing
         """
+        compile_helpers()
         config = create_test_config()
         config.mock = False
         config.blend = (["prefix1", "prefix2"], [0.3, 0.7])
@@ -645,6 +646,7 @@ class TestBlendedMegatronDatasetBuilder:
         Description: Test parallel building of megatron datasets
         Expectation: Method builds datasets in parallel correctly
         """
+        compile_helpers()
         config = create_test_config()
         config.mock = False
         config.blend = (["prefix1", "prefix2"], [0.5, 0.5])
41 tests/st/test_ut/test_generation/qwen3_0_6b_infer.yaml Normal file
@@ -0,0 +1,41 @@
seed: 0
output_dir: './output' # path to save checkpoint/strategy
load_checkpoint: ''
use_parallel: False
run_mode: 'predict'
use_legacy: False
load_ckpt_format: 'safetensors'

trainer:
  type: CausalLanguageModelingTrainer
  model_name: 'qwen3'

# default parallel of device num = 8 for Atlas 800T A2
parallel_config:
  data_parallel: 1
  model_parallel: 1
pretrained_model_dir: '/home/workspace/mindspore_dataset/weight/Qwen3-0.6B'
generation:
  max_length: 128
model:
  model_config:
    compute_dtype: "bfloat16"
    layernorm_compute_dtype: "float32"
    softmax_compute_dtype: "float32"
    rotary_dtype: "bfloat16"
    params_dtype: "bfloat16"

# mindspore context init config
context:
  mode: 0  # 0--Graph Mode; 1--Pynative Mode
  enable_graph_kernel: False
  ascend_config:
    precision_mode: "must_keep_origin_dtype"
  max_device_memory: "29GB"
  save_graphs: False
  save_graphs_path: "./graph"

# parallel context config
parallel:
  parallel_mode: "MANUAL_PARALLEL"
  enable_alltoall: False
286 tests/st/test_ut/test_generation/test_parallel_decoding.py Normal file
@@ -0,0 +1,286 @@
# Copyright 2025 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Test parallel decoding"""
import numpy as np
import pytest

import mindspore as ms
from mindspore import Tensor

from mindformers.generation.parallel_decoding import (
    _logits_process,
    _pre_process,
    _la_logits_process,
    _la_pre_process,
    _memory_decoding_pre_process,
    _prefix_cache_pre_process,
    parallel_decoding_control,
    parallel_decoding_logits_process,
    _construct_mask,
    _parallel_decoding_pad,
    _parallel_decoding_pad_2d_tensor
)


class MockConfig:
    def __init__(self, parallel_decoding=None):
        if parallel_decoding:
            self.parallel_decoding_params = {"parallel_decoding": parallel_decoding}
        else:
            self.parallel_decoding_params = None


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_register_decorators():
    """
    Feature: parallel decoding.
    Description: test a function in parallel decoding.
    Expectation: success.
    """
    assert 'la' in _logits_process
    assert 'la' in _pre_process
    assert 'memory_decoding' in _pre_process
    assert 'prefix_cache' in _pre_process
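    # Reading implied by these asserts: _logits_process and _pre_process act as
    # dict-like registries keyed by the parallel-decoding method name, populated
    # by register decorators at import time.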


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_construct_mask():
    """
    Feature: parallel decoding.
    Description: test a function in parallel decoding.
    Expectation: success.
    """
    q_seq_lens = [2, 3]
    mask = _construct_mask(q_seq_lens)
    expected = np.array([
        [-0, 1, 1, 1, 1],
        [-0, -0, 1, 1, 1],
        [1, 1, -0, 1, 1],
        [1, 1, -0, -0, 1],
        [1, 1, -0, -0, -0]
    ], dtype=np.float16)
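    # Block-diagonal causal structure: rows/cols 0-1 belong to the first
    # sequence (len 2), rows/cols 2-4 to the second (len 3); within each block,
    # 0 marks a visible (attended) position and 1 a masked one.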
    np.testing.assert_array_equal(mask, expected)


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_parallel_decoding_pad():
    """
    Feature: parallel decoding.
    Description: test a function in parallel decoding.
    Expectation: success.
    """
    arr = np.array([1, 2, 3])
    padded = _parallel_decoding_pad(arr, axis=0, pad_len=5, value=-1)
    expected = np.array([1, 2, 3, -1, -1])
    np.testing.assert_array_equal(padded, expected)

    # pad_len < current len → no change
    same = _parallel_decoding_pad(arr, axis=0, pad_len=2, value=-1)
    np.testing.assert_array_equal(same, arr)


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_parallel_decoding_pad_2d_tensor():
    """
    Feature: parallel decoding.
    Description: test a function in parallel decoding.
    Expectation: success.
    """
    inputs = np.array([1, 2, 3, 4, 5, 6])
    lens = [2, 3]
    padded = _parallel_decoding_pad_2d_tensor(inputs, pad_seq_len=4, lens=lens, value=-1)
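    # Per the expected value below, the flat input is consumed sequentially
    # according to lens (2 tokens, then 3), each slice is right-padded with
    # value up to pad_seq_len, and anything past sum(lens) is dropped.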
    expected = np.array([
        [1, 2, -1, -1],
        [3, 4, 5, -1]
    ])
    np.testing.assert_array_equal(padded, expected)


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_la_logits_process_simple():
    """
    Feature: parallel decoding.
    Description: test a function in parallel decoding.
    Expectation: success.
    """
    logits = Tensor(np.random.rand(4, 100), ms.float32)
    result = _la_logits_process(logits, None, None, False)
    assert result.shape == (4, 100)


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_la_logits_process_with_q_seq_lens():
    """
    Feature: parallel decoding.
    Description: test a function in parallel decoding.
    Expectation: success.
    """
    logits = Tensor(np.random.rand(6, 100), ms.float32)  # batch=2, max_seq=3
    q_seq_lens = [2, 3]
    block_tables = [[1, 2], [3, 4]]
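    # Two sequences padded to max_seq=3 give 6 logit rows; in prefill mode the
    # lookahead path keeps only one (final-token) row per sequence.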
    result = _la_logits_process(logits, q_seq_lens, block_tables, prefill=True)
    assert result.shape == (2, 100)  # last token of each seq


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_la_pre_process_normal():
    """
    Feature: parallel decoding.
    Description: test a function in parallel decoding.
    Expectation: success.
    """
    config = MockConfig("la")
    input_ids = Tensor([[1, 2, 3]], ms.int32)
    model_inputs = {}
    block_tables = np.array([[10, 11]])
    slot_mapping = np.array([0, 1, 2])
    q_seq_lens = [3]

    out_model_inputs, out_block, out_slot = _la_pre_process(
        config, input_ids, model_inputs,
        block_tables=block_tables,
        slot_mapping=slot_mapping,
        q_seq_lens=q_seq_lens
    )
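    # Per the assertions: input_ids pass through with their original shape,
    # q_seq_lens is packed into model_inputs as a 1-D tensor, and
    # block_tables/slot_mapping come back cast to int32.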

    assert isinstance(out_model_inputs['input_ids'], Tensor)
    assert out_model_inputs['input_ids'].shape == (1, 3)
    assert out_model_inputs['q_seq_lens'].shape == (1,)
    assert np.array_equal(out_block, block_tables.astype(np.int32))
    assert np.array_equal(out_slot, slot_mapping.astype(np.int32))


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_la_pre_process_with_max_padding():
    """
    Feature: parallel decoding.
    Description: test a function in parallel decoding.
    Expectation: success.
    """
    config = MockConfig("la")
    input_ids = Tensor([[1, 2, 0, 0, 3, 4, 0, 0]], ms.int32)  # shape (1,8), max_len=4, two seqs
    model_inputs = {}
    block_tables = np.array([[1, 2], [3, 4]])
    slot_mapping = np.array([0, 1, 0, 0, 2, 3, 0, 0])
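    # Layout used by this test: each sequence owns a max_len=4 slice of the
    # flat input, and only the first q_seq_len entries of a slice are real
    # tokens; slot_mapping mirrors that slicing.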
    q_seq_lens = [2, 2]  # each seq has 2 real tokens

    out_model_inputs, _, _ = _la_pre_process(
        config, input_ids, model_inputs,
        block_tables=block_tables,
        slot_mapping=slot_mapping,
        q_seq_lens=q_seq_lens
    )

    # The padded (1, 8) input is re-packed to shape (1, 4)
    assert out_model_inputs['input_ids'].shape == (1, 4)
    expected_ids = np.array([[1, 2, 0, 0]])
    np.testing.assert_array_equal(out_model_inputs['input_ids'].asnumpy(), expected_ids)


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_la_pre_process_no_q_seq_lens():
    """
    Feature: parallel decoding.
    Description: test a function in parallel decoding.
    Expectation: success.
    """
    config = MockConfig("la")
    input_ids = Tensor([[1, 2, 3]], ms.int32)
    model_inputs = {}
    block_tables = np.array([[10, 11]])
    slot_mapping = np.array([0, 1, 2])

    out_model_inputs, _, _ = _la_pre_process(
        config, input_ids, model_inputs,
        block_tables=block_tables,
        slot_mapping=slot_mapping,
        q_seq_lens=None,
        valid_length_each_example=[3]
    )

    assert out_model_inputs['q_seq_lens'].shape == (1,)


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_memory_and_prefix_preprocess():
    """
    Feature: parallel decoding.
    Description: test a function in parallel decoding.
    Expectation: success.
    """
    config = MockConfig("memory_decoding")
    input_ids = Tensor([], ms.int32)
    model_inputs = {}
    block_tables = np.array([0])
    slot_mapping = np.array([0])
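    # Per the assertions below, both pre-process variants return block_tables
    # and slot_mapping cast to int32.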

    out1 = _memory_decoding_pre_process(config, input_ids, model_inputs,
                                        block_tables=block_tables, slot_mapping=slot_mapping)
    out2 = _prefix_cache_pre_process(config, input_ids, model_inputs,
                                     block_tables=block_tables, slot_mapping=slot_mapping)

    assert np.array_equal(out1[1], block_tables.astype(np.int32))
    assert np.array_equal(out2[2], slot_mapping.astype(np.int32))


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_parallel_decoding_control():
    """
    Feature: parallel decoding.
    Description: test a function in parallel decoding.
    Expectation: success.
    """
    assert parallel_decoding_control(MockConfig("la")) is True
    assert parallel_decoding_control(MockConfig("memory_decoding")) is True
    assert parallel_decoding_control(MockConfig("prefix_cache")) is True
    assert parallel_decoding_control(MockConfig("invalid")) is False
    assert parallel_decoding_control(MockConfig(None)) is False
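    # Control is name-based: only the registered method names ("la",
    # "memory_decoding", "prefix_cache") enable parallel decoding.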


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_parallel_decoding_logits_process():
    """
    Feature: parallel decoding.
    Description: test a function in parallel decoding.
    Expectation: success.
    """
    config = MockConfig("la")
    logits = Tensor(np.random.rand(2, 100), ms.float32)
    result = parallel_decoding_logits_process(config, logits, None, None, False)
    assert result.shape == (2, 100)
56 tests/st/test_ut/test_generation/test_streamer.py Normal file
@@ -0,0 +1,56 @@
# Copyright 2025 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Test streamer inference"""
import os
import pytest

from transformers import AutoTokenizer

from mindspore.nn.utils import no_init_parameters

from mindformers import AutoModel, build_context, MindFormerConfig
from mindformers import pipeline, TextStreamer, TextIteratorStreamer


@pytest.mark.level1
@pytest.mark.platform_arm_ascend910b_training
@pytest.mark.env_onecard
def test_streamer():
    """
    Feature: Streamer inference.
    Description: Test streamer inference.
    Expectation: Success.
    """
    config_path = os.path.join(os.path.dirname(__file__), "qwen3_0_6b_infer.yaml")
    config = MindFormerConfig(config_path)
    config.use_parallel = False
    config.parallel_config.model_parallel = 1
    build_context(config)

    inputs = ["I love Beijing, because", "请介绍北京", "生成以换行符结尾的句子"]

    tokenizer = AutoTokenizer.from_pretrained(config.pretrained_model_dir, trust_remote_code=True)

    with no_init_parameters():
        network = AutoModel.from_config(config)
    network.load_weights(config.pretrained_model_dir)

    streamer = TextStreamer(tokenizer)
    text_generation_pipeline = pipeline(task="text_generation", model=network, tokenizer=tokenizer, streamer=streamer)
    _ = text_generation_pipeline(inputs, max_length=64, do_sample=False, top_k=3, top_p=1)
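    # TextStreamer above emits text as tokens are generated; TextIteratorStreamer
    # instead buffers decoded text for a consumer to iterate over (assuming it
    # mirrors the transformers streamer semantics), and the pipeline call below
    # simply drives generation to completion.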

    streamer = TextIteratorStreamer(tokenizer)
    text_generation_pipeline = pipeline(task="text_generation", model=network, tokenizer=tokenizer, streamer=streamer)
    _ = text_generation_pipeline(inputs, max_length=64, do_sample=False, top_k=3, top_p=1)
1216 tests/st/test_ut/test_models/test_modeling_utils.py Normal file
File diff suppressed because it is too large
@@ -518,6 +518,9 @@ class TestTrainerCheckpointMethods(unittest.TestCase):
             f.write('mock')
         os.stat(last_checkpoint_path)

+        os.utime(last_checkpoint_path, (os.path.getatime(last_checkpoint_path) + 1,
+                                        os.path.getmtime(last_checkpoint_path) + 1))
+
         trainer._check_checkpoint_config(True)
         assert trainer.config.model.model_config.checkpoint_name_or_path == last_checkpoint_path