diff --git a/docs/api/api_python/mindarmour.fuzz_testing.rst b/docs/api/api_python/mindarmour.fuzz_testing.rst index e597704..1b22814 100644 --- a/docs/api/api_python/mindarmour.fuzz_testing.rst +++ b/docs/api/api_python/mindarmour.fuzz_testing.rst @@ -59,6 +59,65 @@ mindarmour.fuzz_testing - **ValueError** - 初始种子队列为空。 - **ValueError** - `initial_seeds` 中的种子未包含两个元素。 +.. py:class:: mindarmour.fuzz_testing.SensitivityMaximizingFuzzer(target_model) + + 深度神经网络的模糊测试框架。 + + 参考文献:`https://huangd1999.github.io/Themis__Sensitivity\ + _Testing_for_Deep_Learning_System.pdf\ + `_。 + + 参数: + - **target_model** (Model) - 目标模糊模型。 + + .. py:method:: fuzzing(mutate_config, initial_seeds, coverage, evaluate=True, max_iters=1000, mutate_num_per_seed=20) + + 深度神经网络的模糊测试。 + + 参数: + - **mutate_config** (list) - 变异方法配置。格式为: + + .. code-block:: python + + mutate_config = [ + {'method': 'GaussianBlur', + 'params': {'ksize': [1, 2, 3, 5], 'auto_param': [True, False]}}, + {'method': 'UniformNoise', + 'params': {'factor': [0.1, 0.2, 0.3], 'auto_param': [False, True]}}, + {'method': 'GaussianNoise', + 'params': {'factor': [0.1, 0.2, 0.3], 'auto_param': [False, True]}}, + {'method': 'Contrast', + 'params': {'alpha': [0.5, 1, 1.5], 'beta': [-10, 0, 10], 'auto_param': [False, True]}}, + {'method': 'Rotate', + 'params': {'angle': [20, 90], 'auto_param': [False, True]}}, + {'method': 'FGSM', + 'params': {'eps': [0.3, 0.2, 0.4], 'alpha': [0.1], 'bounds': [(0, 1)]}} + ...] 
+ + - 支持的方法在列表 `self._strategies` 中,每个方法的参数必须在可选参数的范围内。支持的方法分为两种类型: + - 首先,自然鲁棒性方法包括:'Translate'、'Scale'、'Shear'、'Rotate'、'Perspective'、'Curve'、'GaussianBlur'、'MotionBlur'、'GradientBlur'、'Contrast'、'GradientLuminance'、'UniformNoise'、'GaussianNoise'、'SaltAndPepperNoise'、'NaturalNoise'。 + - 其次,对抗样本攻击方式包括:'FGSM'、'PGD'和'MDIM'。'FGSM'、'PGD'和'MDIM'分别是 FastGradientSignMethod、ProjectedGradientDescent和MomentumDiverseInputIterativeMethod的缩写。 `mutate_config` 必须包含在['Contrast', 'GradientLuminance', 'GaussianBlur', 'MotionBlur', 'GradientBlur', 'UniformNoise', 'GaussianNoise', 'SaltAndPepperNoise', 'NaturalNoise']中的方法。 + + - 第一类方法的参数设置方式可以在'mindarmour/natural_robustness/transform/image'中看到。第二类方法参数配置参考 `self._attack_param_checklists` 。 + - **initial_seeds** (list[list]) - 用于生成变异样本的初始种子队列。初始种子队列的格式为[[image_data, label], [...], ...],且标签必须为one-hot。 + - **coverage** (CoverageMetrics) - 神经元覆盖率指标类。 + - **evaluate** (bool) - 是否返回评估报告。默认值:``True``。 + - **max_iters** (int) - 选择要变异的种子的最大数量。默认值:``1000``。 + - **mutate_num_per_seed** (int) - 每个种子的最大变异次数。默认值:``20``。 + + 返回: + - **list** - 模糊测试生成的变异样本。 + - **list** - 变异样本的ground truth标签。 + - **list** - 预测结果。 + - **list** - 变异策略。 + - **dict** - Fuzzer的指标报告。 + + 异常: + - **ValueError** - 参数 `coverage` 必须是CoverageMetrics的子类。 + - **ValueError** - 初始种子队列为空。 + - **ValueError** - `initial_seeds` 中的种子未包含两个元素。 + .. py:class:: mindarmour.fuzz_testing.CoverageMetrics(model, incremental=False, batch_size=32) 计算覆盖指标的神经元覆盖类的抽象基类。 @@ -184,3 +243,25 @@ mindarmour.fuzz_testing 返回: - **float** - 'k-multisection neuron coverage'的指标。 + +.. 
py:class:: mindarmour.fuzz_testing.SensitivityConvergenceCoverage(model, threshold=0.5, incremental=False, batch_size=32, selected_neurons_num=100, n_iter=1000) + + 获取神经元收敛覆盖率的指标。SCC度量神经元输出变化值收敛为正态分布的比例。 + + 参数: + - **model** (Model) - 等待测试的预训练模型。 + - **threshold** (float) - 神经元收敛阈值。默认值:``0.5``。 + - **incremental** (bool) - 指标将以增量方式计算。默认值:``False``。 + - **batch_size** (int) - 模糊测试批次中的样本数。默认值:``32``。 + - **selected_neurons_num** (int) - 模糊测试时所选取的神经元数量。默认值:``100``。 + - **n_iter** (int) - 模糊测试时最大测试次数。默认值:``1000``。 + + .. py:method:: get_metrics(dataset) + + 获取'neuron convergence coverage'的指标。 + + 参数: + - **dataset** (numpy.ndarray) - 用于计算覆盖指标的数据集。 + + 返回: + - **float** - 'neuron convergence coverage'的指标。 diff --git a/examples/ai_fuzzer/lenet5_mnist_scc.py b/examples/ai_fuzzer/lenet5_mnist_scc.py index 848a469..8f26cfc 100644 --- a/examples/ai_fuzzer/lenet5_mnist_scc.py +++ b/examples/ai_fuzzer/lenet5_mnist_scc.py @@ -25,8 +25,8 @@ from mindspore.dataset import MnistDataset from mindspore.common.initializer import TruncatedNormal from mindspore.ops import operations as P from mindspore.ops import TensorSummary -from fuzzing import SensitivityMaximizingFuzzer -from sensitivity_convergence_coverage import SensitivityConvergenceCoverage +from mindarmour.fuzz_testing.fuzzing import SensitivityMaximizingFuzzer +from mindarmour.fuzz_testing.sensitivity_convergence_coverage import SensitivityConvergenceCoverage def datapipe(path): @@ -102,6 +102,7 @@ mutate_config = [{'method': 'GaussianBlur', # make initial seeds test_dataset = datapipe('MNIST_Data/test') + for data, label in test_dataset.create_tuple_iterator(): initial_data = data initial_label = label @@ -117,7 +118,10 @@ for img, label in zip(initial_data, initial_label): label_array = np.array([0 if i != label else 1 for i in range(10)]) initial_seeds.append([np.array(img).astype(np.float32), label_array.astype(np.float32)]) -SCC = SensitivityConvergenceCoverage(model, batch_size=200) +SCC = 
SensitivityConvergenceCoverage(model, batch_size=32) + +print("SCC.get_metrics(initial_data)", SCC.get_metrics(initial_data)) + model_fuzz_test = SensitivityMaximizingFuzzer(model) samples, gt_labels, preds, strategies, metrics = model_fuzz_test.fuzzing( mutate_config, initial_seeds, SCC, max_iters=10) diff --git a/mindarmour/fuzz_testing/__init__.py b/mindarmour/fuzz_testing/__init__.py index 7e31958..50e0485 100644 --- a/mindarmour/fuzz_testing/__init__.py +++ b/mindarmour/fuzz_testing/__init__.py @@ -15,7 +15,7 @@ This module provides a neuron coverage-gain based fuzz method to evaluate the robustness of given model. """ -from .fuzzing import Fuzzer +from .fuzzing import Fuzzer, SensitivityMaximizingFuzzer from .model_coverage_metrics import CoverageMetrics, NeuronCoverage, TopKNeuronCoverage, NeuronBoundsCoverage, \ SuperNeuronActivateCoverage, KMultisectionNeuronCoverage from .sensitivity_convergence_coverage import SensitivityConvergenceCoverage @@ -27,4 +27,5 @@ __all__ = ['Fuzzer', 'NeuronBoundsCoverage', 'SuperNeuronActivateCoverage', 'KMultisectionNeuronCoverage', - 'SensitivityConvergenceCoverage'] + 'SensitivityConvergenceCoverage', + 'SensitivityMaximizingFuzzer'] diff --git a/mindarmour/fuzz_testing/fuzzing.py b/mindarmour/fuzz_testing/fuzzing.py index d6847f5..5c03d58 100644 --- a/mindarmour/fuzz_testing/fuzzing.py +++ b/mindarmour/fuzz_testing/fuzzing.py @@ -512,7 +512,9 @@ class SensitivityMaximizingFuzzer(Fuzzer): Fuzzing test framework for deep neural networks. Reference: `https://huangd1999.github.io/Themis__Sensitivity\ - _Testing_for_Deep_Learning_System.pdf` + _Testing_for_Deep_Learning_System.pdf\ + ` Args: target_model (Model): Target fuzz model. 
@@ -522,8 +524,8 @@ class SensitivityMaximizingFuzzer(Fuzzer): >>> from mindspore.ops import operations as P >>> from mindspore.train import Model >>> from mindspore.ops import TensorSummary - >>> from mindarmour.fuzz_testing import Fuzzer - >>> from mindarmour.fuzz_testing import KMultisectionNeuronCoverage + >>> from mindarmour.fuzz_testing import Fuzzer, SensitivityMaximizingFuzzer + >>> from mindarmour.fuzz_testing import SensitivityConvergenceCoverage >>> class Net(nn.Cell): ... def __init__(self): ... super(Net, self).__init__() @@ -573,7 +575,7 @@ class SensitivityMaximizingFuzzer(Fuzzer): ... 'params': {'angle': [20, 90], 'auto_param': [False, True]}}, ... {'method': 'FGSM', ... 'params': {'eps': [0.3, 0.2, 0.4], 'alpha': [0.1], 'bounds': [(0, 1)]}}] - >>> batch_size = 8 + >>> batch_size = 32 >>> num_classe = 10 >>> train_images = np.random.rand(32, 1, 32, 32).astype(np.float32) >>> test_images = np.random.rand(batch_size, 1, 32, 32).astype(np.float32) @@ -583,9 +585,9 @@ class SensitivityMaximizingFuzzer(Fuzzer): >>> # make initial seeds >>> for img, label in zip(test_images, test_labels): ... initial_seeds.append([img, label]) - >>> initial_seeds = initial_seeds[:10] + >>> initial_seeds = initial_seeds[:batch_size] >>> SCC = SensitivityConvergenceCoverage(model,batch_size = batch_size) - >>> model_fuzz_test = Fuzzer(model) + >>> model_fuzz_test = SensitivityMaximizingFuzzer(model) >>> samples, gt_labels, preds, strategies, metrics = model_fuzz_test.fuzzing(mutate_config, initial_seeds, ... SCC, max_iters=100) """ @@ -595,7 +597,7 @@ class SensitivityMaximizingFuzzer(Fuzzer): self._target_model = target_model def fuzzing(self, mutate_config, initial_seeds, - coverage, evaluate=True, max_iters=10, mutate_num_per_seed=20): + coverage, evaluate=True, max_iters=1000, mutate_num_per_seed=20): """ Fuzzing tests for deep neural networks. 
@@ -629,9 +631,9 @@ class SensitivityMaximizingFuzzer(Fuzzer): initial_seeds (list[list]): Initial seeds used to generate mutated samples. The format of initial seeds is [[image_data, label], [...], ...] and the label must be one-hot. coverage (CoverageMetrics): Class of neuron coverage metrics. - evaluate (bool): return evaluate report or not. Default: True. - max_iters (int): Max number of select a seed to mutate. Default: 10000. - mutate_num_per_seed (int): The number of mutate times for a seed. Default: 20. + evaluate (bool): return evaluate report or not. Default: ``True``. + max_iters (int): Max number of select a seed to mutate. Default: ``1000``. + mutate_num_per_seed (int): The number of mutate times for a seed. Default: ``20``. Returns: @@ -778,5 +780,5 @@ class SensitivityMaximizingFuzzer(Fuzzer): attack_success_rate = None metrics_report['Accuracy'] = acc metrics_report['Attack_success_rate'] = attack_success_rate - metrics_report['Coverage_metrics'] = coverage.get_metrics(np.concatenate((fuzz_samples), axis=0)) + metrics_report['Coverage_metrics'] = coverage.get_metrics(fuzz_samples) return metrics_report diff --git a/mindarmour/fuzz_testing/scc_readme.md b/mindarmour/fuzz_testing/scc_readme.md index b18b467..1231d97 100644 --- a/mindarmour/fuzz_testing/scc_readme.md +++ b/mindarmour/fuzz_testing/scc_readme.md @@ -76,7 +76,17 @@ test_labels = np.concatenate(test_labels, axis=0) 2. 
Coverage参数设置。 -Sensitivity Convergence Coverage包含两个user-specified参数:threshold t和敏感神经元数量 selected_neurons_num。 +Sensitivity Convergence Coverage包含Model, threshold, batch_size, selected_neurons_num和n_iter。 + +Model:用户指定需要测试的模型。 + +threshold:神经元覆盖阈值,当覆盖率大于threshold时神经元覆盖率测试完成。 + +batch_size:测试过程中同时利用batch_size数量大小的输入计算Coverage。 + +selected_neurons_num:测试神经元数量,数量越大,测试越准确,但时间开销也越大。 + +n_iter:最大测试次数,避免模型长时间测试。 以下是Coverage参数配置例子: diff --git a/mindarmour/fuzz_testing/sensitivity_convergence_coverage.py b/mindarmour/fuzz_testing/sensitivity_convergence_coverage.py index a441a2d..a7194c4 100644 --- a/mindarmour/fuzz_testing/sensitivity_convergence_coverage.py +++ b/mindarmour/fuzz_testing/sensitivity_convergence_coverage.py @@ -15,13 +15,16 @@ Source code of SensitivityConvergenceCoverage class. """ import numpy as np +from tqdm import tqdm +from mindspore import Tensor from mindspore.train.summary.summary_record import _get_summary_tensor_data from mindarmour.fuzz_testing import CoverageMetrics from mindarmour.utils._check_param import check_numpy_param from mindarmour.utils.logger import LogUtil + LOGGER = LogUtil.get_instance() TAG = 'CoverageMetrics' @@ -34,10 +37,12 @@ class SensitivityConvergenceCoverage(CoverageMetrics): Args: model (Model): Model to be evaluated. - threshold (float): Threshold of sensitivity convergence coverage. Default: 0.001. - incremental (bool): Whether to use incremental mode. Default: False. - batch_size (int): Batch size. Default: 32. - selected_neurons_num (int): Number of neurons selected for sensitivity convergence coverage. Default: 10. + threshold (float): Threshold of sensitivity convergence coverage. Default: ``0.5``. + incremental (bool): Whether to use incremental mode. Default: ``False``. + batch_size (int): Batch size. Default: ``32``. + selected_neurons_num (int): Number of neurons selected for sensitivity convergence coverage. Default: ``100``. + n_iter (int): Number of iterations. Default: ``1000``. 
+ ''' def __init__(self, model, threshold=0.5, incremental=False, batch_size=32, selected_neurons_num=100, n_iter=1000): @@ -50,25 +55,6 @@ class SensitivityConvergenceCoverage(CoverageMetrics): self.sensitive_neuron_idx = {} self.initial_samples = [] - def get_sensitive_neruon_idx(self, dataset): - ''' - Args: - dataset (numpy.ndarray): Dataset for evaluation. - - Returns: - sensitive_neuron_idx(dict): The index of sensitive neurons. - ''' - - inputs = check_numpy_param('dataset', dataset) - self._model.predict(Tensor(inputs)) - layer_out = _get_summary_tensor_data() - for layer, tensor in layer_out.items(): - tensor = tensor.asnumpy().reshape(tensor.shape[0], -1) - clean, benign = tensor[:tensor.shape[0] // 2], tensor[tensor.shape[0] // 2:] - sensitivity = abs(clean-benign) - self.sensitive_neuron_idx[layer] = np.argsort(np.sum(sensitivity, - axis=0))[-min(self.selected_neurons_num, len(Helper)):] - def get_metrics(self, dataset): ''' Args: @@ -82,7 +68,7 @@ class SensitivityConvergenceCoverage(CoverageMetrics): >>> from mindspore.ops import operations as P >>> from mindspore.train import Model >>> from mindspore.ops import TensorSummary - >>> from mindarmour.fuzz_testing import NeuronCoverage + >>> from mindarmour.fuzz_testing import SensitivityConvergenceCoverage >>> class Net(nn.Cell): ... def __init__(self): ... super(Net, self).__init__() @@ -114,22 +100,27 @@ class SensitivityConvergenceCoverage(CoverageMetrics): ... x = self.fc3(x) ... self.summary('fc3', x) ... 
return x - >>> net = Net() - >>> model = Model(net) >>> batch_size = 32 >>> num_classe = 10 + >>> train_images = np.random.rand(32, 1, 32, 32).astype(np.float32) >>> test_images = np.random.rand(batch_size, 1, 32, 32).astype(np.float32) - >>> test_images_adv = mutation(test_images) - >>> test_set = np.concatenate((test_images, test_images_adv), axis=0) - >>> SCC = SensitivityConvergenceCoverage(model,batch_size = batch_size*2) - >>> SCC_metric = SCC.get_metrics(test_set) + >>> test_labels = np.random.randint(num_classe, size=batch_size).astype(np.int32) + >>> test_labels = (np.eye(num_classe)[test_labels]).astype(np.float32) + >>> initial_seeds = [] + >>> # make initial seeds + >>> for img, label in zip(test_images, test_labels): + ... initial_seeds.append([img, label]) + >>> initial_seeds = initial_seeds[:batch_size] + >>> SCC = SensitivityConvergenceCoverage(model,batch_size = batch_size) + >>> metrics = SCC.get_metrics(test_images) ''' inputs = check_numpy_param('dataset', dataset) if not self.sensitive_neuron_idx: - self.get_sensitive_neruon_idx(dataset) + self._get_sensitive_neuron_idx(dataset) self._model.predict(Tensor(inputs)) layer_out = _get_summary_tensor_data() - for layer, tensor in layer_out.items(): + + for layer, tensor in tqdm(layer_out.items()): tensor = tensor.asnumpy().reshape(tensor.shape[0], -1) clean, benign = tensor[:tensor.shape[0] // 2], tensor[tensor.shape[0] // 2:] sensitivity = abs(clean-benign) @@ -137,13 +128,30 @@ class SensitivityConvergenceCoverage(CoverageMetrics): sensitivity = sensitivity[:, self.sensitive_neuron_idx[layer]] except KeyError: raise RuntimeError('The layer {} is not in the sensitive_neuron_idx'.format(layer)) - converged, size = self.scc(sensitivity, sensitivity.shape[1], self.threshold) + converged, size = self._scc(sensitivity, sensitivity.shape[1], self.threshold) self.total_converged += converged self.total_size += size scc_value = self.total_converged/self.total_size return scc_value - def scc(self, 
sensitivity_list, size, threshold=0): + def _get_sensitive_neuron_idx(self, dataset): + ''' + Args: + dataset (numpy.ndarray): Dataset for evaluation. + ''' + + inputs = check_numpy_param('dataset', dataset) + self._model.predict(Tensor(inputs)) + layer_out = _get_summary_tensor_data() + for layer, tensor in layer_out.items(): + tensor = tensor.asnumpy().reshape(tensor.shape[0], -1) + clean, benign = tensor[:tensor.shape[0] // 2], tensor[tensor.shape[0] // 2:] + sensitivity = abs(clean-benign) + self.sensitive_neuron_idx[layer] = np.argsort(np.sum(sensitivity, + axis=0))[-min(self.selected_neurons_num,\ + len(np.sum(sensitivity, axis=0))):] + + def _scc(self, sensitivity_list, size, threshold=0): ''' Args: sensitivity_list(numpy.ndarray): The sensitivity of each neuron. @@ -151,35 +159,35 @@ class SensitivityConvergenceCoverage(CoverageMetrics): threshold(float): The threshold of sensitivity convergence coverage. Returns: - converged(int): The number of neurons that have converged to the threshold. - size(int): The number of neurons. + int, The number of neurons that have converged to the threshold. + int, The number of neurons. ''' converged = 0 for i in range(sensitivity_list.shape[1]): - _, acceptance_rate = self.build_mh_chain(sensitivity_list[:, i], - np.mean(sensitivity_list[:, i]), self.n_iter, self.log_prob) + _, acceptance_rate = self._build_mh_chain(sensitivity_list[:, i], + np.mean(sensitivity_list[:, i]), self.n_iter, self._log_prob) if acceptance_rate > threshold: converged += 1 return converged, size - def proposal(self, x, stepsize): + def _proposal(self, x, stepsize): ''' Args: x(numpy.ndarray): The input of the proposal function. stepsize(float): The stepsize of the proposal function. Returns: - x(numpy.ndarray): The output of the proposal function. + numpy.ndarray, The output of the proposal function. 
''' return np.random.uniform(low=x - 0.5 * stepsize, high=x + 0.5 * stepsize, size=x.shape) - def p_acc_mh(self, x_new, x_old, log_prob): + def _p_acc_mh(self, x_new, x_old, log_prob): ''' Args: x_new(numpy.ndarray): The new state. @@ -187,21 +195,21 @@ class SensitivityConvergenceCoverage(CoverageMetrics): log_prob(function): The log probability function. Returns: - float: The acceptance probability. + float, The acceptance probability. ''' return min(1, np.exp(log_prob(x_new) - log_prob(x_old))) - def log_prob(self, x): + def _log_prob(self, x): ''' Args: x(numpy.ndarray): The input of the log probability function. Returns: - float: The output of the log probability function. + float, The output of the log probability function. ''' return -0.5 * np.sum(x ** 2) - def sample_mh(self, x_old, log_prob, stepsize): + def _sample_mh(self, x_old, log_prob, stepsize): ''' here we determine whether we accept the new state or not: we draw a random number uniformly from [0,1] and compare @@ -212,16 +220,16 @@ class SensitivityConvergenceCoverage(CoverageMetrics): stepsize(float): The stepsize of the proposal function. Returns: - accept(bool): Whether to accept the new state. - x_new(numpy.ndarray): The new state. + bool, Whether to accept the new state. + numpy.ndarray, if bool=True: return new state, else: return old state. ''' - x_new = self.proposal(x_old, stepsize) - accept = np.random.random() < self.p_acc_mh(x_new, x_old, log_prob) + x_new = self._proposal(x_old, stepsize) + accept = np.random.random() < self._p_acc_mh(x_new, x_old, log_prob) if accept: return accept, x_new return accept, x_old - def build_mh_chain(self, init, stepsize, n_total, log_prob): + def _build_mh_chain(self, init, stepsize, n_total, log_prob): ''' Args: init(numpy.ndarray): The initial state. @@ -230,14 +238,14 @@ class SensitivityConvergenceCoverage(CoverageMetrics): log_prob(function): The log probability function. Returns: - chain(list): The chain of samples. 
- acceptance_rate(float): The acceptance rate of the chain. + list, The chain of samples. + float, The acceptance rate of the chain. ''' n_accepted = 0 chain = [init] for _ in range(n_total): - accept, state = self.sample_mh(chain[-1], log_prob, stepsize) + accept, state = self._sample_mh(chain[-1], log_prob, stepsize) chain.append(state) n_accepted += accept acceptance_rate = n_accepted / float(n_total)