Mirror of https://gitee.com/openeuler/A-Tune.git, synced 2025-12-06 08:08:59 +08:00
anomaly_detection
@@ -936,7 +936,7 @@ example.conf can be written as follows (each tuning item below is optional, only

 **Command format**

-**atune-adm collection** <OPTINOS*>*
+**atune-adm collection** <OPTIONS*>*

 **Parameter description**
@@ -966,7 +966,7 @@ example.conf can be written as follows (each tuning item below is optional, only

 **Command format**

-**atune-adm train** <OPTINOS*>*
+**atune-adm train** <OPTIONS*>*

 **Parameter description**
@@ -23,7 +23,7 @@ from analysis.app import App
 from analysis.ui.config import UiConfig
 from analysis.ui.database import ui_tuning, ui_analysis, ui_user
 from analysis.ui import offline
+from analysis.ui import echo

 class AppUI(App):
     """app ui"""
@@ -34,6 +34,7 @@ class AppUI(App):
         self.api.add_resource(ui_analysis.UiAnalysis, '/v1/UI/analysis/<string:cmd>')
         self.api.add_resource(ui_user.UiUser, '/v1/UI/user/<string:cmd>')
         self.api.add_resource(offline.OfflineTunning, '/v2/UI/offline/<string:cmd>')
+        self.api.add_resource(echo.EchoTunning, '/v2/UI/echo')


 def main(filename):
analysis/ui/echo.py (new file, 66 lines)
@@ -0,0 +1,66 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (c) 2020 Huawei Technologies Co., Ltd.
# A-Tune is licensed under the Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
# http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.
# Create: 2022-9-20

"""
Routers for /v2/UI/echo url
"""

import logging
from socket import socket, AF_INET, SOCK_STREAM

from flask import abort, request
from flask_restful import Resource

from analysis.atuned.utils.npipe import NPipe, get_npipe

LOGGER = logging.getLogger(__name__)
CORS = [('Access-Control-Allow-Origin', '*')]


class EchoTunning(Resource):
    """restful api for web ui echo"""

    def post(self):
        """restful api post: assemble an atune-adm command from the request
        body and forward it through the named pipe"""
        if request.json is None:
            return abort(404, 'request body is missing')
        tuning_command = 'atune-adm ' + request.json['command']
        LOGGER.info('options: %s', request.json['options'])
        for option in request.json['options']:
            tuning_command = tuning_command + ' ' + \
                option['option'] + ' ' + option['value']
        tuning_command = tuning_command + ' ' + request.json['yaml']
        pipe = NPipe()
        res = pipe.open()
        pipe.write(tuning_command)
        pipe.close()
        return res, 200, CORS

    def get(self):
        """restful api get: wait for output from the named pipe of the given
        session and relay it to the first client that connects on port 5002"""
        ip = ''
        port = 5002
        listen_socket = socket(AF_INET, SOCK_STREAM)
        listen_socket.bind((ip, port))
        listen_socket.listen(8)
        data_socket, _ = listen_socket.accept()
        # the original read an undefined name `args`; request.args is the
        # likely intent for the session_id query parameter
        n_pipe = get_npipe(request.args.get("session_id"))
        while True:
            echo = n_pipe.get()
            if echo:
                break
        data_socket.send(echo)
        data_socket.close()
        listen_socket.close()
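The handler above only requires the JSON keys `command`, `options` (a list of `{option, value}` pairs) and `yaml`. For reference, a hypothetical client call; all field values are placeholders rather than documented atune-adm arguments, and the default Flask port is an assumption:

```python
# Hypothetical POST against the /v2/UI/echo route registered in app_ui.py.
# Field values are placeholders, not documented atune-adm arguments, and
# localhost:5000 assumes the Flask development default.
import requests

payload = {
    "command": "tuning",
    "options": [{"option": "--project", "value": "example"}],
    "yaml": "example.yaml",
}
resp = requests.post("http://localhost:5000/v2/UI/echo", json=payload)
print(resp.status_code, resp.text)
```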
anomaly_detection/data_loader.py (new file, 95 lines)
@@ -0,0 +1,95 @@
# Copyright (c) 2022 Huawei Technologies Co., Ltd.
# A-Tune is licensed under the Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
# http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.

# #############################################
# @Author : zhaoyanjun
# @Contact : zhao-yanjun@qq.com
# @Date : 2022/10/10
# @License : Mulan PSL v2
# #############################################

import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler


class DataSegLoader(object):
    '''load data and cut it into sliding-window segments'''

    def __init__(self, train_path, test_path, label_path, data_name, win_size, step, mode="train"):
        self.mode = mode
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_data = pd.read_csv(train_path + str(data_name)).values
        test_data = pd.read_csv(test_path + str(data_name)).values

        train_data = self.scaler.fit_transform(train_data)
        # scale the test set with the statistics fitted on the training set;
        # the original called fit_transform here, which refits on test data
        test_data = self.scaler.transform(test_data)

        self.train = train_data
        self.test = test_data
        self.val = self.test
        label = pd.read_csv(label_path + str(data_name))
        self.test_labels = label['label'].values

    def __len__(self):
        '''number of window segments available in the current mode'''
        if self.mode == "train":
            return (self.train.shape[0] - self.win_size) // self.step + 1
        elif self.mode == 'thre':
            return (self.val.shape[0] - self.win_size) // self.win_size + 1
        else:
            return (self.test.shape[0] - self.win_size) // self.step + 1

    def __getitem__(self, index):
        '''return one window segment (and its labels outside train mode)'''
        index = index * self.step
        if self.mode == "train":
            return np.float32(self.train[index:index + self.win_size])
        elif self.mode == 'val':
            return np.float32(self.val[index:index + self.win_size]), \
                np.float32(self.test_labels[0:self.win_size])
        elif self.mode == 'test':
            return np.float32(self.test[index:index + self.win_size]), \
                np.float32(self.test_labels[index:index + self.win_size])
        elif self.mode == 'thre':
            # non-overlapping windows for threshold estimation
            segment_index = index // self.step * self.win_size
            segment_end = segment_index + self.win_size
            return np.float32(self.test[segment_index:segment_end]), \
                np.float32(self.test_labels[segment_index:segment_end])
        return None


def get_loader_segment(train_path, test_path, label_path, batch_size, file_name=None,
                       win_size=100, step=1, mode='train'):
    '''load data and return a DataLoader over window segments of one dataset file'''
    # the original hardcoded step=1 here despite exposing a step parameter;
    # callers never pass step, so the default keeps the behaviour identical
    dataset = DataSegLoader(train_path, test_path, label_path, file_name, win_size, step, mode)
    shuffle = mode == 'train'
    data_loader = DataLoader(dataset=dataset,
                             batch_size=batch_size,
                             shuffle=shuffle,
                             num_workers=0)
    return data_loader
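A quick usage sketch of `get_loader_segment`; the paths and window size follow the example config.yaml further below, and the file name is a placeholder:

```python
# Sketch: iterate training windows of one dataset file. The paths mirror
# config.yaml; 'example.csv' is a placeholder file name.
from data_loader import get_loader_segment

loader = get_loader_segment('./data/train/', './data/test/', './data/label/',
                            batch_size=64, file_name='example.csv',
                            win_size=20, mode='train')
for batch in loader:
    # each batch holds float32 windows of shape (batch_size, win_size, n_features)
    print(batch.shape)
    break
```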
anomaly_detection/detection.py (new file, 299 lines)
@@ -0,0 +1,299 @@
# Copyright (c) 2022 Huawei Technologies Co., Ltd.
# A-Tune is licensed under the Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
# http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.

# #############################################
# @Author : zhaoyanjun
# @Contact : zhao-yanjun@qq.com
# @Date : 2022/10/10
# @License : Mulan PSL v2
# #############################################

import os
import time

import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score

from Anomaly_Transformer.model.AnomalyTransformer import AnomalyTransformer
from data_loader import get_loader_segment


def my_kl_loss(p, q, loss_bias):
    '''KL divergence between p and q, stabilised by loss_bias'''
    res = p * (torch.log(p + loss_bias) - torch.log(q + loss_bias))
    return torch.mean(torch.sum(res, dim=-1), dim=1)


def adjust_learning_rate(optimizer, epoch, lr_):
    '''halve the learning rate after every epoch'''
    lr_adjust = {epoch: lr_ * (0.5 ** ((epoch - 1) // 1))}
    if epoch in lr_adjust.keys():
        lr = lr_adjust[epoch]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('Updating learning rate to {}'.format(lr))


class EarlyStopping:
    '''stop training early once neither validation loss improves for `patience` epochs'''

    def __init__(self, patience, model_save_file, verbose=False, dataset_name=''):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.best_score2 = None
        self.early_stop = False
        self.val_loss_min = np.inf
        self.val_loss2_min = np.inf
        self.dataset = dataset_name
        self.model_save_file = model_save_file
        self.model_check = True

    def __call__(self, val_loss, val_loss2, model, path):
        '''update the early-stopping state with the latest validation losses'''
        score = -val_loss
        score2 = -val_loss2
        if self.best_score is None:
            self.best_score = score
            self.best_score2 = score2
            self.save_checkpoint(val_loss, val_loss2, model, path)
        elif score < self.best_score or score2 < self.best_score2:
            self.counter += 1
            if not self.counter < self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.best_score2 = score2
            self.save_checkpoint(val_loss, val_loss2, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, val_loss2, model, path):
        '''save the model into a checkpoint file'''
        model_checkpoint_path = os.path.join(path, str(self.dataset) + self.model_save_file)
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        if self.model_check:
            torch.save(model.state_dict(), model_checkpoint_path)
        self.val_loss_min = val_loss
        self.val_loss2_min = val_loss2


class Detection(object):
    '''anomaly detection'''
    DEFAULTS = {}

    def __init__(self, config):
        self.__dict__.update(Detection.DEFAULTS, **config)
        self.build_model()
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.criterion = nn.MSELoss()

    def build_model(self):
        self.model = AnomalyTransformer(win_size=self.win_size, enc_in=self.input_c,
                                        c_out=self.output_c, e_layers=3)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        if torch.cuda.is_available():
            self.model.cuda()

    def detect(self, mode):
        '''train or test one shared model across multiple datasets'''
        if mode == "train":
            dataset_list = os.listdir(self.train_path)
        elif mode == "test":
            dataset_list = os.listdir(self.test_path)
        performance = []  # the original used this list without initialising it
        for dataset in dataset_list:
            print(str(dataset))
            self.train_loader = get_loader_segment(self.train_path, self.test_path, self.label_path,
                                                   file_name=str(dataset), batch_size=self.batch_size,
                                                   win_size=self.win_size, mode='train')
            self.vali_loader = get_loader_segment(self.train_path, self.test_path, self.label_path,
                                                  file_name=str(dataset), batch_size=self.batch_size,
                                                  win_size=self.win_size, mode='val')
            self.test_loader = get_loader_segment(self.train_path, self.test_path, self.label_path,
                                                  file_name=str(dataset), batch_size=self.batch_size,
                                                  win_size=self.win_size, mode='test')
            self.thre_loader = get_loader_segment(self.train_path, self.test_path, self.label_path,
                                                  file_name=str(dataset), batch_size=self.batch_size,
                                                  win_size=self.win_size, mode='thre')
            if mode == "train":
                self.train()
            elif mode == "test":
                accuracy, delay_time, thresh = self.test()
                performance.append([accuracy, delay_time, thresh])
        if mode == "test":
            mean_accuracy = np.mean([i[0] for i in performance])
            mean_delay_time = np.mean([i[1] for i in performance])
            mean_thresh = np.mean([i[2] for i in performance])
            print('Final mean performance of {} datasets:'.format(len(performance)))
            print("Mean_thresh_score is {:0.5f}".format(mean_thresh))
            print("Mean detection delay is {:0.2f} time steps".format(mean_delay_time))
            print("{:0.2%} of Anomaly status is detected!".format(mean_accuracy))

    def vali(self, vali_loader):
        '''validate the model'''
        self.model.eval()
        loss_a = []
        loss_b = []
        for i, (input_data, _) in enumerate(vali_loader):
            series_loss, prior_loss, rec_loss = self.cal_loss(input_data)
            loss_a.append((rec_loss - self.k * series_loss).item())
            loss_b.append((rec_loss + self.k * prior_loss).item())
        return np.average(loss_a), np.average(loss_b)

    def train(self):
        '''train the model on a single dataset'''
        path = self.model_save_path
        if not os.path.exists(path):
            os.makedirs(path)
        early_stopping = EarlyStopping(patience=self.patience, verbose=True,
                                       dataset_name=self.dataset,
                                       model_save_file=self.model_save_file)
        train_steps = len(self.train_loader)

        # one model is shared across the AIops datasets: resume from the
        # checkpoint whenever it already exists
        model_path = os.path.join(str(path), str(self.dataset) + self.model_save_file)
        if os.path.exists(model_path):
            self.model.load_state_dict(torch.load(model_path))

        for epoch in range(self.num_epochs):
            train_loss_list = []
            epoch_time = time.time()
            self.model.train()
            for i, input_data in enumerate(self.train_loader):
                self.optimizer.zero_grad()
                series_loss, prior_loss, rec_loss = self.cal_loss(input_data)
                train_loss_list.append((rec_loss - self.k * series_loss).item())

                # Minimax strategy: run both backward passes before stepping
                # (the original stepped between them, which can invalidate the
                # retained graph)
                loss1 = rec_loss - self.k * series_loss
                loss1.backward(retain_graph=True)
                loss2 = rec_loss + self.k * prior_loss
                loss2.backward()
                self.optimizer.step()

            train_loss = np.mean(train_loss_list)
            # validation reuses the test loader, as in the original
            validate_loss1, validate_loss2 = self.vali(self.test_loader)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} ".format(
                epoch + 1, train_steps, train_loss, validate_loss1))
            print("cost time: {}".format(time.time() - epoch_time))

            early_stopping(validate_loss1, validate_loss2, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break

            adjust_learning_rate(self.optimizer, epoch + 1, self.lr)

    def cal_loss(self, input_data):
        '''calculate the association discrepancy and the reconstruction loss'''
        inputs = input_data.float().to(self.device)
        output, series, prior, _ = self.model(inputs)

        for u, _ in enumerate(prior):
            # normalise the prior association over the window dimension
            prior_p = prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)
            series_p = prior_p.detach()

            if u == 0:
                prior_total = (torch.mean(my_kl_loss(prior_p, series[u].detach(), self.loss_bias))
                               + torch.mean(my_kl_loss(series[u].detach(), prior_p, self.loss_bias)))
                series_total = (torch.mean(my_kl_loss(series[u], series_p, self.loss_bias))
                                + torch.mean(my_kl_loss(series_p, series[u], self.loss_bias)))
            else:
                prior_total += (torch.mean(my_kl_loss(prior_p, series[u].detach(), self.loss_bias))
                                + torch.mean(my_kl_loss(series[u].detach(), prior_p, self.loss_bias)))
                series_total += (torch.mean(my_kl_loss(series[u], series_p, self.loss_bias))
                                 + torch.mean(my_kl_loss(series_p, series[u], self.loss_bias)))

        series_loss = series_total / len(prior)
        prior_loss = prior_total / len(prior)
        rec_loss = self.criterion(output, inputs)
        return series_loss, prior_loss, rec_loss

    def anomaly_score(self, input_data, criterion, loss_magnification):
        '''per-time-point anomaly criterion for one batch (helper introduced
        while editing; the original inlined this computation in test())'''
        inputs = input_data.float().to(self.device)
        output, series, prior, _ = self.model(inputs)
        for u, _ in enumerate(prior):
            prior_p = prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)
            series_p = prior_p.detach()
            if u == 0:
                series_loss = my_kl_loss(series[u], series_p, self.loss_bias) * loss_magnification
                prior_loss = my_kl_loss(prior_p, series[u].detach(), self.loss_bias) * loss_magnification
            else:
                series_loss += my_kl_loss(series[u], series_p, self.loss_bias) * loss_magnification
                prior_loss += my_kl_loss(prior_p, series[u].detach(), self.loss_bias) * loss_magnification
        cri = torch.softmax((-series_loss - prior_loss), dim=-1) * torch.mean(criterion(inputs, output), dim=-1)
        return cri.detach().cpu().numpy()

    def test(self):
        '''test the model on a single dataset'''
        model_path = os.path.join(str(self.model_save_path), str(self.dataset) + self.model_save_file)
        if not os.path.exists(model_path):
            print("model checkpoint not found!")
        self.model.load_state_dict(torch.load(model_path))
        self.model.eval()
        loss_magnification = 50

        criterion = nn.MSELoss(reduction='none')  # `reduce=False` is deprecated

        # scores on the training data; the original referenced an undefined
        # `train_score` below, so this pass reconstructs it
        train_scores = []
        for i, input_data in enumerate(self.train_loader):
            train_scores.append(self.anomaly_score(input_data, criterion, loss_magnification))
        train_score = np.concatenate(train_scores, axis=0).reshape(-1)

        # find the threshold over train + test scores, then evaluate
        test_labels = []
        attens_score = []
        for i, (input_data, labels) in enumerate(self.thre_loader):
            attens_score.append(self.anomaly_score(input_data, criterion, loss_magnification))
            test_labels.append(labels)

        test_score = np.concatenate(attens_score, axis=0).reshape(-1)
        total_score = np.concatenate([train_score, test_score], axis=0)
        thresh = np.percentile(total_score, 100 - self.anormly_ratio)
        print("Threshold :", thresh)

        test_labels = np.concatenate(test_labels, axis=0).reshape(-1)

        pred = (test_score > thresh).astype(int)  # the original compared an undefined `test_energy`
        gt = test_labels.astype(int)

        # detection adjustment
        # delay_time = time1 (when the anomaly was detected) - time2 (true time when it happened)
        delay_times = []
        for i in range(len(gt) - 10):
            for j in range(11):
                if gt[i] == 1 and pred[i + j] == 1:
                    pred[i] = 1
                    delay_times.append(j)
                    break  # count only the earliest detection of this point

        if len(delay_times) == 0:
            delay_time = 0
        else:
            delay_time = np.mean(delay_times)

        pred = np.array(pred)
        gt = np.array(gt)

        accuracy = accuracy_score(gt, pred)
        print("Accuracy : {:0.4f}".format(accuracy))

        return accuracy, delay_time, thresh
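The class above is driven entirely by the `detection` section of the example config.yaml further below; a minimal driving sketch, equivalent to what main.py does:

```python
# Sketch: drive Detection with the `detection` section of config.yaml,
# mirroring examples/Anomaly_Detection/main.py.
import yaml
from detection import Detection

with open('./config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

detector = Detection(config['detection'])
detector.detect('train')  # fit or update the shared model over ./data/train/
detector.detect('test')   # print threshold, accuracy and mean delay
```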
examples/Anomaly_Detection/README.md (new file, 19 lines)
@@ -0,0 +1,19 @@
# Anomaly-Detection

## Project Information

**Project Full Name**: An operating-system-oriented intelligent anomaly detection scheme

**Background**: An operating system hosts a wide variety of application software, and an operating system failure may lead to serious accidents, so detecting potential anomalies in the operating system quickly and accurately is of great significance. Based on real-time performance indicators of the operating system, such as CPU usage, memory usage, and network delay, this project builds a complete detection pipeline and uses intelligent detection algorithms to analyze operating system exceptions accurately.

## Get Started

1. PyTorch 1.4.0 is recommended.
2. Download the [data](https://pan.baidu.com/s/1FAWf4xllmTiQe13MxxxlHA?pwd=5mmm) into `A-Tune/examples/Anomaly_Detection`.
3. Unzip the data.
4. Process the data, then train and test:

```bash
bash ./start.sh
```
examples/Anomaly_Detection/config.yaml (new file, 38 lines)
@@ -0,0 +1,38 @@
data_process:
  col_timestamp: timestamp
  col_value: value
  col_service: cmdb_id
  col_label: label
  col_level: level
  col_node: node

  raw_train_path: ./data/raw_train
  raw_test_path: ./data/raw_test
  raw_label_path: ./data/raw_label

  train_path: ./data/train
  test_path: ./data/test
  label_path: ./data/label

  label_start_time: 1651334400
  save_type: .csv

detection:
  lr: 0.0001
  num_epochs: 10
  k: 3
  win_size: 20
  input_c: 56
  output_c: 56
  batch_size: 64
  pretrained_model: 20
  dataset: multi_datasets
  mode: train
  train_path: ./data/train/
  test_path: ./data/test/
  label_path: ./data/label/
  model_save_path: ./checkpoints
  model_save_file: _checkpoint.pth
  anormly_ratio: 0.7
  loss_bias: 0.0001
  patience: 3
examples/Anomaly_Detection/data_process.py (new file, 170 lines)
@@ -0,0 +1,170 @@
# Copyright (c) 2022 Huawei Technologies Co., Ltd.
# A-Tune is licensed under the Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
# http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.

# #############################################
# @Author : zhaoyanjun
# @Contact : zhao-yanjun@qq.com
# @Date : 2022/10/10
# @License : Mulan PSL v2
# #############################################

import json
import os

import numpy as np
import pandas as pd


class Processor(object):
    '''preprocess data'''
    DEFAULTS = {}

    def __init__(self, config):
        self.__dict__.update(Processor.DEFAULTS, **config)

    def get_namelist(self, path):
        '''get data information: kpis, datasets, dates, df_time'''
        dates = os.listdir(path)
        kpi_file_names = os.listdir(os.path.join(path, dates[0]))
        kpis = list(kpi_file_names)

        timestamps = []
        for i in range(np.size(dates)):
            data_df = pd.read_csv(os.path.join(path, dates[i], kpi_file_names[0]))
            timestamp = list(data_df[self.col_timestamp].drop_duplicates())
            timestamps.extend(timestamp)

        df_time = pd.DataFrame(data=timestamps, columns=[self.col_timestamp])
        df_time = df_time.sort_values(by=self.col_timestamp)
        df_time = df_time.reset_index(drop=True)
        datasets = list(data_df[self.col_service].drop_duplicates())
        return kpis, datasets, dates, df_time

    def fill_data(self, df, all_timestamp):
        '''fill missing rows of the raw data or drop duplicated ones

        df: data with missing or duplicated timestamps
        all_timestamp: the full timestamp index the data should cover
        '''
        time_size = np.shape(all_timestamp)[0]
        df.drop_duplicates(self.col_timestamp, inplace=True)

        if df.shape[0] == time_size:
            return df

        if df.shape[0] < time_size:
            # fill the missing timestamps with the feature mean
            fill_value = df.iloc[:, 1].mean()
            df_fill = pd.DataFrame(columns=[self.col_timestamp, self.col_value])
            # the timestamps missing from df
            df_fill[self.col_timestamp] = list(set(all_timestamp[self.col_timestamp]) - set(df[self.col_timestamp]))
            # frame-level fillna instead of the original chained iloc fillna,
            # which newer pandas versions do not propagate
            df_fill.fillna({self.col_value: fill_value}, inplace=True)
            df = pd.concat([df, df_fill], ignore_index=True)

        # duplicated rows with the same timestamp were already dropped above
        df.sort_values(by=self.col_timestamp, inplace=True)
        return df

    def process_test_data(self):
        '''assemble the filled test data, one csv per dataset'''
        kpis, datasets, dates, df_timestamp = self.get_namelist(self.raw_test_path)
        service_df = {}
        for i in range(np.size(datasets)):
            # copy, so every dataset gets its own frame; the original aliased
            # one shared DataFrame, so each kpi column was overwritten by the
            # last dataset processed
            service_df[datasets[i]] = df_timestamp.copy()

        for kpi in kpis:
            data_df = pd.DataFrame()
            for i in range(np.size(dates)):
                data_df_tmp = pd.read_csv(os.path.join(self.raw_test_path, dates[i], kpi))
                data_df = pd.concat([data_df, data_df_tmp])

            for dataset in datasets:
                df_temp = data_df[data_df[self.col_service] == dataset]
                df_temp = df_temp.sort_values(by=self.col_timestamp)
                df_temp = self.fill_data(df_temp.loc[:, [self.col_timestamp, self.col_value]], df_timestamp)
                # kpi[4:-4] strips the kpi file-name prefix and the '.csv' suffix
                service_df[dataset][kpi[4:-4]] = list(df_temp[self.col_value])

        for i in range(np.size(datasets)):
            # datasets[i][7:] strips the service-name prefix
            service_df[datasets[i]].to_csv(os.path.join(self.test_path, datasets[i][7:] + self.save_type), index=False)

    def process_train_data(self):
        '''assemble the filled train data, one csv per dataset'''
        kpis, datasets, dates, df_timestamp = self.get_namelist(self.raw_train_path)
        service_df = {}
        for i in range(np.size(datasets)):
            service_df[datasets[i]] = df_timestamp.copy()

        for kpi in kpis:
            data_df = pd.DataFrame()
            for i in range(np.size(dates)):
                data_df_tmp = pd.read_csv(os.path.join(self.raw_train_path, dates[i], kpi))
                data_df = pd.concat([data_df, data_df_tmp])

            for dataset in datasets:
                df_temp = data_df[data_df[self.col_service] == dataset]
                df_temp = df_temp.sort_values(by=self.col_timestamp)
                df_temp = self.fill_data(df_temp.loc[:, [self.col_timestamp, self.col_value]], df_timestamp)
                service_df[dataset][kpi[4:-4]] = list(df_temp[self.col_value])

        for i in range(np.size(datasets)):
            service_df[datasets[i]].to_csv(os.path.join(self.train_path, datasets[i][7:] + self.save_type), index=False)

    def df_time_process(self, times):
        '''align the label (anomaly) timestamps to the next full minute'''
        df_anomaly_time = pd.DataFrame(columns=[self.col_timestamp, self.col_label])
        anomaly_time = []
        for time in times:
            period = int((time - self.label_start_time) / 60)
            time = self.label_start_time + (period + 1) * 60
            anomaly_time.append(time)
        df_anomaly_time[self.col_timestamp] = list(set(anomaly_time))
        df_anomaly_time.fillna({self.col_label: 1}, inplace=True)
        return df_anomaly_time

    def process_label(self):
        '''build the per-dataset label csv from the raw label files'''
        kpis, datasets, dates, df_timestamp = self.get_namelist(self.raw_test_path)
        files = os.listdir(self.raw_label_path)
        df_label_raw = pd.DataFrame(columns=[self.col_timestamp, self.col_service])

        for file_tmp in files:
            with open(os.path.join(self.raw_label_path, file_tmp), 'r', encoding='utf8') as fp:
                json_data = json.load(fp)
            df_temp = pd.DataFrame({self.col_timestamp: json_data[self.col_timestamp],
                                    self.col_service: json_data[self.col_service],
                                    self.col_level: json_data[self.col_level]})
            # delete anomalies caused by the node level
            df_temp = df_temp.drop(df_temp[df_temp[self.col_level].str.contains(self.col_node)].index)
            # DataFrame.append was removed in pandas 2.0; concat instead
            df_label_raw = pd.concat([df_label_raw, df_temp.iloc[:, 0:2]], ignore_index=True)

        for dataset in datasets:
            anomaly_time = []
            for _, row in df_label_raw.iterrows():
                if row[self.col_service] in dataset:
                    anomaly_time.append(row[self.col_timestamp])

            df_anomaly_time = self.df_time_process(anomaly_time)
            df_temp = pd.DataFrame(columns=[self.col_timestamp, self.col_label])
            df_temp[self.col_timestamp] = [x for x in list(df_timestamp[self.col_timestamp])
                                           if x not in list(df_anomaly_time[self.col_timestamp])]
            df_temp[self.col_label] = 0
            df_label = pd.concat([df_temp, df_anomaly_time], ignore_index=True)
            df_label.sort_values(by=self.col_timestamp, inplace=True)
            df_label.to_csv(os.path.join(self.label_path, dataset[7:] + self.save_type), index=False)
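To make `fill_data` concrete, a toy run under the column names from config.yaml (`timestamp`, `value`): the missing timestamp 2 is filled with the mean of the existing values:

```python
# Toy demonstration of Processor.fill_data: timestamps 1 and 3 exist,
# timestamp 2 is missing and is filled with the column mean (20.0).
import pandas as pd
from data_process import Processor

proc = Processor({'col_timestamp': 'timestamp', 'col_value': 'value'})
df = pd.DataFrame({'timestamp': [1, 3], 'value': [10.0, 30.0]})
full = pd.DataFrame({'timestamp': [1, 2, 3]})
print(proc.fill_data(df, full))
#    timestamp  value
# 0          1   10.0
# 2          2   20.0
# 1          3   30.0
```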
examples/Anomaly_Detection/main.py (new file, 59 lines)
@@ -0,0 +1,59 @@
# Copyright (c) 2022 Huawei Technologies Co., Ltd.
# A-Tune is licensed under the Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
# http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.

# #############################################
# @Author : zhaoyanjun
# @Contact : zhao-yanjun@qq.com
# @Date : 2022/10/10
# @License : Mulan PSL v2
# #############################################

import argparse
import os
import sys

import yaml
from torch.backends import cudnn

from data_process import Processor

parser = argparse.ArgumentParser()
parser.add_argument('mode', type=str)
parser.add_argument('config_path', type=str)
parser.add_argument('model_path', type=str)
args = parser.parse_args()

# detection.py lives in the anomaly_detection directory given on the CLI
sys.path.append(args.model_path)
from detection import Detection


if __name__ == "__main__":
    with open(args.config_path, 'r', encoding="utf-8") as config_file:
        config = yaml.safe_load(config_file)
    cudnn.benchmark = True

    if args.mode == 'data_process':
        processor = Processor(config['data_process'])
        processor.process_train_data()
        processor.process_test_data()
        processor.process_label()
        print('Data is processed successfully')

    elif args.mode == 'train':
        if not os.path.exists(config['detection']['model_save_path']):
            os.mkdir(config['detection']['model_save_path'])
        detection = Detection(config['detection'])
        detection.detect("train")

    elif args.mode == 'test':
        detection = Detection(config['detection'])
        detection.detect("test")
examples/Anomaly_Detection/start.sh (new file, 41 lines)
@@ -0,0 +1,41 @@
#!/bin/bash
# Copyright (c) 2022 Huawei Technologies Co., Ltd.
# A-Tune is licensed under the Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
# http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.

# #############################################
# @Author : zhaoyanjun
# @Contact : zhao-yanjun@qq.com
# @Date : 2022/10/10
# @License : Mulan PSL v2
# #############################################

cd ../../anomaly_detection

# fetch the model implementation
git clone https://github.com/thuml/Anomaly-Transformer

# rename "-" to "_" so Python can import the package
mv Anomaly-Transformer Anomaly_Transformer

cd ../examples/Anomaly_Detection

mkdir -p data/train
mkdir -p data/test
mkdir -p data/label

# data preprocessing
python main.py data_process ./config.yaml ../../anomaly_detection/

# train
python main.py train ./config.yaml ../../anomaly_detection/

# test
python main.py test ./config.yaml ../../anomaly_detection/