123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
- import matplotlib
- from utils.file.trans_methods import create_file_path
- matplotlib.use('Agg')
- matplotlib.rcParams['font.family'] = 'SimHei'
- matplotlib.rcParams['font.sans-serif'] = ['SimHei']
- matplotlib.rcParams['axes.unicode_minus'] = False
- from matplotlib import pyplot as plt
def scatter(title, x_label, y_label, x_values, y_values, color=None, col_map=None, size=10,
            save_file_path=''):
    """Draw a scatter plot on a fresh 8x6 figure and save it to a file.

    Args:
        title: Figure title. Also used to build the default output name
            ``title + '.png'`` when ``save_file_path`` is empty.
        x_label: Label for the x axis.
        y_label: Label for the y axis.
        x_values: X coordinates of the points.
        y_values: Y coordinates of the points.
        color: Optional value forwarded to ``plt.scatter`` as ``c``. When it
            is ``None`` the points are drawn without an explicit color.
        col_map: Optional mapping of legend label -> color. It is only used
            when ``color`` is given; each entry becomes one legend patch.
            ``None`` (the default) or an empty mapping means no legend.
        size: Marker size forwarded to ``plt.scatter`` as ``s``.
        save_file_path: Destination of the image. When empty, the image is
            written to ``title + '.png'``.
    """
    if save_file_path:
        # NOTE(review): presumably prepares the destination directory for the
        # given path -- confirm the meaning of the second argument against
        # utils.file.trans_methods.create_file_path.
        create_file_path(save_file_path, True)
    else:
        save_file_path = title + '.png'

    fig = plt.figure(figsize=(8, 6))
    try:
        plt.title(title, fontsize=16)
        plt.xlabel(x_label, fontsize=14)
        plt.ylabel(y_label, fontsize=14)

        if color is not None:
            plt.scatter(x_values, y_values, s=size, c=color)
            if col_map:
                # Build the legend from proxy rectangles, one per
                # label -> color entry, instead of from the plotted points.
                patches = [plt.Rectangle((0, 0), 1, 1, fc=c) for c in col_map.values()]
                plt.legend(patches, list(col_map.keys()))
        else:
            plt.scatter(x_values, y_values, s=size)

        plt.savefig(save_file_path)
    finally:
        # Close this figure even when plotting or saving fails, so repeated
        # calls do not accumulate open figures in pyplot's global state.
        plt.close(fig)
if __name__ == '__main__':
    # Ad-hoc demo: colour each sample of a CSV series by whether it deviates
    # from its rolling mean and whether it lies above the global mean.
    # ``plt`` is the module-level pyplot import.
    import numpy as np
    import pandas as pd

    # The file must contain exactly one column, because a single column name
    # is assigned below.
    df = pd.read_csv(r"/home/wzl/test_data/2024_10_17_14_54_46_200k_Root.csv")
    df.reset_index(inplace=True, drop=True)
    df.columns = ['data']

    # Rolling mean over a window of `window_size` samples.
    window_size = 20
    moving_avg = df['data'].rolling(window=window_size).mean()
    df['moving_avg'] = moving_avg

    # Absolute deviation of each sample from its rolling mean, in percent.
    percentage_diff = ((df['data'] - moving_avg) / moving_avg).abs() * 100
    df['percentage_diff'] = percentage_diff

    # A sample is an anomaly when it deviates from the rolling mean by MORE
    # than `threshold` percent.
    threshold = 3
    df['is_anomaly'] = percentage_diff > threshold

    # True where the sample lies above the mean of the whole series.
    avg = df['data'].mean()
    df['avg'] = df['data'] > avg

    # Drop the first `window_size` rows (the warm-up region of the rolling
    # mean). A non-inplace reset avoids mutating a slice of `df`.
    difference_ratio = df.iloc[window_size:].reset_index()

    # Colour code: red = anomaly above the global mean,
    # green = normal sample at or below the global mean, blue = the rest.
    is_anomaly = difference_ratio['is_anomaly']
    above_avg = difference_ratio['avg']
    colors = np.where(is_anomaly & above_avg, 'r',
                      np.where(~is_anomaly & ~above_avg, 'g', 'b'))
    datas = difference_ratio['data'].values

    # Create the single figure that holds the scatter plot.
    plt.figure(figsize=(10, 6))
    plt.scatter(np.arange(len(datas)), datas, c=colors)

    # Title and axis labels; the title reports the actual window size.
    plt.title(f'Difference Ratio of Each Data Point to Its Previous {window_size} Data Points Mean')
    plt.xlabel('Index')
    # NOTE(review): the y axis shows the raw 'data' values, not a ratio --
    # confirm whether 'percentage_diff' was meant to be plotted instead.
    plt.ylabel('Difference Ratio')

    # Show grid lines.
    plt.grid(True)

    # NOTE(review): this module selects the non-interactive 'Agg' backend at
    # import time, so show() will not open a window; use plt.savefig(...) if
    # the figure needs to be inspected.
    plt.show()
|