2 달 전 · 286ae47332
--- a/app_run.py
+++ b/app_run.py
@@ -49,7 +49,11 @@ if __name__ == '__main__':
 
				     if len(sys.argv) >= 2:
			
 
				         env = sys.argv[1]
			
 
				 
			
 
				-    conf_path = path.abspath(f"./conf/etl_config_{env}.yaml")
			
 
				+    if env.endswith('.yaml'):
			
 
				+        conf_path = env
			
 
				+    else:
			
 
				+        conf_path = path.abspath(f"./conf/etl_config_{env}.yaml")
			
 
				+
			
 
				     environ['ETL_CONF'] = conf_path
			
 
				     yaml_config = yaml_conf(conf_path)
			
 
				     environ['env'] = env
			
--- a/conf/etl_config_tidbprod.yaml
+++ b/conf/etl_config_tidbprod.yaml
@@ -0,0 +1,31 @@
 
				+plt:
			
 
				+  database: energy
			
 
				+  host: 192.168.50.234
			
 
				+  password: '123456'
			
 
				+  port: 4000
			
 
				+  user: root
			
 
				+
			
 
				+trans:
			
 
				+  database: energy_data_prod
			
 
				+  host: 192.168.50.235
			
 
				+  password: admin123456
			
 
				+  port: 4000
			
 
				+  user: root
			
 
				+
			
 
				+# 如果要放在原始路径,则配置这个 以下面的名称作为切割点,新建清理数据文件夹
			
 
				+etl_origin_path_contain: 收资数据
			
 
				+# 如果单独保存,配置这个路径
			
 
				+save_path:
			
 
				+
			
 
				+# 日志保存路径
			
 
				+log_path_dir: /data/logs/no_batch_trans_tidb
			
 
				+
			
 
				+# 临时文件存放处,有些甲方公司给的tmp太小,只好自己配置
			
 
				+tmp_base_path: /data/download/collection_data/tmp
			
 
				+
			
 
				+run_batch_count: 2
			
 
				+
			
 
				+archive_path: /data/download/collection_data/archive/prod_ti_db
			
 
				+
			
 
				+
			
 
				+use_tidb: True
			
--- a/etl/common/PathsAndTable.py
+++ b/etl/common/PathsAndTable.py
@@ -22,6 +22,9 @@ class PathsAndTable(object):
 
				         self.wind_col_trans = wind_col_trans
			
 
				 
			
 
				         save_path_conf = read_conf(yaml_config, "save_path")
			
 
				+
			
 
				+        self.use_tidb = read_conf(yaml_config, 'use_tidb', False)
			
 
				+
			
 
				         self.tmp_base_path = read_conf(yaml_config, "tmp_base_path", "/tmp")
			
 
				         if save_path_conf:
			
 
				             self.save_path = save_path_conf + sep + self.wind_farm_name
			
@@ -73,7 +76,7 @@ class PathsAndTable(object):
 
				         if self.save_db:
			
 
				             trans_print("开始创建表")
			
 
				             if self.read_type in ['second', 'minute']:
			
 
				-                creat_min_sec_table(self.get_table_name(), self.read_type)
			
 
				+                creat_min_sec_table(self.get_table_name(), self.read_type, self.use_tidb)
			
 
				             elif self.read_type in ['fault', 'warn']:
			
 
				                 create_warn_fault_table(self.get_table_name())
			
 
				             else:
			
--- a/etl/common/SaveToDb.py
+++ b/etl/common/SaveToDb.py
@@ -4,7 +4,7 @@ import traceback
 
				 
			
 
				 from etl.common.PathsAndTable import PathsAndTable
			
 
				 from service.trans_conf_service import update_trans_transfer_progress
			
 
				-from service.trans_service import save_partation_file_to_db, save_file_to_db
			
 
				+from service.trans_service import save_scada_file_to_db, save_file_to_db
			
 
				 from utils.file.trans_methods import split_array
			
 
				 from utils.log.trans_log import trans_print
			
 
				 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
			
@@ -32,13 +32,13 @@ class SaveToDb(object):
 
				         all_arrays = split_array(all_saved_files, split_count)
			
 
				         try:
			
 
				             for index, arr in enumerate(all_arrays):
			
 
				-                with multiprocessing.Pool(split_count) as pool:
			
 
				+                with multiprocessing.Pool(10) as pool:
			
 
				                     if self.pathsAndTable.read_type in ['minute', 'second']:
			
 
				-                        pool.starmap(save_partation_file_to_db,
			
 
				+                        pool.starmap(save_scada_file_to_db,
			
 
				                                      [(self.pathsAndTable.get_table_name(), file,
			
 
				                                        self.pathsAndTable.wind_col_trans[os.path.basename(file).split(".")[0]],
			
 
				                                        os.path.basename(os.path.dirname(file)),
			
 
				-                                       self.batch_count) for file in arr])
			
 
				+                                       self.batch_count,self.pathsAndTable.use_tidb) for file in arr])
			
 
				                     else:
			
 
				                         pool.starmap(save_file_to_db,
			
 
				                                      [(self.pathsAndTable.get_table_name(), file, self.batch_count) for file in arr])
			
--- a/etl/wind_power/min_sec/ReadAndSaveTmp.py
+++ b/etl/wind_power/min_sec/ReadAndSaveTmp.py
@@ -244,9 +244,15 @@ class ReadAndSaveTmp(object):
 
				                                      resolve_col_prefix=self.trans_param.resolve_col_prefix)
			
 
				             else:
			
 
				                 if self.trans_param.need_valid_cols:
			
 
				-                    df = read_file_to_df(file_path, read_cols, trans_cols=trans_cols)
			
 
				+                    if self.trans_param.resolve_col_prefix:
			
 
				+                        df = read_file_to_df(file_path, trans_cols=trans_cols,
			
 
				+                                             resolve_col_prefix=self.trans_param.resolve_col_prefix)
			
 
				+                    else:
			
 
				+                        df = read_file_to_df(file_path, read_cols, trans_cols=trans_cols,
			
 
				+                                             resolve_col_prefix=self.trans_param.resolve_col_prefix)
			
 
				                 else:
			
 
				-                    df = read_file_to_df(file_path, trans_cols=trans_cols)
			
 
				+                    df = read_file_to_df(file_path, trans_cols=trans_cols,
			
 
				+                                         resolve_col_prefix=self.trans_param.resolve_col_prefix)
			
 
				 
			
 
				             # 处理列名前缀问题
			
 
				             if self.trans_param.resolve_col_prefix:
			
@@ -360,6 +366,6 @@ class ReadAndSaveTmp(object):
 
				         trans_print("开始保存数据到临时文件")
			
 
				         begin = datetime.datetime.now()
			
 
				         self.read_file_and_save_tmp()
			
 
				-        update_trans_transfer_progress(self.pathsAndTable.id,  50,
			
 
				+        update_trans_transfer_progress(self.pathsAndTable.id, 50,
			
 
				                                        self.pathsAndTable.save_db)
			
 
				         trans_print("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin)
			
--- a/etl/wind_power/wave/WaveTrans.py
+++ b/etl/wind_power/wave/WaveTrans.py
@@ -1,17 +1,18 @@
 
				-import datetime
			
 
				 import json
			
 
				 import multiprocessing
			
 
				+import traceback
			
 
				 
			
 
				 from service.plt_service import get_all_wind
			
 
				+from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
			
 
				+    update_trans_status_success, update_trans_status_error
			
 
				 from service.trans_service import get_wave_conf, save_df_to_db, get_or_create_wave_table, \
			
 
				     get_wave_data, delete_exist_wave_data
			
 
				-from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
			
 
				-    update_trans_status_success
			
 
				 from utils.file.trans_methods import *
			
 
				 from utils.log.trans_log import set_trance_id
			
 
				 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
			
 
				 
			
 
				 exec("from os.path import *")
			
 
				+exec("import re")
			
 
				 
			
 
				 
			
 
				 class WaveTrans(object):
			
@@ -46,8 +47,7 @@ class WaveTrans(object):
 
				         update_trans_status_running(self.id)
			
 
				         trance_id = '-'.join([self.wind_farm_code, 'wave'])
			
 
				         set_trance_id(trance_id)
			
 
				-        all_files = read_files(self.read_dir, ['txt'])
			
 
				-        self.data_count = len(all_files)
			
 
				+        all_files = read_files(self.read_dir, ['txt', 'csv'])
			
 
				         update_trans_transfer_progress(self.id, 5)
			
 
				         # 最大取系统cpu的 1/2
			
 
				         split_count = get_available_cpu_count_with_percent(1 / 2)
			
@@ -68,7 +68,7 @@ class WaveTrans(object):
 
				 
			
 
				         mesure_poins = [key for key, value in wave_conf.items() if str(key).startswith('conf_') and value]
			
 
				         for point in mesure_poins:
			
 
				-            map_dict[wave_conf[point]] = point.replace('conf_', '')
			
 
				+            map_dict[wave_conf[point].strip()] = point.replace('conf_', '')
			
 
				 
			
 
				         wind_turbine_name_set = set()
			
 
				 
			
@@ -77,8 +77,15 @@ class WaveTrans(object):
 
				         for index, now_array in enumerate(all_array):
			
 
				             index_begin = datetime.datetime.now()
			
 
				             with multiprocessing.Pool(split_count) as pool:
			
 
				-                file_datas = pool.starmap(self.get_data_exec,
			
 
				-                                          [(base_param_exec, i, list(map_dict.keys())) for i in now_array])
			
 
				+                try:
			
 
				+                    file_datas = pool.starmap(self.get_data_exec,
			
 
				+                                              [(base_param_exec, i, list(map_dict.keys())) for i in now_array])
			
 
				+                    trans_print(f'总数:{len(now_array)},返回个数{len(file_datas)}')
			
 
				+                except Exception as e:
			
 
				+                    message = str(e)
			
 
				+                    trans_print(traceback.format_exc())
			
 
				+                    update_trans_status_error(self.id, message[0:len(message) if len(message) < 100 else 100])
			
 
				+                    raise e
			
 
				 
			
 
				             update_trans_transfer_progress(self.id, 20 + int(index / total_index * 60))
			
 
				             trans_print("读取文件耗时:", datetime.datetime.now() - self.begin)
			
@@ -102,13 +109,13 @@ class WaveTrans(object):
 
				                              mesure_data])
			
 
				 
			
 
				             if result_list:
			
 
				+                self.data_count = self.data_count + len(result_list)
			
 
				                 df = pd.DataFrame(result_list,
			
 
				                                   columns=['wind_turbine_name', 'time_stamp', 'rotational_speed', 'sampling_frequency',
			
 
				                                            'mesure_point_name', 'type', 'mesure_data'])
			
 
				                 df['time_stamp'] = pd.to_datetime(df['time_stamp'], errors='coerce')
			
 
				                 df['mesure_point_name'] = df['mesure_point_name'].map(map_dict)
			
 
				                 df.dropna(subset=['mesure_point_name'], inplace=True)
			
 
				-
			
 
				                 df['wind_turbine_number'] = df['wind_turbine_name'].map(all_wind).fillna(df['wind_turbine_name'])
			
 
				 
			
 
				                 df['mesure_data'] = df['mesure_data'].apply(lambda x: json.dumps(x))
			
--- a/nutika_package.sh
+++ b/nutika_package.sh
@@ -1,3 +1,3 @@
 
				 #!/bin/bash
			
 
				-nuitka --standalone --onefile --include-data-files=./conf/*=./conf/  --output-dir=/home/wzl/project/install_package --remove-output app_run.py
			
 
				+nuitka --standalone --onefile --static-libpython=yes  --include-data-files=./conf/*=./conf/  --output-dir=/home/wzl/project/install_package --remove-output app_run.py
			
 
				 
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,6 @@ PyMySQL~=1.1.0
 
				 SQLAlchemy~=2.0.30
			
 
				 rarfile~=4.2
			
 
				 PyYAML~=6.0.1
			
 
				-matplotlib~=3.9.0
			
 
				 chardet~=3.0.4
			
 
				 psutil~=6.0.0
			
 
				 openpyxl ~= 3.1.4
			
--- a/service/plt_service.py
+++ b/service/plt_service.py
@@ -39,3 +39,11 @@ def get_base_wind_and_power(wind_turbine_number):
 
				         return None
			
 
				     return dict_datas
			
 
				 
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    from os import path,environ
			
 
				+    env = 'prod'
			
 
				+    conf_path = path.abspath(f"./conf/etl_config_{env}.yaml")
			
 
				+    environ['ETL_CONF'] = conf_path
			
 
				+    environ['env'] = env
			
 
				+    print(get_all_wind('WOF039800012'))
			
--- a/service/trans_service.py
+++ b/service/trans_service.py
@@ -6,10 +6,10 @@ from os import *
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				+from service.common_connect import trans
			
 
				 from service.trans_conf_service import create_wave_table
			
 
				 from utils.file.trans_methods import split_array
			
 
				 from utils.log.trans_log import trans_print
			
 
				-from service.common_connect import trans
			
 
				 
			
 
				 
			
 
				 def get_min_sec_conf(field_code, trans_type) -> dict:
			
@@ -58,7 +58,7 @@ def get_wave_conf(field_code) -> dict:
 
				     return res[0]
			
 
				 
			
 
				 
			
 
				-def creat_min_sec_table(table_name, trans_type):
			
 
				+def creat_min_sec_table(table_name, trans_type, use_tidb=False):
			
 
				     exists_table_sql = f"""
			
 
				     select count(1) as count from information_schema.tables where table_schema = '{trans.database}' and table_name = '{table_name}'
			
 
				     """
			
@@ -135,7 +135,10 @@ def creat_min_sec_table(table_name, trans_type):
 
				              KEY `time_stamp` (`time_stamp`),
			
 
				              KEY `wind_turbine_number` (`wind_turbine_number`),
			
 
				              {add_key}
			
 
				-        )
			
 
				+        ) 
			
 
				+        """
			
 
				+        # if not use_tidb:
			
 
				+        create_sql = create_sql + f"""
			
 
				         PARTITION BY LIST COLUMNS ({key}, `wind_turbine_number`) (
			
 
				         PARTITION pDefault VALUES IN ((000000, 'wind_turbine_number'))
			
 
				         ) 
			
@@ -177,18 +180,37 @@ def add_or_remove_partation(table_name: str, date_str: str, wind_turbine_number)
 
				         add_partation(table_name, date_str, wind_turbine_number)
			
 
				 
			
 
				 
			
 
				-def save_partation_file_to_db(table_name: str, file: str, wind_turbine_number, date_str, batch_count=100000):
			
 
				+def drop_exists_data(table_name, wind_turbine_number, min_date, max_date):
			
 
				+    # sql = f"# delete from {table_name} where wind_turbine_number = '{wind_turbine_number}' and time_stamp between '{min_date}' and '{max_date}'"
			
 
				+
			
 
				+    sql = f"""
			
 
				+    BATCH ON `time_stamp`, `wind_turbine_number` LIMIT 1000 
			
 
				+    DELETE FROM `{table_name}` 
			
 
				+    WHERE `rated_at` >= "{min_date}" 
			
 
				+    AND `rated_at` <= "{max_date}"
			
 
				+    AND `wind_turbine_number` = "{wind_turbine_number}";
			
 
				+    """
			
 
				+
			
 
				+    count = trans.execute(sql)
			
 
				+    trans_print(f"删除数据{count}条，{table_name},{wind_turbine_number},{min_date},{max_date}")
			
 
				+
			
 
				+
			
 
				+def save_scada_file_to_db(table_name, file: str, wind_turbine_number, date_str, batch_count=100000, use_tidb=False):
			
 
				     base_name = path.basename(file)
			
 
				-    # wind_turbine_number = path.basename(file).split(".")[0]
			
 
				-    # date_str = path.basename(path.dirname(file))
			
 
				+    df = pd.read_csv(file)
			
 
				+    # if use_tidb:
			
 
				+    #     min_date = df['time_stamp'].min()
			
 
				+    #     max_date = df['time_stamp'].max()
			
 
				+    #     # drop_exists_data(table_name, wind_turbine_number, min_date, max_date)
			
 
				+    # else:
			
 
				+    #     add_or_remove_partation(table_name, date_str, wind_turbine_number)
			
 
				 
			
 
				     add_or_remove_partation(table_name, date_str, wind_turbine_number)
			
 
				 
			
 
				     try:
			
 
				-        for i, df in enumerate(pd.read_csv(file, chunksize=batch_count)):
			
 
				-            trans.execute_df_save(df, table_name)
			
 
				-            count = (i + 1) * batch_count
			
 
				-            trans_print(base_name, f"Chunk {count} written to MySQL.")
			
 
				+        trans_print(f"保存{table_name},{base_name},{wind_turbine_number},数据：{df.shape[0]}")
			
 
				+        trans.execute_df_save(df, table_name, batch_count)
			
 
				+        trans_print(f"保存到{table_name},{base_name},{wind_turbine_number} 成功,总条数:{df.shape[0]}")
			
 
				     except Exception as e:
			
 
				         trans_print(traceback.format_exc())
			
 
				         message = base_name + str(e)
			
@@ -198,11 +220,10 @@ def save_partation_file_to_db(table_name: str, file: str, wind_turbine_number, d
 
				 def save_file_to_db(table_name: str, file: str, batch_count=100000):
			
 
				     base_name = path.basename(file)
			
 
				     try:
			
 
				-        for i, df in enumerate(pd.read_csv(file, chunksize=batch_count)):
			
 
				-            # df.to_sql(table_name, engine, if_exists='append', index=False)
			
 
				-            trans.execute_df_save(df, table_name)
			
 
				-            count = (i + 1) * batch_count
			
 
				-            trans_print(base_name, f"Chunk {count} written to MySQL.")
			
 
				+        df = pd.read_csv(file)
			
 
				+        trans_print(f"保存{table_name},总条数：{df.shape[0]}")
			
 
				+        trans.execute_df_save(df, table_name, batch_count)
			
 
				+        trans_print(f"保存到{table_name}成功,总条数:{df.shape[0]}")
			
 
				     except Exception as e:
			
 
				         trans_print(traceback.format_exc())
			
 
				         message = base_name + str(e)
			
@@ -210,12 +231,10 @@ def save_file_to_db(table_name: str, file: str, batch_count=100000):
 
				 
			
 
				 
			
 
				 def save_df_to_db(table_name: str, df: pd.DataFrame(), batch_count=100000):
			
 
				-    split_dfs = [df.iloc[i:i + batch_count] for i in range(0, len(df), batch_count)]
			
 
				     try:
			
 
				-        for i, split_df in enumerate(split_dfs):
			
 
				-            trans.execute_df_save(split_df, table_name)
			
 
				-            count = (i + 1) * batch_count
			
 
				-            trans_print(f"Chunk {count} written to MySQL.")
			
 
				+        trans_print(f"保存{table_name},总条数：{df.shape[0]}")
			
 
				+        trans.execute_df_save(df, table_name, batch_count)
			
 
				+        trans_print(f"保存到{table_name}成功,总条数:{df.shape[0]}")
			
 
				     except Exception as e:
			
 
				         trans_print(traceback.format_exc())
			
 
				         raise Exception(str(e))
			
--- a/utils/db/ConnectMysql.py
+++ b/utils/db/ConnectMysql.py
@@ -48,8 +48,8 @@ class ConnectMysql:
 
				         dbname = config['database']
			
 
				         return create_engine(f'mysql+pymysql://{username}:{password}@{host}:{port}/{dbname}')
			
 
				 
			
 
				-    def execute_df_save(self, df, table_name):
			
 
				-        df.to_sql(table_name, self.get_engine(), index=False, if_exists='append')
			
 
				+    def execute_df_save(self, df, table_name, chunk_size=10000):
			
 
				+        df.to_sql(table_name, self.get_engine(), index=False, if_exists='append', chunksize=chunk_size)
			
 
				 
			
 
				     def read_sql_to_df(self, sql):
			
 
				         df = pd.read_sql_query(sql, self.get_engine())
			
--- a/utils/db/ConnectMysql_tidb_fix.py
+++ b/utils/db/ConnectMysql_tidb_fix.py
@@ -0,0 +1,80 @@
 
				+import time
			
 
				+import traceback
			
 
				+from os import *
			
 
				+
			
 
				+import pandas as pd
			
 
				+import pymysql
			
 
				+from pymysql.cursors import DictCursor
			
 
				+from sqlalchemy import create_engine
			
 
				+
			
 
				+from utils.conf.read_conf import yaml_conf
			
 
				+from utils.log.trans_log import trans_print
			
 
				+
			
 
				+
			
 
				+class ConnectMysql:
			
 
				+
			
 
				+    def __init__(self, connet_name):
			
 
				+        self.yaml_data = yaml_conf(environ.get('ETL_CONF'))
			
 
				+        self.connet_name = connet_name
			
 
				+        self.config = self.yaml_data[self.connet_name]
			
 
				+        self.database = self.config['database']
			
 
				+
			
 
				+    # 从连接池中获取一个连接
			
 
				+    def get_conn(self):
			
 
				+        return pymysql.connect(**self.config, autocommit=True)
			
 
				+
			
 
				+    # 使用连接执行sql
			
 
				+    def execute(self, sql, params=tuple()):
			
 
				+
			
 
				+        with self.get_conn() as conn:
			
 
				+            with conn.cursor(cursor=DictCursor) as cursor:
			
 
				+                try:
			
 
				+                    cursor.execute(sql, params)
			
 
				+                    trans_print("开始执行SQL:", cursor._executed)
			
 
				+                    conn.commit()
			
 
				+                    result = cursor.fetchall()
			
 
				+                    return result
			
 
				+                except Exception as e:
			
 
				+                    trans_print(f"执行sql：{sql}，报错：{e}")
			
 
				+                    trans_print(traceback.format_exc())
			
 
				+                    conn.rollback()
			
 
				+                    raise e
			
 
				+
			
 
				+    def get_engine(self):
			
 
				+        config = self.config
			
 
				+        username = config['user']
			
 
				+        password = config['password']
			
 
				+        host = config['host']
			
 
				+        port = config['port']
			
 
				+        dbname = config['database']
			
 
				+        return create_engine(f'mysql+pymysql://{username}:{password}@{host}:{port}/{dbname}',
			
 
				+                             pool_pre_ping=True,
			
 
				+                             isolation_level="READ COMMITTED",
			
 
				+                             connect_args={
			
 
				+                                 'connect_timeout': 30,
			
 
				+                                 'read_timeout': 120,
			
 
				+                                 'write_timeout': 7200
			
 
				+                             })
			
 
				+
			
 
				+    def execute_df_save(self, df, table_name, chunksize=10000):
			
 
				+        engine = self.get_engine()
			
 
				+        try:
			
 
				+            retry_count = 0
			
 
				+            max_retries = 3
			
 
				+            while retry_count < max_retries:
			
 
				+                try:
			
 
				+                    df.to_sql(table_name, engine, if_exists='append', index=False, chunksize=chunksize)
			
 
				+                except Exception as e:
			
 
				+                    retry_count += 1
			
 
				+                    trans_print(f" 第 {retry_count} 次重试, 错误: {str(e)}")
			
 
				+                    time.sleep(5 * retry_count)  # 指数退避
			
 
				+                    if retry_count == max_retries:
			
 
				+                        trans_print(f"处理失败: {str(e)}")
			
 
				+                        raise
			
 
				+        except Exception as e:
			
 
				+            engine.dispose()
			
 
				+            raise e
			
 
				+
			
 
				+    def read_sql_to_df(self, sql):
			
 
				+        df = pd.read_sql_query(sql, self.get_engine())
			
 
				+        return df
			
--- a/utils/draw/draw_file.py
+++ b/utils/draw/draw_file.py
@@ -1,81 +1,81 @@
 
				-import matplotlib
			
 
				-
			
 
				-from utils.file.trans_methods import create_file_path
			
 
				-
			
 
				-matplotlib.use('Agg')
			
 
				-matplotlib.rcParams['font.family'] = 'SimHei'
			
 
				-matplotlib.rcParams['font.sans-serif'] = ['SimHei']
			
 
				-matplotlib.rcParams['axes.unicode_minus'] = False
			
 
				-from matplotlib import pyplot as plt
			
 
				-
			
 
				-
			
 
				-def scatter(title, x_label, y_label, x_values, y_values, color=None, col_map=dict(), size=10,
			
 
				-            save_file_path=''):
			
 
				-    if save_file_path:
			
 
				-        create_file_path(save_file_path, True)
			
 
				-    else:
			
 
				-        save_file_path = title + '.png'
			
 
				-
			
 
				-    plt.figure(figsize=(8, 6))
			
 
				-    plt.title(title, fontsize=16)
			
 
				-    plt.xlabel(x_label, fontsize=14)
			
 
				-    plt.ylabel(y_label, fontsize=14)
			
 
				-    if color is not None:
			
 
				-        plt.scatter(x_values, y_values, s=size, c=color)
			
 
				-        if col_map:
			
 
				-            patches = [plt.Rectangle((0, 0), 1, 1, fc=c) for c in col_map.values()]
			
 
				-            plt.legend(patches, list(col_map.keys()))
			
 
				-    else:
			
 
				-        plt.scatter(x_values, y_values, s=size)
			
 
				-
			
 
				-    plt.savefig(save_file_path)
			
 
				-    plt.close()
			
 
				-
			
 
				-
			
 
				-if __name__ == '__main__':
			
 
				-    import pandas as pd
			
 
				-    import numpy as np
			
 
				-    from matplotlib import pyplot as plt
			
 
				-
			
 
				-    df = pd.read_csv(r"/home/wzl/test_data/2024_10_17_14_54_46_200k_Root.csv")
			
 
				-    df.reset_index(inplace=True, drop=True)
			
 
				-    df.columns = ['data']
			
 
				-
			
 
				-    # Calculate the moving average with a window of 3 (1 before, 1 after)
			
 
				-    window_size = 20
			
 
				-    moving_avg = df['data'].rolling(window=window_size).mean()
			
 
				-    df['moving_avg'] = moving_avg
			
 
				-    # Calculate the percentage difference
			
 
				-    percentage_diff = abs((df['data'] - moving_avg) / moving_avg) * 100
			
 
				-    df['percentage_diff'] = percentage_diff
			
 
				-    # Flag values that differ by more than threshold
			
 
				-    threshold = 3
			
 
				-    df['is_anomaly'] = percentage_diff < threshold
			
 
				-
			
 
				-    avg = df['data'].mean()
			
 
				-    df['avg']=df['data'] > avg
			
 
				-
			
 
				-
			
 
				-    difference_ratio = df.iloc[window_size:]
			
 
				-    difference_ratio.reset_index(inplace=True)
			
 
				-    # 创建图形和轴对象
			
 
				-    plt.figure(figsize=(10, 6))
			
 
				-    colors = np.where((difference_ratio['is_anomaly'] == True) & (difference_ratio['avg'] == True), 'r', np.where((difference_ratio['is_anomaly'] == False) & (difference_ratio['avg'] == False), 'g', 'b'))
			
 
				-
			
 
				-    datas = difference_ratio['data'].values
			
 
				-    # for i in range(len(datas)):
			
 
				-    #     plt.plot(i, datas[i], marker='o', color=colors[i])
			
 
				-
			
 
				-    plt.figure(figsize=(10, 6))
			
 
				-    plt.scatter([i for i in range(len(datas))], datas,  c=colors)
			
 
				-
			
 
				-    # 添加标题和标签
			
 
				-    plt.title('Difference Ratio of Each Data Point to Its Previous 10 Data Points Mean')
			
 
				-    plt.xlabel('Index')
			
 
				-    plt.ylabel('Difference Ratio')
			
 
				-
			
 
				-    # 显示网格
			
 
				-    plt.grid(True)
			
 
				-
			
 
				-    # 显示图形
			
 
				-    plt.show()
			
 
				+# import matplotlib
			
 
				+#
			
 
				+# from utils.file.trans_methods import create_file_path
			
 
				+#
			
 
				+# matplotlib.use('Agg')
			
 
				+# matplotlib.rcParams['font.family'] = 'SimHei'
			
 
				+# matplotlib.rcParams['font.sans-serif'] = ['SimHei']
			
 
				+# matplotlib.rcParams['axes.unicode_minus'] = False
			
 
				+# from matplotlib import pyplot as plt
			
 
				+#
			
 
				+#
			
 
				+# def scatter(title, x_label, y_label, x_values, y_values, color=None, col_map=dict(), size=10,
			
 
				+#             save_file_path=''):
			
 
				+#     if save_file_path:
			
 
				+#         create_file_path(save_file_path, True)
			
 
				+#     else:
			
 
				+#         save_file_path = title + '.png'
			
 
				+#
			
 
				+#     plt.figure(figsize=(8, 6))
			
 
				+#     plt.title(title, fontsize=16)
			
 
				+#     plt.xlabel(x_label, fontsize=14)
			
 
				+#     plt.ylabel(y_label, fontsize=14)
			
 
				+#     if color is not None:
			
 
				+#         plt.scatter(x_values, y_values, s=size, c=color)
			
 
				+#         if col_map:
			
 
				+#             patches = [plt.Rectangle((0, 0), 1, 1, fc=c) for c in col_map.values()]
			
 
				+#             plt.legend(patches, list(col_map.keys()))
			
 
				+#     else:
			
 
				+#         plt.scatter(x_values, y_values, s=size)
			
 
				+#
			
 
				+#     plt.savefig(save_file_path)
			
 
				+#     plt.close()
			
 
				+#
			
 
				+#
			
 
				+# if __name__ == '__main__':
			
 
				+#     import pandas as pd
			
 
				+#     import numpy as np
			
 
				+#     from matplotlib import pyplot as plt
			
 
				+#
			
 
				+#     df = pd.read_csv(r"/home/wzl/test_data/2024_10_17_14_54_46_200k_Root.csv")
			
 
				+#     df.reset_index(inplace=True, drop=True)
			
 
				+#     df.columns = ['data']
			
 
				+#
			
 
				+#     # Calculate the moving average with a window of 3 (1 before, 1 after)
			
 
				+#     window_size = 20
			
 
				+#     moving_avg = df['data'].rolling(window=window_size).mean()
			
 
				+#     df['moving_avg'] = moving_avg
			
 
				+#     # Calculate the percentage difference
			
 
				+#     percentage_diff = abs((df['data'] - moving_avg) / moving_avg) * 100
			
 
				+#     df['percentage_diff'] = percentage_diff
			
 
				+#     # Flag values that differ by more than threshold
			
 
				+#     threshold = 3
			
 
				+#     df['is_anomaly'] = percentage_diff < threshold
			
 
				+#
			
 
				+#     avg = df['data'].mean()
			
 
				+#     df['avg']=df['data'] > avg
			
 
				+#
			
 
				+#
			
 
				+#     difference_ratio = df.iloc[window_size:]
			
 
				+#     difference_ratio.reset_index(inplace=True)
			
 
				+#     # 创建图形和轴对象
			
 
				+#     plt.figure(figsize=(10, 6))
			
 
				+#     colors = np.where((difference_ratio['is_anomaly'] == True) & (difference_ratio['avg'] == True), 'r', np.where((difference_ratio['is_anomaly'] == False) & (difference_ratio['avg'] == False), 'g', 'b'))
			
 
				+#
			
 
				+#     datas = difference_ratio['data'].values
			
 
				+#     # for i in range(len(datas)):
			
 
				+#     #     plt.plot(i, datas[i], marker='o', color=colors[i])
			
 
				+#
			
 
				+#     plt.figure(figsize=(10, 6))
			
 
				+#     plt.scatter([i for i in range(len(datas))], datas,  c=colors)
			
 
				+#
			
 
				+#     # 添加标题和标签
			
 
				+#     plt.title('Difference Ratio of Each Data Point to Its Previous 10 Data Points Mean')
			
 
				+#     plt.xlabel('Index')
			
 
				+#     plt.ylabel('Difference Ratio')
			
 
				+#
			
 
				+#     # 显示网格
			
 
				+#     plt.grid(True)
			
 
				+#
			
 
				+#     # 显示图形
			
 
				+#     plt.show()