
Add TiDB
Process the 白山 (Baishan) vibration data

魏志亮 2 months ago
parent
commit
286ae47332

+ 5 - 1
app_run.py

@@ -49,7 +49,11 @@ if __name__ == '__main__':
     if len(sys.argv) >= 2:
         env = sys.argv[1]
 
-    conf_path = path.abspath(f"./conf/etl_config_{env}.yaml")
+    if env.endswith('.yaml'):
+        conf_path = env
+    else:
+        conf_path = path.abspath(f"./conf/etl_config_{env}.yaml")
+
     environ['ETL_CONF'] = conf_path
     yaml_config = yaml_conf(conf_path)
     environ['env'] = env
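
Note: with this hunk the first CLI argument may be either an environment name (prod, dev, ...) or a direct path to a YAML file. A minimal, self-contained sketch of the resolution logic (the 'dev' fallback is an assumption for illustration):

import sys
from os import path

def resolve_conf_path(arg: str) -> str:
    # A literal .yaml path is used as-is; anything else is treated as an
    # environment name and expanded to the conventional config location.
    if arg.endswith('.yaml'):
        return arg
    return path.abspath(f"./conf/etl_config_{arg}.yaml")

if __name__ == '__main__':
    env = sys.argv[1] if len(sys.argv) >= 2 else 'dev'
    print(resolve_conf_path(env))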

+ 31 - 0
conf/etl_config_tidbprod.yaml

@@ -0,0 +1,31 @@
+plt:
+  database: energy
+  host: 192.168.50.234
+  password: '123456'
+  port: 4000
+  user: root
+
+trans:
+  database: energy_data_prod
+  host: 192.168.50.235
+  password: admin123456
+  port: 4000
+  user: root
+
+# If cleaned data should stay under the original path, configure this; the name below (收资数据) is used as the split point when creating the cleaned-data folder
+etl_origin_path_contain: 收资数据
+# If saving to a separate location, configure this path instead
+save_path:
+
+# Directory for log output
+log_path_dir: /data/logs/no_batch_trans_tidb
+
+# Location for temporary files; the /tmp provided on some client hosts is too small, so it is configurable here
+tmp_base_path: /data/download/collection_data/tmp
+
+run_batch_count: 2
+
+archive_path: /data/download/collection_data/archive/prod_ti_db
+
+
+use_tidb: True
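
A quick sanity check of this file with plain PyYAML (the relative path is an assumption; the repo's read_conf helper applies the same False default for a missing key):

import yaml

with open('conf/etl_config_tidbprod.yaml', encoding='utf-8') as f:
    cfg = yaml.safe_load(f)

# use_tidb gates the TiDB-specific branches added in this commit.
print(cfg.get('use_tidb', False))         # True
print(cfg['trans']['database'])           # energy_data_prod
print(cfg.get('save_path') or '(unset)')  # save_path is deliberately blank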

+ 4 - 1
etl/common/PathsAndTable.py

@@ -22,6 +22,9 @@ class PathsAndTable(object):
         self.wind_col_trans = wind_col_trans
 
         save_path_conf = read_conf(yaml_config, "save_path")
+
+        self.use_tidb = read_conf(yaml_config, 'use_tidb', False)
+
         self.tmp_base_path = read_conf(yaml_config, "tmp_base_path", "/tmp")
         if save_path_conf:
             self.save_path = save_path_conf + sep + self.wind_farm_name
@@ -73,7 +76,7 @@ class PathsAndTable(object):
         if self.save_db:
             trans_print("开始创建表")
             if self.read_type in ['second', 'minute']:
-                creat_min_sec_table(self.get_table_name(), self.read_type)
+                creat_min_sec_table(self.get_table_name(), self.read_type, self.use_tidb)
             elif self.read_type in ['fault', 'warn']:
                 create_warn_fault_table(self.get_table_name())
             else:

+ 4 - 4
etl/common/SaveToDb.py

@@ -4,7 +4,7 @@ import traceback
 
 from etl.common.PathsAndTable import PathsAndTable
 from service.trans_conf_service import update_trans_transfer_progress
-from service.trans_service import save_partation_file_to_db, save_file_to_db
+from service.trans_service import save_scada_file_to_db, save_file_to_db
 from utils.file.trans_methods import split_array
 from utils.log.trans_log import trans_print
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
@@ -32,13 +32,13 @@ class SaveToDb(object):
         all_arrays = split_array(all_saved_files, split_count)
         try:
             for index, arr in enumerate(all_arrays):
-                with multiprocessing.Pool(split_count) as pool:
+                with multiprocessing.Pool(10) as pool:
                     if self.pathsAndTable.read_type in ['minute', 'second']:
-                        pool.starmap(save_partation_file_to_db,
+                        pool.starmap(save_scada_file_to_db,
                                      [(self.pathsAndTable.get_table_name(), file,
                                        self.pathsAndTable.wind_col_trans[os.path.basename(file).split(".")[0]],
                                        os.path.basename(os.path.dirname(file)),
-                                       self.batch_count) for file in arr])
+                                       self.batch_count, self.pathsAndTable.use_tidb) for file in arr])
                     else:
                         pool.starmap(save_file_to_db,
                                      [(self.pathsAndTable.get_table_name(), file, self.batch_count) for file in arr])
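
The pool size is now pinned at 10 instead of split_count, which bounds the number of concurrent DB writers. A common middle ground (a sketch, not the author's code) caps the computed count rather than replacing it:

import multiprocessing

def make_save_pool(split_count, cap=10):
    # Never exceed `cap` concurrent writers, but also never spawn more
    # workers than the computed split_count would have used.
    return multiprocessing.Pool(min(max(split_count, 1), cap))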

+ 9 - 3
etl/wind_power/min_sec/ReadAndSaveTmp.py

@@ -244,9 +244,15 @@ class ReadAndSaveTmp(object):
                                      resolve_col_prefix=self.trans_param.resolve_col_prefix)
             else:
                 if self.trans_param.need_valid_cols:
-                    df = read_file_to_df(file_path, read_cols, trans_cols=trans_cols)
+                    if self.trans_param.resolve_col_prefix:
+                        df = read_file_to_df(file_path, trans_cols=trans_cols,
+                                             resolve_col_prefix=self.trans_param.resolve_col_prefix)
+                    else:
+                        df = read_file_to_df(file_path, read_cols, trans_cols=trans_cols,
+                                             resolve_col_prefix=self.trans_param.resolve_col_prefix)
                 else:
-                    df = read_file_to_df(file_path, trans_cols=trans_cols)
+                    df = read_file_to_df(file_path, trans_cols=trans_cols,
+                                         resolve_col_prefix=self.trans_param.resolve_col_prefix)
 
             # 处理列名前缀问题
             if self.trans_param.resolve_col_prefix:
@@ -360,6 +366,6 @@ class ReadAndSaveTmp(object):
         trans_print("开始保存数据到临时文件")
         begin = datetime.datetime.now()
         self.read_file_and_save_tmp()
-        update_trans_transfer_progress(self.pathsAndTable.id,  50,
+        update_trans_transfer_progress(self.pathsAndTable.id, 50,
                                        self.pathsAndTable.save_db)
         trans_print("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin)

+ 16 - 9
etl/wind_power/wave/WaveTrans.py

@@ -1,17 +1,18 @@
-import datetime
 import json
 import multiprocessing
+import traceback
 
 from service.plt_service import get_all_wind
+from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
+    update_trans_status_success, update_trans_status_error
 from service.trans_service import get_wave_conf, save_df_to_db, get_or_create_wave_table, \
     get_wave_data, delete_exist_wave_data
-from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
-    update_trans_status_success
 from utils.file.trans_methods import *
 from utils.log.trans_log import set_trance_id
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 
 exec("from os.path import *")
+exec("import re")
 
 
 class WaveTrans(object):
@@ -46,8 +47,7 @@ class WaveTrans(object):
         update_trans_status_running(self.id)
         trance_id = '-'.join([self.wind_farm_code, 'wave'])
         set_trance_id(trance_id)
-        all_files = read_files(self.read_dir, ['txt'])
-        self.data_count = len(all_files)
+        all_files = read_files(self.read_dir, ['txt', 'csv'])
         update_trans_transfer_progress(self.id, 5)
         # Use at most 1/2 of the system CPUs
         split_count = get_available_cpu_count_with_percent(1 / 2)
@@ -68,7 +68,7 @@ class WaveTrans(object):
 
         mesure_poins = [key for key, value in wave_conf.items() if str(key).startswith('conf_') and value]
         for point in mesure_poins:
-            map_dict[wave_conf[point]] = point.replace('conf_', '')
+            map_dict[wave_conf[point].strip()] = point.replace('conf_', '')
 
         wind_turbine_name_set = set()
 
@@ -77,8 +77,15 @@ class WaveTrans(object):
         for index, now_array in enumerate(all_array):
             index_begin = datetime.datetime.now()
             with multiprocessing.Pool(split_count) as pool:
-                file_datas = pool.starmap(self.get_data_exec,
-                                          [(base_param_exec, i, list(map_dict.keys())) for i in now_array])
+                try:
+                    file_datas = pool.starmap(self.get_data_exec,
+                                              [(base_param_exec, i, list(map_dict.keys())) for i in now_array])
+                    trans_print(f'Total: {len(now_array)}, returned: {len(file_datas)}')
+                except Exception as e:
+                    message = str(e)
+                    trans_print(traceback.format_exc())
+                    update_trans_status_error(self.id, message[0:len(message) if len(message) < 100 else 100])
+                    raise e
 
             update_trans_transfer_progress(self.id, 20 + int(index / total_index * 60))
             trans_print("读取文件耗时:", datetime.datetime.now() - self.begin)
@@ -102,13 +109,13 @@ class WaveTrans(object):
                              mesure_data])
 
             if result_list:
+                self.data_count = self.data_count + len(result_list)
                 df = pd.DataFrame(result_list,
                                   columns=['wind_turbine_name', 'time_stamp', 'rotational_speed', 'sampling_frequency',
                                            'mesure_point_name', 'type', 'mesure_data'])
                 df['time_stamp'] = pd.to_datetime(df['time_stamp'], errors='coerce')
                 df['mesure_point_name'] = df['mesure_point_name'].map(map_dict)
                 df.dropna(subset=['mesure_point_name'], inplace=True)
-
                 df['wind_turbine_number'] = df['wind_turbine_name'].map(all_wind).fillna(df['wind_turbine_name'])
 
                 df['mesure_data'] = df['mesure_data'].apply(lambda x: json.dumps(x))
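
The added .strip() guards against stray whitespace in configured measurement point names. A minimal illustration with hypothetical conf_ entries:

wave_conf = {'conf_GBX': ' gearbox ', 'conf_GEN': 'generator', 'conf_unset': None}
map_dict = {}
points = [k for k, v in wave_conf.items() if str(k).startswith('conf_') and v]
for point in points:
    # Without .strip(), ' gearbox ' would never match the point name parsed
    # from the data files, and those rows would later be dropped by dropna.
    map_dict[wave_conf[point].strip()] = point.replace('conf_', '')
print(map_dict)  # {'gearbox': 'GBX', 'generator': 'GEN'}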

+ 1 - 1
nutika_package.sh

@@ -1,3 +1,3 @@
 #!/bin/bash
-nuitka --standalone --onefile --include-data-files=./conf/*=./conf/  --output-dir=/home/wzl/project/install_package --remove-output app_run.py
+nuitka --standalone --onefile --static-libpython=yes  --include-data-files=./conf/*=./conf/  --output-dir=/home/wzl/project/install_package --remove-output app_run.py
 

+ 0 - 1
requirements.txt

@@ -4,7 +4,6 @@ PyMySQL~=1.1.0
 SQLAlchemy~=2.0.30
 rarfile~=4.2
 PyYAML~=6.0.1
-matplotlib~=3.9.0
 chardet~=3.0.4
 psutil~=6.0.0
 openpyxl ~= 3.1.4

+ 8 - 0
service/plt_service.py

@@ -39,3 +39,11 @@ def get_base_wind_and_power(wind_turbine_number):
         return None
     return dict_datas
 
+
+if __name__ == '__main__':
+    from os import path,environ
+    env = 'prod'
+    conf_path = path.abspath(f"./conf/etl_config_{env}.yaml")
+    environ['ETL_CONF'] = conf_path
+    environ['env'] = env
+    print(get_all_wind('WOF039800012'))

+ 39 - 20
service/trans_service.py

@@ -6,10 +6,10 @@ from os import *
 
 import pandas as pd
 
+from service.common_connect import trans
 from service.trans_conf_service import create_wave_table
 from utils.file.trans_methods import split_array
 from utils.log.trans_log import trans_print
-from service.common_connect import trans
 
 
 def get_min_sec_conf(field_code, trans_type) -> dict:
@@ -58,7 +58,7 @@ def get_wave_conf(field_code) -> dict:
     return res[0]
 
 
-def creat_min_sec_table(table_name, trans_type):
+def creat_min_sec_table(table_name, trans_type, use_tidb=False):
     exists_table_sql = f"""
     select count(1) as count from information_schema.tables where table_schema = '{trans.database}' and table_name = '{table_name}'
     """
@@ -135,7 +135,10 @@ def creat_min_sec_table(table_name, trans_type):
              KEY `time_stamp` (`time_stamp`),
              KEY `wind_turbine_number` (`wind_turbine_number`),
              {add_key}
-        )
+        ) 
+        """
+        # if not use_tidb:
+        create_sql = create_sql + f"""
         PARTITION BY LIST COLUMNS ({key}, `wind_turbine_number`) (
         PARTITION pDefault VALUES IN ((000000, 'wind_turbine_number'))
         ) 
@@ -177,18 +180,37 @@ def add_or_remove_partation(table_name: str, date_str: str, wind_turbine_number)
         add_partation(table_name, date_str, wind_turbine_number)
 
 
-def save_partation_file_to_db(table_name: str, file: str, wind_turbine_number, date_str, batch_count=100000):
+def drop_exists_data(table_name, wind_turbine_number, min_date, max_date):
+    # sql = f"# delete from {table_name} where wind_turbine_number = '{wind_turbine_number}' and time_stamp between '{min_date}' and '{max_date}'"
+
+    sql = f"""
+    BATCH ON `time_stamp`, `wind_turbine_number` LIMIT 1000 
+    DELETE FROM `{table_name}` 
+    WHERE `time_stamp` >= "{min_date}"
+    AND `time_stamp` <= "{max_date}"
+    AND `wind_turbine_number` = "{wind_turbine_number}";
+    """
+
+    count = trans.execute(sql)
+    trans_print(f"删除数据{count}条,{table_name},{wind_turbine_number},{min_date},{max_date}")
+
+
+def save_scada_file_to_db(table_name, file: str, wind_turbine_number, date_str, batch_count=100000, use_tidb=False):
     base_name = path.basename(file)
-    # wind_turbine_number = path.basename(file).split(".")[0]
-    # date_str = path.basename(path.dirname(file))
+    df = pd.read_csv(file)
+    # if use_tidb:
+    #     min_date = df['time_stamp'].min()
+    #     max_date = df['time_stamp'].max()
+    #     # drop_exists_data(table_name, wind_turbine_number, min_date, max_date)
+    # else:
+    #     add_or_remove_partation(table_name, date_str, wind_turbine_number)
 
     add_or_remove_partation(table_name, date_str, wind_turbine_number)
 
     try:
-        for i, df in enumerate(pd.read_csv(file, chunksize=batch_count)):
-            trans.execute_df_save(df, table_name)
-            count = (i + 1) * batch_count
-            trans_print(base_name, f"Chunk {count} written to MySQL.")
+        trans_print(f"保存{table_name},{base_name},{wind_turbine_number},数据:{df.shape[0]}")
+        trans.execute_df_save(df, table_name, batch_count)
+        trans_print(f"保存到{table_name},{base_name},{wind_turbine_number} 成功,总条数:{df.shape[0]}")
     except Exception as e:
         trans_print(traceback.format_exc())
         message = base_name + str(e)
@@ -198,11 +220,10 @@ def save_partation_file_to_db(table_name: str, file: str, wind_turbine_number, d
 def save_file_to_db(table_name: str, file: str, batch_count=100000):
     base_name = path.basename(file)
     try:
-        for i, df in enumerate(pd.read_csv(file, chunksize=batch_count)):
-            # df.to_sql(table_name, engine, if_exists='append', index=False)
-            trans.execute_df_save(df, table_name)
-            count = (i + 1) * batch_count
-            trans_print(base_name, f"Chunk {count} written to MySQL.")
+        df = pd.read_csv(file)
+        trans_print(f"保存{table_name},总条数:{df.shape[0]}")
+        trans.execute_df_save(df, table_name, batch_count)
+        trans_print(f"保存到{table_name}成功,总条数:{df.shape[0]}")
     except Exception as e:
         trans_print(traceback.format_exc())
         message = base_name + str(e)
@@ -210,12 +231,10 @@ def save_file_to_db(table_name: str, file: str, batch_count=100000):
 
 
 def save_df_to_db(table_name: str, df: pd.DataFrame, batch_count=100000):
-    split_dfs = [df.iloc[i:i + batch_count] for i in range(0, len(df), batch_count)]
     try:
-        for i, split_df in enumerate(split_dfs):
-            trans.execute_df_save(split_df, table_name)
-            count = (i + 1) * batch_count
-            trans_print(f"Chunk {count} written to MySQL.")
+        trans_print(f"保存{table_name},总条数:{df.shape[0]}")
+        trans.execute_df_save(df, table_name, batch_count)
+        trans_print(f"保存到{table_name}成功,总条数:{df.shape[0]}")
     except Exception as e:
         trans_print(traceback.format_exc())
         raise Exception(str(e))
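
drop_exists_data relies on TiDB's non-transactional DML: BATCH ON <columns> LIMIT n DELETE splits one large delete into many small auto-committed batches so it stays under the transaction size limit (plain MySQL does not accept this syntax). A sketch of the statement shape with illustrative values; the inputs are interpolated into SQL, so real code should validate them:

def batch_delete_sql(table_name, turbine, min_ts, max_ts, limit=1000):
    # TiDB splits the DELETE on the BATCH ON columns, committing every
    # `limit` rows as its own transaction.
    return (
        f"BATCH ON `time_stamp`, `wind_turbine_number` LIMIT {limit} "
        f"DELETE FROM `{table_name}` "
        f"WHERE `time_stamp` >= '{min_ts}' AND `time_stamp` <= '{max_ts}' "
        f"AND `wind_turbine_number` = '{turbine}'"
    )

print(batch_delete_sql('energy_second', 'WT001', '2024-01-01', '2024-01-31'))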

+ 2 - 2
utils/db/ConnectMysql.py

@@ -48,8 +48,8 @@ class ConnectMysql:
         dbname = config['database']
         return create_engine(f'mysql+pymysql://{username}:{password}@{host}:{port}/{dbname}')
 
-    def execute_df_save(self, df, table_name):
-        df.to_sql(table_name, self.get_engine(), index=False, if_exists='append')
+    def execute_df_save(self, df, table_name, chunk_size=10000):
+        df.to_sql(table_name, self.get_engine(), index=False, if_exists='append', chunksize=chunk_size)
 
     def read_sql_to_df(self, sql):
         df = pd.read_sql_query(sql, self.get_engine())
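
With this change the batching moves from chunked pd.read_csv into to_sql's chunksize, which controls how many rows go into each INSERT round-trip. A runnable sketch against in-memory SQLite:

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('sqlite://')
df = pd.DataFrame({'wind_turbine_number': ['A01'] * 5, 'value': range(5)})
# chunksize=2 sends the rows in batches of 2 instead of one giant statement.
df.to_sql('demo', engine, index=False, if_exists='append', chunksize=2)
print(pd.read_sql_query('select count(*) as n from demo', engine))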

+ 80 - 0
utils/db/ConnectMysql_tidb_fix.py

@@ -0,0 +1,80 @@
+import time
+import traceback
+from os import *
+
+import pandas as pd
+import pymysql
+from pymysql.cursors import DictCursor
+from sqlalchemy import create_engine
+
+from utils.conf.read_conf import yaml_conf
+from utils.log.trans_log import trans_print
+
+
+class ConnectMysql:
+
+    def __init__(self, connet_name):
+        self.yaml_data = yaml_conf(environ.get('ETL_CONF'))
+        self.connet_name = connet_name
+        self.config = self.yaml_data[self.connet_name]
+        self.database = self.config['database']
+
+    # Open a new database connection (one per call; there is no pool here)
+    def get_conn(self):
+        return pymysql.connect(**self.config, autocommit=True)
+
+    # Execute SQL on a connection
+    def execute(self, sql, params=tuple()):
+
+        with self.get_conn() as conn:
+            with conn.cursor(cursor=DictCursor) as cursor:
+                try:
+                    cursor.execute(sql, params)
+                    trans_print("开始执行SQL:", cursor._executed)
+                    conn.commit()
+                    result = cursor.fetchall()
+                    return result
+                except Exception as e:
+                    trans_print(f"执行sql:{sql},报错:{e}")
+                    trans_print(traceback.format_exc())
+                    conn.rollback()
+                    raise e
+
+    def get_engine(self):
+        config = self.config
+        username = config['user']
+        password = config['password']
+        host = config['host']
+        port = config['port']
+        dbname = config['database']
+        return create_engine(f'mysql+pymysql://{username}:{password}@{host}:{port}/{dbname}',
+                             pool_pre_ping=True,
+                             isolation_level="READ COMMITTED",
+                             connect_args={
+                                 'connect_timeout': 30,
+                                 'read_timeout': 120,
+                                 'write_timeout': 7200
+                             })
+
+    def execute_df_save(self, df, table_name, chunksize=10000):
+        engine = self.get_engine()
+        try:
+            retry_count = 0
+            max_retries = 3
+            while retry_count < max_retries:
+                try:
+                    df.to_sql(table_name, engine, if_exists='append', index=False, chunksize=chunksize)
+                    break  # success: without this, the loop would re-run to_sql and duplicate rows
+                except Exception as e:
+                    retry_count += 1
+                    trans_print(f"Retry {retry_count}, error: {str(e)}")
+                    if retry_count == max_retries:
+                        trans_print(f"Save failed: {str(e)}")
+                        raise
+                    time.sleep(5 * retry_count)  # linear backoff: 5s, then 10s
+        except Exception as e:
+            engine.dispose()
+            raise e
+
+    def read_sql_to_df(self, sql):
+        df = pd.read_sql_query(sql, self.get_engine())
+        return df
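
Note that the retry loop must break once to_sql succeeds; otherwise a successful write would run again and re-insert the same rows. The same retry shape as a small reusable helper (a sketch; the 5-second step gives linear, not exponential, backoff):

import time

def with_retries(fn, max_retries=3, base_delay=5):
    # Call fn() until it succeeds or max_retries attempts have failed,
    # sleeping base_delay * attempt seconds between failures.
    for attempt in range(1, max_retries + 1):
        try:
            return fn()
        except Exception:
            if attempt == max_retries:
                raise
            time.sleep(base_delay * attempt)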

+ 81 - 81
utils/draw/draw_file.py

@@ -1,81 +1,81 @@
-import matplotlib
-
-from utils.file.trans_methods import create_file_path
-
-matplotlib.use('Agg')
-matplotlib.rcParams['font.family'] = 'SimHei'
-matplotlib.rcParams['font.sans-serif'] = ['SimHei']
-matplotlib.rcParams['axes.unicode_minus'] = False
-from matplotlib import pyplot as plt
-
-
-def scatter(title, x_label, y_label, x_values, y_values, color=None, col_map=dict(), size=10,
-            save_file_path=''):
-    if save_file_path:
-        create_file_path(save_file_path, True)
-    else:
-        save_file_path = title + '.png'
-
-    plt.figure(figsize=(8, 6))
-    plt.title(title, fontsize=16)
-    plt.xlabel(x_label, fontsize=14)
-    plt.ylabel(y_label, fontsize=14)
-    if color is not None:
-        plt.scatter(x_values, y_values, s=size, c=color)
-        if col_map:
-            patches = [plt.Rectangle((0, 0), 1, 1, fc=c) for c in col_map.values()]
-            plt.legend(patches, list(col_map.keys()))
-    else:
-        plt.scatter(x_values, y_values, s=size)
-
-    plt.savefig(save_file_path)
-    plt.close()
-
-
-if __name__ == '__main__':
-    import pandas as pd
-    import numpy as np
-    from matplotlib import pyplot as plt
-
-    df = pd.read_csv(r"/home/wzl/test_data/2024_10_17_14_54_46_200k_Root.csv")
-    df.reset_index(inplace=True, drop=True)
-    df.columns = ['data']
-
-    # Calculate the moving average with a window of 3 (1 before, 1 after)
-    window_size = 20
-    moving_avg = df['data'].rolling(window=window_size).mean()
-    df['moving_avg'] = moving_avg
-    # Calculate the percentage difference
-    percentage_diff = abs((df['data'] - moving_avg) / moving_avg) * 100
-    df['percentage_diff'] = percentage_diff
-    # Flag values that differ by more than threshold
-    threshold = 3
-    df['is_anomaly'] = percentage_diff < threshold
-
-    avg = df['data'].mean()
-    df['avg']=df['data'] > avg
-
-
-    difference_ratio = df.iloc[window_size:]
-    difference_ratio.reset_index(inplace=True)
-    # Create the figure and axes
-    plt.figure(figsize=(10, 6))
-    colors = np.where((difference_ratio['is_anomaly'] == True) & (difference_ratio['avg'] == True), 'r', np.where((difference_ratio['is_anomaly'] == False) & (difference_ratio['avg'] == False), 'g', 'b'))
-
-    datas = difference_ratio['data'].values
-    # for i in range(len(datas)):
-    #     plt.plot(i, datas[i], marker='o', color=colors[i])
-
-    plt.figure(figsize=(10, 6))
-    plt.scatter([i for i in range(len(datas))], datas,  c=colors)
-
-    # Add title and labels
-    plt.title('Difference Ratio of Each Data Point to Its Previous 10 Data Points Mean')
-    plt.xlabel('Index')
-    plt.ylabel('Difference Ratio')
-
-    # Show the grid
-    plt.grid(True)
-
-    # Show the figure
-    plt.show()
+# import matplotlib
+#
+# from utils.file.trans_methods import create_file_path
+#
+# matplotlib.use('Agg')
+# matplotlib.rcParams['font.family'] = 'SimHei'
+# matplotlib.rcParams['font.sans-serif'] = ['SimHei']
+# matplotlib.rcParams['axes.unicode_minus'] = False
+# from matplotlib import pyplot as plt
+#
+#
+# def scatter(title, x_label, y_label, x_values, y_values, color=None, col_map=dict(), size=10,
+#             save_file_path=''):
+#     if save_file_path:
+#         create_file_path(save_file_path, True)
+#     else:
+#         save_file_path = title + '.png'
+#
+#     plt.figure(figsize=(8, 6))
+#     plt.title(title, fontsize=16)
+#     plt.xlabel(x_label, fontsize=14)
+#     plt.ylabel(y_label, fontsize=14)
+#     if color is not None:
+#         plt.scatter(x_values, y_values, s=size, c=color)
+#         if col_map:
+#             patches = [plt.Rectangle((0, 0), 1, 1, fc=c) for c in col_map.values()]
+#             plt.legend(patches, list(col_map.keys()))
+#     else:
+#         plt.scatter(x_values, y_values, s=size)
+#
+#     plt.savefig(save_file_path)
+#     plt.close()
+#
+#
+# if __name__ == '__main__':
+#     import pandas as pd
+#     import numpy as np
+#     from matplotlib import pyplot as plt
+#
+#     df = pd.read_csv(r"/home/wzl/test_data/2024_10_17_14_54_46_200k_Root.csv")
+#     df.reset_index(inplace=True, drop=True)
+#     df.columns = ['data']
+#
+#     # Calculate the moving average with a window of 3 (1 before, 1 after)
+#     window_size = 20
+#     moving_avg = df['data'].rolling(window=window_size).mean()
+#     df['moving_avg'] = moving_avg
+#     # Calculate the percentage difference
+#     percentage_diff = abs((df['data'] - moving_avg) / moving_avg) * 100
+#     df['percentage_diff'] = percentage_diff
+#     # Flag values that differ by more than threshold
+#     threshold = 3
+#     df['is_anomaly'] = percentage_diff < threshold
+#
+#     avg = df['data'].mean()
+#     df['avg']=df['data'] > avg
+#
+#
+#     difference_ratio = df.iloc[window_size:]
+#     difference_ratio.reset_index(inplace=True)
+#     # Create the figure and axes
+#     plt.figure(figsize=(10, 6))
+#     colors = np.where((difference_ratio['is_anomaly'] == True) & (difference_ratio['avg'] == True), 'r', np.where((difference_ratio['is_anomaly'] == False) & (difference_ratio['avg'] == False), 'g', 'b'))
+#
+#     datas = difference_ratio['data'].values
+#     # for i in range(len(datas)):
+#     #     plt.plot(i, datas[i], marker='o', color=colors[i])
+#
+#     plt.figure(figsize=(10, 6))
+#     plt.scatter([i for i in range(len(datas))], datas,  c=colors)
+#
+#     # Add title and labels
+#     plt.title('Difference Ratio of Each Data Point to Its Previous 10 Data Points Mean')
+#     plt.xlabel('Index')
+#     plt.ylabel('Difference Ratio')
+#
+#     # Show the grid
+#     plt.grid(True)
+#
+#     # Show the figure
+#     plt.show()