wzl 1 mese fa
parent
commit
038509a11a
45 ha cambiato i file con 1720 aggiunte e 1613 eliminazioni
  1. 9 9
      app_run.py
  2. 97 0
      conf/constants.py
  3. 10 10
      conf/etl_config_dev.yaml
  4. 3 3
      etl/common/ArchiveFile.py
  5. 39 27
      etl/common/BaseDataTrans.py
  6. 3 3
      etl/common/ClearData.py
  7. 117 44
      etl/common/CombineAndSaveFormalFile.py
  8. 96 20
      etl/common/PathsAndTable.py
  9. 47 22
      etl/common/SaveToDb.py
  10. 43 18
      etl/common/UnzipAndRemove.py
  11. 3 3
      etl/wind_power/fault_warn/FaultWarnTrans.py
  12. 4 4
      etl/wind_power/laser/LaserTrans.py
  13. 6 6
      etl/wind_power/min_sec/ClassIdentifier.py
  14. 51 26
      etl/wind_power/min_sec/MinSecTrans.py
  15. 111 45
      etl/wind_power/min_sec/ReadAndSaveTmp.py
  16. 53 40
      etl/wind_power/min_sec/StatisticsAndSaveTmpFormalFile.py
  17. 42 7
      etl/wind_power/min_sec/TransParam.py
  18. 53 18
      etl/wind_power/wave/WaveTrans.py
  19. 3 3
      service/common_connect.py
  20. 6 2
      service/trans_conf_service.py
  21. 95 72
      service/trans_service.py
  22. 4 2
      utils/common.py
  23. 137 12
      utils/conf/read_conf.py
  24. 231 41
      utils/db/ConnectMysql.py
  25. 6 6
      utils/db/ConnectMysql_tidb_fix.py
  26. 2 3
      utils/df_utils/util.py
  27. 139 38
      utils/file/trans_methods.py
  28. 0 202
      utils/file/trans_methods.py_1
  29. 99 25
      utils/log/trans_log.py
  30. 113 26
      utils/systeminfo/sysinfo.py
  31. 0 0
      utils/tmp_util/__init__.py
  32. 0 37
      utils/tmp_util/合并文件.py
  33. 0 100
      utils/tmp_util/整理INSERT到批量INSERT.py
  34. 0 87
      utils/tmp_util/神木_完整度_10分.py
  35. 0 90
      utils/tmp_util/神木_完整度_1分.py
  36. 0 18
      utils/tmp_util/获取台账所有wind表信息.py
  37. 0 76
      utils/tmp_util/表添加列.py
  38. 0 49
      utils/tmp_util/表添加注释.py
  39. 0 27
      utils/tmp_util/颗粒度变大.py
  40. 98 53
      utils/zip/unzip.py
  41. 0 0
      wind_farm/CGN/__init__.py
  42. 0 83
      wind_farm/CGN/minute_data.py
  43. 0 83
      wind_farm/CGN/purge_history_data.py
  44. 0 173
      wind_farm/CGN/second_data.py
  45. 0 0
      wind_farm/__init__.py

+ 9 - 9
app_run.py

@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 # @Time    : 2024/6/11
 # @Author  : 魏志亮
+import os
 import sys
-from os import *
 
 from utils.conf.read_conf import yaml_conf, read_conf
 
@@ -11,7 +11,7 @@ def get_exec_data(run_count=1):
     now_run_count = get_now_running_count()
     data = None
     if now_run_count >= run_count:
-        trans_print(f"当前有{now_run_count}个任务在执行")
+        info(f"当前有{now_run_count}个任务在执行")
     else:
         data = get_batch_exec_data()
     return data
@@ -22,7 +22,7 @@ def run(save_db=True, run_count=1, yaml_config=None, step=0, end=999):
     data = get_exec_data(run_count)
 
     if data is None:
-        trans_print("没有需要执行的任务")
+        info("没有需要执行的任务")
         return
 
     exec_process = None
@@ -55,14 +55,14 @@ if __name__ == "__main__":
     if env.endswith(".yaml"):
         conf_path = env
     else:
-        conf_path = path.abspath(f"./conf/etl_config_{env}.yaml")
+        conf_path = os.path.abspath(f"./conf/etl_config_{env}.yaml")
 
-    environ["ETL_CONF"] = conf_path
+    os.environ["ETL_CONF"] = conf_path
     yaml_config = yaml_conf(conf_path)
-    environ["env"] = env
+    os.environ["env"] = env
     run_count = int(read_conf(yaml_config, "run_batch_count", 1))
 
-    from utils.log.trans_log import trans_print
+    from utils.log.trans_log import info
     from service.trans_conf_service import (
         update_timeout_trans_data,
         get_now_running_count,
@@ -73,7 +73,7 @@ if __name__ == "__main__":
     from etl.wind_power.laser.LaserTrans import LaserTrans
     from etl.wind_power.wave.WaveTrans import WaveTrans
 
-    trans_print("所有请求参数:", sys.argv, "env:", env, "最大可执行个数:", run_count)
-    trans_print("配置文件路径:", environ.get("ETL_CONF"))
+    info("所有请求参数:", sys.argv, "env:", env, "最大可执行个数:", run_count)
+    info("配置文件路径:", os.environ.get("ETL_CONF"))
 
     run(run_count=run_count, yaml_config=yaml_config, step=0)

+ 97 - 0
conf/constants.py

@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2026/3/12
+# @Author  : 系统自动生成
+# 项目常量定义
+
+
+# 文件类型常量
+class FileTypes:
+    """文件类型常量"""
+    # Excel相关文件类型
+    EXCEL_TYPES = ['xls', 'xlsx', 'xlsm', 'xlsb', 'odf', 'ods', 'csv', 'csv.gz']
+    # 压缩文件类型
+    ZIP_TYPES = ['rar', 'zip']
+
+
+# 数据处理常量
+class DataProcessing:
+    """数据处理常量"""
+    # 时间戳列名
+    TIME_STAMP_COLUMN = 'time_stamp'
+    # NaN替换值
+    NAN_REPLACE_VALUE = -999999999
+    # 有功功率单位判断阈值
+    POWER_UNIT_THRESHOLD = 100000
+    # 时间间隔
+    TIME_INTERVAL = '10T'
+    # 非数值列
+    NOT_DOUBLE_COLS = ['wind_turbine_number', 'wind_turbine_name', 'time_stamp', 
+                       'param6', 'param7', 'param8', 'param9', 'param10']
+
+
+# 并行处理常量
+class ParallelProcessing:
+    """并行处理常量"""
+    # 最大进程数
+    MAX_PROCESSES = 8
+    # 最大批次数
+    MAX_BATCHES = 10
+    # CPU使用百分比
+    CPU_USAGE_PERCENT = 2 / 3
+
+
+# 数据库常量
+class Database:
+    """数据库常量"""
+    # 表引擎
+    TABLE_ENGINE = 'InnoDB'
+    # 默认字符集
+    DEFAULT_CHARSET = 'utf8mb4'
+    # 批处理大小
+    BATCH_SIZE = 100000
+
+
+# 日志常量
+class Log:
+    """日志常量"""
+    # 默认日志路径
+    DEFAULT_LOG_PATH = "/data/logs"
+    # 日志文件名前缀
+    LOG_FILE_PREFIX = "etl_tools_"
+
+
+# 路径常量
+class Paths:
+    """路径常量"""
+    # 临时文件基础路径
+    DEFAULT_TMP_BASE_PATH = "/tmp"
+    # 归档路径
+    DEFAULT_ARCHIVE_PATH = "/tmp/archive"
+
+
+# 状态常量
+class Status:
+    """状态常量"""
+    # 成功状态
+    SUCCESS = 1
+    # 错误状态
+    ERROR = 0
+    # 运行状态
+    RUNNING = 2
+
+
+# 类型常量
+class Types:
+    """类型常量"""
+    # 秒级数据
+    SECOND = 'second'
+    # 分钟级数据
+    MINUTE = 'minute'
+    # 故障数据
+    FAULT = 'fault'
+    # 告警数据
+    WARN = 'warn'
+    # 波形数据
+    WAVE = 'wave'
+    # 激光数据
+    LASER = 'laser'

+ 10 - 10
conf/etl_config_dev.yaml

@@ -1,24 +1,24 @@
 plt:
-  database: energy_ty
+  database: energy
   host: 192.168.50.233
   password: admin123456
   port: 3306
   user: admin
 
-# trans:
-#   database: energy_data
-#   host: 192.168.50.235
-#   password: admin123456
-#   port: 30306
-#   user: root
-
 trans:
   database: energy_data
-  host: 106.120.102.238
+  host: 192.168.50.235
   password: admin123456
-  port: 10336
+  port: 30306
   user: root
 
+#trans:
+#  database: energy_data
+#  host: 106.120.102.238
+#  password: admin123456
+#  port: 10336
+#  user: root
+
 # 如果要放在原始路径,则配置这个 以下面的名称作为切割点,新建清理数据文件夹
 etl_origin_path_contain: 收资数据
 # 如果单独保存,配置这个路径

+ 3 - 3
etl/common/ArchiveFile.py

@@ -3,7 +3,7 @@ import shutil
 
 from etl.common.PathsAndTable import PathsAndTable
 from service.trans_conf_service import update_archive_success
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info
 
 
 class ArchiveFile(object):
@@ -19,6 +19,6 @@ class ArchiveFile(object):
         if os.path.exists(self.pathsAndTable.get_tmp_formal_path()):
             shutil.make_archive(self.pathsAndTable.get_archive_path(), 'zip', self.pathsAndTable.get_tmp_formal_path())
             update_archive_success(self.exec_id, f"{self.pathsAndTable.get_archive_path()}.zip")
-            trans_print(f"文件夹已归档为 {self.pathsAndTable.get_archive_path()}.zip")
+            info(f"文件夹已归档为 {self.pathsAndTable.get_archive_path()}.zip")
         else:
-            trans_print(f"文件夹 {self.pathsAndTable.get_tmp_formal_path()} 不存在")
+            info(f"文件夹 {self.pathsAndTable.get_tmp_formal_path()} 不存在")

+ 39 - 27
etl/common/BaseDataTrans.py

@@ -10,12 +10,24 @@ from service.plt_service import get_all_wind
 from service.trans_conf_service import update_trans_status_success, update_trans_status_error, \
     update_trans_status_running
 from utils.file.trans_methods import read_excel_files
-from utils.log.trans_log import trans_print, set_trance_id
+from utils.log.trans_log import set_trance_id, info, error
 
 
 class BaseDataTrans(object):
-    def __init__(self, data: dict = None, save_db=True, yaml_config=None, step=0, end=999):
-
+    """数据转换基类"""
+
+    def __init__(self, data: dict = None, save_db: bool = True, yaml_config: dict = None, step: int = 0,
+                 end: int = 999):
+        """
+        初始化数据转换基类
+        
+        Args:
+            data: 任务数据字典
+            save_db: 是否保存到数据库
+            yaml_config: YAML配置
+            step: 开始步骤
+            end: 结束步骤
+        """
         self.id = data['id']
         self.task_name = data['task_name']
         self.transfer_type = data['transfer_type']
@@ -37,7 +49,7 @@ class BaseDataTrans(object):
                                                self.wind_farm_name, self.transfer_type, save_db, self.save_zip,
                                                self.yaml_config, self.wind_col_trans)
         except Exception as e:
-            trans_print(traceback.format_exc())
+            error(traceback.format_exc())
             update_trans_status_error(self.id, str(e), self.save_db)
             raise e
 
@@ -94,70 +106,70 @@ class BaseDataTrans(object):
             # 0
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
-                trans_print("开始清理数据,临时文件夹:", self.pathsAndTable.get_tmp_path())
+                info("开始清理数据,临时文件夹:", self.pathsAndTable.get_tmp_path())
                 self.clean_file_and_db()
-                trans_print("清理数据结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("清理数据结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
             now_index = now_index + 1
             # 1
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
-                trans_print("开始解压移动文件")
+                info("开始解压移动文件")
                 self.unzip_or_remove_to_tmp_dir()
-                trans_print("解压移动文件结束:耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("解压移动文件结束:耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
             now_index = now_index + 1
             # 2
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
-                trans_print("开始保存数据到临时文件")
+                info("开始保存数据到临时文件")
                 self.read_and_save_tmp_file()
-                trans_print("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
             now_index = now_index + 1
             # 3
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
-                trans_print("开始保存到临时正式文件")
+                info("开始保存到临时正式文件")
                 self.statistics_and_save_tmp_formal_file()
-                trans_print("保存到临时正式文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("保存到临时正式文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
             now_index = now_index + 1
             # 4
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
-                trans_print("开始保存归档文件")
+                info("开始保存归档文件")
                 self.archive_file()
-                trans_print("保存到保存归档文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("保存到保存归档文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
             now_index = now_index + 1
             # 5
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
-                trans_print("开始保存数据到正式文件")
+                info("开始保存数据到正式文件")
                 self.combine_and_save_formal_file()
-                trans_print("保存数据到正式文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("保存数据到正式文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
             now_index = now_index + 1
             # 6
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
-                trans_print("开始保存到数据库,是否存库:", self.pathsAndTable.save_db)
+                info("开始保存到数据库,是否存库:", self.pathsAndTable.save_db)
                 self.save_to_db()
-                trans_print("保存到数据结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("保存到数据结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
             self.update_exec_progress()
         except Exception as e:
-            trans_print(traceback.format_exc())
+            error(traceback.format_exc())
             update_trans_status_error(self.id, str(e), self.save_db)
             raise e
         finally:
             self.pathsAndTable.delete_tmp_files()
-            trans_print("执行结束,总耗时:", str(datetime.datetime.now() - total_begin))
+            info("执行结束,总耗时:", str(datetime.datetime.now() - total_begin))

+ 3 - 3
etl/common/ClearData.py

@@ -2,7 +2,7 @@ import datetime
 
 from etl.common.PathsAndTable import PathsAndTable
 from service.trans_conf_service import update_trans_transfer_progress
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info
 
 
 class ClearData(object):
@@ -19,8 +19,8 @@ class ClearData(object):
         # self.pathsAndTable.delete_batch_files()
 
     def run(self):
-        trans_print("开始清理数据,临时文件夹:", self.pathsAndTable.get_tmp_path())
+        info("开始清理数据,临时文件夹:", self.pathsAndTable.get_tmp_path())
         begin = datetime.datetime.now()
         self.clean_data()
         update_trans_transfer_progress(self.pathsAndTable.id, 5, self.pathsAndTable.save_db)
-        trans_print("清理数据结束,耗时:", datetime.datetime.now() - begin)
+        info("清理数据结束,耗时:", datetime.datetime.now() - begin)

+ 117 - 44
etl/common/CombineAndSaveFormalFile.py

@@ -1,61 +1,134 @@
 import multiprocessing
 import os
+from typing import Dict, List, Tuple, Optional
 
 import pandas as pd
 
+from conf.constants import DataProcessing, ParallelProcessing
 from etl.common.PathsAndTable import PathsAndTable
 from utils.file.trans_methods import read_excel_files, read_file_to_df, copy_to_new
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info, debug
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 
 
-class CombineAndSaveFormalFile(object):
+class CombineAndSaveFormalFile:
+    """合并并保存正式文件"""
 
-    def __init__(self, pathsAndTable: PathsAndTable):
-        self.pathsAndTable = pathsAndTable
-        self.update_files = multiprocessing.Manager().list()
+    # 常量定义
+    TIME_STAMP_COLUMN = DataProcessing.TIME_STAMP_COLUMN
 
-    def combine_and_save(self, file_path, key, exists_file_path):
-        exists_same = False
-        if exists_file_path:
-            exists_same = True
+    def __init__(self, paths_and_table: PathsAndTable):
+        """
+        初始化合并器
+
+        Args:
+            paths_and_table: 路径和表信息对象
+        """
+        self.paths_and_table = paths_and_table
+        self.updated_files = multiprocessing.Manager().list()
+
+    def _merge_dataframes(self, exists_df: pd.DataFrame, now_df: pd.DataFrame) -> pd.DataFrame:
+        """
+        合并两个数据框并去重排序
+
+        Args:
+            exists_df: 已存在的数据框
+            now_df: 当前的数据框
+
+        Returns:
+            合并后的数据框
+        """
+        combined_df = pd.concat([exists_df, now_df])
+        # 去重,保留最新的数据
+        combined_df = combined_df.drop_duplicates(
+            subset=self.TIME_STAMP_COLUMN,
+            keep='last'
+        )
+        # 按时间戳排序
+        return combined_df.sort_values(
+            by=self.TIME_STAMP_COLUMN
+        ).reset_index(drop=True)
+
+    def _save_combined_file(self, file_path: str, key: Tuple[str, str], exists_file_path: Optional[str]) -> None:
+        """
+        保存合并后的文件
+
+        Args:
+            file_path: 新文件路径
+            key: 文件键值 (目录名, 文件名)
+            exists_file_path: 已存在的文件路径,如果为None则表示不存在
+        """
+        has_exists = exists_file_path is not None
+
+        if has_exists:
+            # 合并并保存
             exists_df = read_file_to_df(exists_file_path)
             now_df = read_file_to_df(file_path)
-            # 合并两个 DataFrame
-            combined_df = pd.concat([exists_df, now_df])
-            # 去重,保留 now_df 的值
-            combined_df = combined_df.drop_duplicates(subset='time_stamp', keep='last')
-            # 按 time_stamp 排序
-            combined_df = combined_df.sort_values(by='time_stamp').reset_index(drop=True)
+            combined_df = self._merge_dataframes(exists_df, now_df)
             combined_df.to_csv(exists_file_path, encoding='utf-8', index=False)
-            self.update_files.append(exists_file_path)
+            self.updated_files.append(exists_file_path)
         else:
-            save_path = str(os.path.join(self.pathsAndTable.get_save_path(), key[0], key[1]))
-            copy_to_new(file_path, save_path)
-            self.update_files.append(save_path)
-        trans_print(f"{key[0]}/{key[1]} {'包含' if exists_same else '不包含'} 相同文件,保存成功")
-
-    def combine_and_save_formal_file(self):
-        exists_files = read_excel_files(self.pathsAndTable.get_save_path())
-        exists_file_maps = dict()
-        for file_path in exists_files:
-            name = (os.path.basename(os.path.dirname(file_path)), os.path.basename(file_path))
-            exists_file_maps[name] = file_path
-
-        new_files = read_excel_files(self.pathsAndTable.get_tmp_formal_path())
-        new_file_maps = dict()
-        for file_path in new_files:
-            name = (os.path.basename(os.path.dirname(file_path)), os.path.basename(file_path))
-            new_file_maps[name] = file_path
-
-        same_keys = list(set(exists_file_maps.keys()).intersection(new_file_maps.keys()))
-        split_count = get_available_cpu_count_with_percent(2 / 3)
-        with multiprocessing.Pool(split_count) as pool:
-            pool.starmap(self.combine_and_save,
-                         [(file_path, key, exists_file_maps[key] if key in same_keys else None) for key, file_path in
-                          new_file_maps.items()])
-
-    def run(self):
+            # 复制新文件
+            save_dir = str(os.path.join(
+                self.paths_and_table.get_save_path(),
+                key[0],
+                key[1]
+            ))
+            copy_to_new(file_path, save_dir)
+            self.updated_files.append(save_dir)
+
+        # 记录日志
+        status = "包含" if has_exists else "不包含"
+        debug(f"{key[0]}/{key[1]} {status} 相同文件,保存成功")
+
+    def _build_file_maps(self, base_path: str) -> Dict[Tuple[str, str], str]:
+        """
+        构建文件映射字典
+
+        Args:
+            base_path: 基础路径
+
+        Returns:
+            文件路径映射字典,键为(目录名, 文件名),值为完整路径
+        """
+        files = read_excel_files(base_path)
+        return {
+            (os.path.basename(os.path.dirname(file_path)), os.path.basename(file_path)): file_path
+            for file_path in files
+        }
+
+    def combine_and_save_formal_file(self) -> None:
+        """合并并保存正式文件的主方法"""
+        # 构建已存在文件和新文件的映射
+        exists_file_maps = self._build_file_maps(self.paths_and_table.get_save_path())
+        new_file_maps = self._build_file_maps(self.paths_and_table.get_tmp_formal_path())
+
+        # 找出相同键的文件
+        same_keys = set(exists_file_maps.keys()) & set(new_file_maps.keys())
+
+        # 准备并行处理参数
+        process_args = [
+            (
+                file_path,
+                key,
+                exists_file_maps.get(key) if key in same_keys else None
+            )
+            for key, file_path in new_file_maps.items()
+        ]
+
+        # 使用并行处理
+        cpu_count = get_available_cpu_count_with_percent(ParallelProcessing.CPU_USAGE_PERCENT)
+        cpu_count = min(cpu_count, ParallelProcessing.MAX_PROCESSES)
+        with multiprocessing.Pool(cpu_count) as pool:
+            pool.starmap(self._save_combined_file, process_args)
+
+    def run(self) -> List[str]:
+        """
+        执行合并操作
+
+        Returns:
+            更新后的文件路径列表
+        """
         self.combine_and_save_formal_file()
-        print(self.update_files)
-        return list(self.update_files)
+        info(f"共处理了 {len(self.updated_files)} 个文件")
+        return list(self.updated_files)

+ 96 - 20
etl/common/PathsAndTable.py

@@ -1,14 +1,33 @@
 import shutil
 from os import path, sep
 
+from conf.constants import Paths
 from service.trans_service import creat_min_sec_table, create_warn_fault_table
 from utils.conf.read_conf import *
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info
 
 
 class PathsAndTable(object):
-    def __init__(self, id=None, task_name=None, read_dir=None, wind_farm_code=None, wind_farm_name=None,
-                 read_type=None, save_db=True, save_zip=True, yaml_config=None, wind_col_trans=None):
+    """路径和表管理类"""
+
+    def __init__(self, id: int = None, task_name: str = None, read_dir: str = None, wind_farm_code: str = None,
+                 wind_farm_name: str = None, read_type: str = None, save_db: bool = True,
+                 save_zip: bool = True, yaml_config: dict = None, wind_col_trans: dict = None):
+        """
+        初始化路径和表管理类
+        
+        Args:
+            id: 任务ID
+            task_name: 任务名称
+            read_dir: 读取目录
+            wind_farm_code: 风电场编码
+            wind_farm_name: 风电场名称
+            read_type: 读取类型
+            save_db: 是否保存到数据库
+            save_zip: 是否保存为压缩文件
+            yaml_config: YAML配置
+            wind_col_trans: 风机列转换映射
+        """
         self.id = id
         self.task_name = task_name
         self.read_dir = read_dir
@@ -25,11 +44,11 @@ class PathsAndTable(object):
 
         self.use_tidb = read_conf(yaml_config, 'use_tidb', False)
 
-        self.tmp_base_path = read_conf(yaml_config, "tmp_base_path", "/tmp")
+        self.tmp_base_path = read_conf(yaml_config, "tmp_base_path", Paths.DEFAULT_TMP_BASE_PATH)
         if save_path_conf:
             self.save_path = save_path_conf + sep + self.wind_farm_name
         else:
-            find_index = read_dir.find(read_conf(yaml_config, 'etl_origin_path_contain', "etl_origin_path_contain"))
+            find_index = read_dir.find(read_conf(yaml_config, 'etl_origin_path_contain', "收资数据"))
             if find_index == -1:
                 raise Exception("路径未包含原始数据特定字符:" + read_dir)
             self.save_path = read_dir[0:find_index] + sep + "清理数据"
@@ -37,48 +56,105 @@ class PathsAndTable(object):
         if self.save_path is None:
             raise Exception("未配置保存路径:" + read_dir)
 
-        self.archive_path = read_conf(yaml_config, "archive_path", "/tmp/archive")
+        self.archive_path = read_conf(yaml_config, "archive_path", Paths.DEFAULT_ARCHIVE_PATH)
 
-    def get_save_path(self):
+    def get_save_path(self) -> str:
+        """
+        获取保存路径
+        
+        Returns:
+            保存路径
+        """
         return path.join(self.save_path, self.read_type)
 
-    def get_tmp_path(self):
+    def get_tmp_path(self) -> str:
+        """
+        获取临时路径
+        
+        Returns:
+            临时路径
+        """
         return str(path.join(self.tmp_base_path, str(self.id) + "_" + self.task_name + "_" + self.read_type))
 
-    def get_excel_tmp_path(self):
+    def get_excel_tmp_path(self) -> str:
+        """
+        获取Excel临时路径
+        
+        Returns:
+            Excel临时路径
+        """
         return path.join(self.get_tmp_path(), 'excel_tmp' + sep)
 
-    def get_read_tmp_path(self):
+    def get_read_tmp_path(self) -> str:
+        """
+        获取读取临时路径
+        
+        Returns:
+            读取临时路径
+        """
         return path.join(self.get_tmp_path(), 'read_tmp')
 
-    def get_merge_tmp_path(self, wind_turbine_number=None):
+    def get_merge_tmp_path(self, wind_turbine_number=None) -> str:
+        """
+        获取合并临时路径
+        
+        Args:
+            wind_turbine_number: 风机编号
+            
+        Returns:
+            合并临时路径
+        """
         if wind_turbine_number is None:
             return path.join(self.get_tmp_path(), 'merge_tmp')
         else:
             return path.join(self.get_tmp_path(), 'merge_tmp', str(wind_turbine_number))
 
-    def get_tmp_formal_path(self):
+    def get_tmp_formal_path(self) -> str:
+        """
+        获取正式临时路径
+        
+        Returns:
+            正式临时路径
+        """
         return path.join(self.get_tmp_path(), 'formal_tmp')
 
-    def get_archive_path(self):
+    def get_archive_path(self) -> str:
+        """
+        获取归档路径
+        
+        Returns:
+            归档路径
+        """
         return path.join(self.archive_path, self.wind_farm_name, self.read_type, f'{self.id}_{self.task_name}')
 
-    def get_table_name(self):
+    def get_table_name(self) -> str:
+        """
+        获取表名
+        
+        Returns:
+            表名
+        """
         return "_".join([self.wind_farm_code, self.read_type])
 
-    def delete_tmp_files(self):
-        trans_print("开始删除临时文件夹")
+    def delete_tmp_files(self) -> None:
+        """
+        删除临时文件
+        """
+        info("开始删除临时文件夹")
         if path.exists(self.get_tmp_path()):
             shutil.rmtree(self.get_tmp_path())
-        trans_print("删除临时文件夹删除成功")
+        info("删除临时文件夹删除成功")
 
-    def create_wind_farm_db(self):
+    def create_wind_farm_db(self) -> None:
+        """
+        创建风电场数据库表
+        """
         if self.save_db:
-            trans_print("开始创建表")
+            info("开始创建表")
             if self.read_type in ['second', 'minute']:
                 creat_min_sec_table(self.get_table_name(), self.read_type, self.wind_farm_name, self.use_tidb)
             elif self.read_type in ['fault', 'warn']:
                 create_warn_fault_table(self.get_table_name(), self.wind_farm_name, )
             else:
                 raise Exception("不支持的读取类型:" + self.read_type)
-            trans_print("建表结束")
+            info("建表结束")

+ 47 - 22
etl/common/SaveToDb.py

@@ -5,8 +5,7 @@ import traceback
 from etl.common.PathsAndTable import PathsAndTable
 from service.trans_conf_service import update_trans_transfer_progress
 from service.trans_service import save_scada_file_to_db, save_file_to_db
-from utils.file.trans_methods import split_array
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info, error
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 
 
@@ -25,34 +24,60 @@ class SaveToDb(object):
         all_saved_files = [i for i in all_saved_files if
                            os.path.basename(i).split(".")[0] in self.pathsAndTable.wind_col_trans.keys()]
 
+        if not all_saved_files:
+            info("没有文件需要保存到数据库")
+            return
+
         self.pathsAndTable.create_wind_farm_db()
 
-        split_count = get_available_cpu_count_with_percent(percent=2 / 3)
-        split_count = split_count if split_count <= len(all_saved_files) else len(all_saved_files)
-        all_arrays = split_array(all_saved_files, split_count)
+        # 计算最佳进程数
+        max_processes = get_available_cpu_count_with_percent(percent=2 / 3)
+        max_processes = min(max_processes, len(all_saved_files), 10)  # 限制最大进程数为10
+
         try:
-            for index, arr in enumerate(all_arrays):
-                with multiprocessing.Pool(10) as pool:
-                    if self.pathsAndTable.read_type in ['minute', 'second']:
-                        pool.starmap(save_scada_file_to_db,
-                                     [(self.pathsAndTable.get_table_name(), file,
-                                       self.pathsAndTable.wind_col_trans[os.path.basename(file).split(".")[0]],
-                                       os.path.basename(os.path.dirname(file)),
-                                       self.batch_count,self.pathsAndTable.use_tidb) for file in arr])
-                    else:
-                        pool.starmap(save_file_to_db,
-                                     [(self.pathsAndTable.get_table_name(), file, self.batch_count) for file in arr])
-
-                update_trans_transfer_progress(self.pathsAndTable.id,
-                                               round(70 + 29 * (index + 1) / len(all_arrays), 2),
-                                               self.pathsAndTable.save_db)
+            # 创建一个进程池处理所有文件
+            with multiprocessing.Pool(max_processes) as pool:
+                if self.pathsAndTable.read_type in ['minute', 'second']:
+                    # 准备参数
+                    params = [(self.pathsAndTable.get_table_name(), file,
+                               self.pathsAndTable.wind_col_trans[os.path.basename(file).split(".")[0]],
+                               os.path.basename(os.path.dirname(file)),
+                               self.batch_count, self.pathsAndTable.use_tidb) for file in all_saved_files]
+
+                    # 分批次处理并更新进度
+                    batch_size = max(1, len(params) // 10)  # 最多10个批次
+                    for i in range(0, len(params), batch_size):
+                        batch_params = params[i:i + batch_size]
+                        pool.starmap(save_scada_file_to_db, batch_params)
+
+                        # 更新进度
+                        progress = 70 + 29 * (i + len(batch_params)) / len(params)
+                        update_trans_transfer_progress(self.pathsAndTable.id,
+                                                       round(progress, 2),
+                                                       self.pathsAndTable.save_db)
+
+                else:
+                    # 准备参数
+                    params = [(self.pathsAndTable.get_table_name(), file, self.batch_count) for file in all_saved_files]
+
+                    # 分批次处理并更新进度
+                    batch_size = max(1, len(params) // 10)  # 最多10个批次
+                    for i in range(0, len(params), batch_size):
+                        batch_params = params[i:i + batch_size]
+                        pool.starmap(save_file_to_db, batch_params)
+
+                        # 更新进度
+                        progress = 70 + 29 * (i + len(batch_params)) / len(params)
+                        update_trans_transfer_progress(self.pathsAndTable.id,
+                                                       round(progress, 2),
+                                                       self.pathsAndTable.save_db)
         except Exception as e:
-            trans_print(traceback.format_exc())
+            error(traceback.format_exc())
             message = "保存到数据库错误,系统返回错误:" + str(e)
             raise ValueError(message)
 
     def run(self):
         if self.pathsAndTable.save_db:
             self.mutiprocessing_to_save_db()
-            update_trans_transfer_progress(self.pathsAndTable.id,  99,
+            update_trans_transfer_progress(self.pathsAndTable.id, 99,
                                            self.pathsAndTable.save_db)

+ 43 - 18
etl/common/UnzipAndRemove.py

@@ -1,54 +1,76 @@
 import multiprocessing
+import os
 import traceback
-from os import *
+from typing import List, Optional
 
+from conf.constants import ParallelProcessing
 from etl.common.PathsAndTable import PathsAndTable
 from service.trans_conf_service import update_trans_transfer_progress
 from utils.file.trans_methods import read_files, read_excel_files, copy_to_new, split_array
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info, error
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 from utils.zip.unzip import unzip, unrar, get_desc_path
 
 
 class UnzipAndRemove(object):
-    def __init__(self, pathsAndTable: PathsAndTable, filter_types=None):
+    """解压缩并移动文件类"""
+
+    def __init__(self, pathsAndTable: PathsAndTable, filter_types: Optional[List[str]] = None):
+        """
+        初始化解压缩并移动文件类
+        
+        Args:
+            pathsAndTable: 路径和表对象
+            filter_types: 文件类型过滤器
+        """
         self.pathsAndTable = pathsAndTable
         self.filter_types = filter_types
 
-    def get_and_remove(self, file):
-
+    def get_and_remove(self, file: str) -> None:
+        """
+        解压缩或移动文件到临时路径
+        
+        Args:
+            file: 文件路径
+        """
         to_path = self.pathsAndTable.get_excel_tmp_path()
-        if str(file).endswith("zip"):
-            if str(file).endswith("csv.zip"):
+        file_lower = str(file).lower()
+        if file_lower.endswith("zip"):
+            if file_lower.endswith("csv.zip"):
                 copy_to_new(file, file.replace(self.pathsAndTable.read_dir, to_path).replace("csv.zip", 'csv.gz'))
             else:
                 desc_path = file.replace(self.pathsAndTable.read_dir, to_path)
                 unzip(file, get_desc_path(desc_path))
                 self.pathsAndTable.has_zip = True
-        elif str(file).endswith("rar"):
+        elif file_lower.endswith("rar"):
             desc_path = file.replace(self.pathsAndTable.read_dir, to_path)
             is_success, e = unrar(file, get_desc_path(desc_path))
             self.pathsAndTable.has_zip = True
-            if not is_success:
-                trans_print(traceback.format_exc())
-                pass
+            if not is_success:
+                # Keep failure visibility: dropping this check would silently
+                # ignore the `is_success`/`e` result returned by unrar().
+                error('解压rar失败: ', file, str(e))
         else:
             copy_to_new(file, file.replace(self.pathsAndTable.read_dir, to_path))
 
-    def remove_file_to_tmp_path(self):
+    def remove_file_to_tmp_path(self) -> List[str]:
+        """
+        将文件移动到临时路径
+        
+        Returns:
+            处理后的文件列表
+        """
         # 读取文件
         try:
-            if path.isfile(self.pathsAndTable.read_dir):
+            if os.path.isfile(self.pathsAndTable.read_dir):
                 all_files = [self.pathsAndTable.read_dir]
             else:
                 all_files = read_files(self.pathsAndTable.read_dir)
 
             # 最大取系统cpu的 三分之二
             split_count = get_available_cpu_count_with_percent(2 / 3)
+            # 限制最大进程数
+            split_count = min(split_count, ParallelProcessing.MAX_PROCESSES)
             all_arrays = split_array(all_files, split_count)
 
             for index, arr in enumerate(all_arrays):
-                pool_count = split_count if split_count < len(arr) else len(arr)
+                pool_count = min(split_count, len(arr))
                 with multiprocessing.Pool(pool_count) as pool:
                     pool.starmap(self.get_and_remove, [(i,) for i in arr])
                 update_trans_transfer_progress(self.pathsAndTable.id,
@@ -57,14 +79,17 @@ class UnzipAndRemove(object):
 
             all_files = read_excel_files(self.pathsAndTable.get_excel_tmp_path())
 
-            trans_print('读取文件数量:', len(all_files))
+            info('读取文件数量:', len(all_files))
         except Exception as e:
-            trans_print(traceback.format_exc())
+            error(traceback.format_exc())
             message = "读取文件列表错误:" + self.pathsAndTable.read_dir + ",系统返回错误:" + str(e)
             raise ValueError(message)
         return all_files
 
-    def run(self):
+    def run(self) -> None:
+        """
+        运行解压缩和移动文件流程
+        """
         self.remove_file_to_tmp_path()
-        update_trans_transfer_progress(self.pathsAndTable.id,  20,
+        update_trans_transfer_progress(self.pathsAndTable.id, 20,
                                        self.pathsAndTable.save_db)

+ 3 - 3
etl/wind_power/fault_warn/FaultWarnTrans.py

@@ -10,7 +10,7 @@ from service.trans_service import get_fault_warn_conf, drop_table, create_warn_f
     save_file_to_db
 from utils.conf.read_conf import read_conf
 from utils.file.trans_methods import read_excel_files, read_file_to_df, create_file_path, valid_eval
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info, error
 
 
 class FaultWarnTrans(BaseDataTrans):
@@ -27,14 +27,14 @@ class FaultWarnTrans(BaseDataTrans):
 
     # 第三步 读取 并 保存到临时文件
     def read_and_save_tmp_file(self):
-        trans_print("无需保存临时文件")
+        info("无需保存临时文件")
 
     # 读取并保存到临时正式文件
     def statistics_and_save_tmp_formal_file(self):
         conf_map = self.get_filed_conf()
         if conf_map is None or type(conf_map) == tuple or len(conf_map.keys()) == 0:
             message = f"未找到{self.id}的{self.transfer_type}配置"
-            trans_print(message)
+            error(message)
             update_trans_status_error(self.id, message, self.save_db)
         else:
 

+ 4 - 4
etl/wind_power/laser/LaserTrans.py

@@ -7,11 +7,11 @@ import numpy as np
 import pandas as pd
 
 from service.plt_service import get_all_wind
-from service.trans_service import save_df_to_db
 from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
     update_trans_status_success
+from service.trans_service import save_df_to_db
 from utils.file.trans_methods import read_files, read_file_to_df
-from utils.log.trans_log import set_trance_id, trans_print
+from utils.log.trans_log import set_trance_id, info
 
 
 class LaserTrans():
@@ -56,7 +56,7 @@ class LaserTrans():
         trance_id = '-'.join([self.wind_farm_code, 'laser'])
         set_trance_id(trance_id)
         all_files = read_files(self.read_path, ['csv'])
-        trans_print(self.wind_farm_code, '获取文件总数为:', len(all_files))
+        info(self.wind_farm_code, '获取文件总数为:', len(all_files))
         pool_count = 8 if len(all_files) > 8 else len(all_files)
 
         with multiprocessing.Pool(pool_count) as pool:
@@ -70,7 +70,7 @@ class LaserTrans():
         update_trans_status_success(self.id, len(df['wind_turbine_number'].unique()), None,
                                     df['acquisition_time'].min(), df['acquisition_time'].max(), df.shape[0])
         # update_trans_status_success(self.id)
-        trans_print(self.wind_farm_code, '执行结束,总耗时:', (datetime.datetime.now() - self.begin))
+        info(self.wind_farm_code, '执行结束,总耗时:', (datetime.datetime.now() - self.begin))
 
 
 if __name__ == '__main__':

+ 6 - 6
etl/wind_power/min_sec/ClassIdentifier.py

@@ -5,7 +5,7 @@ import numpy as np
 from pandas import DataFrame
 
 from utils.file.trans_methods import read_file_to_df
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import error, warning, debug
 
 
 class ClassIdentifier(object):
@@ -35,11 +35,11 @@ class ClassIdentifier(object):
         self.cut_out_speed = cut_out_speed
 
         if self.rated_power is None:
-            trans_print(wind_turbine_number, "WARNING:rated_power配置为空的")
+            warning(wind_turbine_number, "WARNING:rated_power配置为空的")
             self.rated_power = 1500
 
         if self.cut_out_speed is None:
-            trans_print(cut_out_speed, "WARNING:cut_out_speed配置为空的")
+            warning(cut_out_speed, "WARNING:cut_out_speed配置为空的")
             self.cut_out_speed = 20
 
         if file_path is None and origin_df is None:
@@ -350,12 +350,12 @@ class ClassIdentifier(object):
     def run(self):
         # Implement your class identification logic here
         begin = datetime.datetime.now()
-        trans_print("打标签开始,风机号:", self.wind_turbine_number, self.df.shape)
+        debug("打标签开始,风机号:", self.wind_turbine_number, self.df.shape)
         try:
             df = self.identifier()
         except Exception as e:
-            trans_print(traceback.format_exc())
+            error(traceback.format_exc())
             message = str(e) + ',风机编号:' + self.wind_turbine_number
             raise Exception('打标签失败:' + message)
-        trans_print("打标签结束,", df.shape, ",耗时:", datetime.datetime.now() - begin)
+        debug("打标签结束,", df.shape, ",耗时:", datetime.datetime.now() - begin)
         return df

+ 51 - 26
etl/wind_power/min_sec/MinSecTrans.py

@@ -3,6 +3,7 @@
 # @Author  : 魏志亮
 import multiprocessing
 import os.path
+from typing import Optional
 
 from etl.common.BaseDataTrans import BaseDataTrans
 from etl.common.CombineAndSaveFormalFile import CombineAndSaveFormalFile
@@ -12,26 +13,67 @@ from etl.wind_power.min_sec.TransParam import TransParam
 from service.trans_conf_service import update_trans_status_success, update_trans_status_error
 from service.trans_service import get_min_sec_conf
 from utils.conf.read_conf import read_conf
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import error
 
 
 class MinSecTrans(BaseDataTrans):
+    """分钟/秒级数据转换类"""
 
-    def __init__(self, data: dict = None, save_db=True, yaml_config=None, step=0, end=999):
+    # 转换列名列表
+    TRANS_COLS = [
+        'wind_turbine_number', 'time_stamp', 'active_power', 'rotor_speed', 'generator_speed',
+        'wind_velocity', 'pitch_angle_blade_1', 'pitch_angle_blade_2', 'pitch_angle_blade_3',
+        'cabin_position', 'true_wind_direction', 'yaw_error1', 'set_value_of_active_power',
+        'gearbox_oil_temperature', 'generatordrive_end_bearing_temperature',
+        'generatornon_drive_end_bearing_temperature', 'wind_turbine_status', 'wind_turbine_status2',
+        'cabin_temperature', 'twisted_cable_angle', 'front_back_vibration_of_the_cabin',
+        'side_to_side_vibration_of_the_cabin', 'actual_torque', 'given_torque',
+        'clockwise_yaw_count', 'counterclockwise_yaw_count', 'unusable', 'power_curve_available',
+        'required_gearbox_speed',
+        'inverter_speed_master_control', 'outside_cabin_temperature', 'main_bearing_temperature',
+        'main_bearing_temperature_2', 'gearbox_high_speed_shaft_bearing_temperature',
+        'gearboxmedium_speed_shaftbearing_temperature',
+        'gearbox_low_speed_shaft_bearing_temperature', 'generator_winding1_temperature',
+        'generator_winding2_temperature', 'generator_winding3_temperature',
+        'turbulence_intensity', 'grid_a_phase_current', 'grid_b_phase_current',
+        'grid_c_phase_current', 'reactive_power', 'param1', 'param2', 'param3', 'param4', 'param5',
+        'param6', 'param7', 'param8', 'param9', 'param10'
+    ]
+
+    def __init__(self, data: dict = None, save_db: bool = True, yaml_config: dict = None, step: int = 0,
+                 end: int = 999):
+        """
+        初始化分钟/秒级数据转换类
+        
+        Args:
+            data: 任务数据字典
+            save_db: 是否保存到数据库
+            yaml_config: YAML配置
+            step: 开始步骤
+            end: 结束步骤
+        """
         super(MinSecTrans, self).__init__(data, save_db, yaml_config, step, end)
         self.statistics_map = multiprocessing.Manager().dict()
         self.trans_param = self.get_trans_param()
         self.trans_param.wind_col_trans = self.wind_col_trans
 
     def get_filed_conf(self):
+        """获取配置"""
         return get_min_sec_conf(self.wind_farm_code, self.transfer_type)
 
-    def get_trans_param(self):
+    def get_trans_param(self) -> Optional[TransParam]:
+        """
+        获取转换参数
+        
+        Returns:
+            TransParam对象
+        """
         conf_map = self.get_filed_conf()
         if conf_map is None or type(conf_map) == tuple or len(conf_map.keys()) == 0:
             message = f"未找到{self.id}的{self.transfer_type}配置"
-            trans_print(message)
+            error(message)
             update_trans_status_error(self.id, message, self.save_db)
+            # Fail fast with the real message: the caller's __init__ immediately
+            # dereferences the returned TransParam (`.wind_col_trans`), so a None
+            # return would only resurface later as an opaque AttributeError.
+            raise ValueError(message)
         else:
             resolve_col_prefix = read_conf(conf_map, 'resolve_col_prefix')
             wind_name_exec = read_conf(conf_map, 'wind_name_exec', None)
@@ -48,25 +90,7 @@ class MinSecTrans(BaseDataTrans):
             boolean_sec_to_min = int(boolean_sec_to_min) == 1
 
             cols_trans_all = dict()
-            trans_cols = ['wind_turbine_number', 'time_stamp', 'active_power', 'rotor_speed', 'generator_speed',
-                          'wind_velocity', 'pitch_angle_blade_1', 'pitch_angle_blade_2', 'pitch_angle_blade_3',
-                          'cabin_position', 'true_wind_direction', 'yaw_error1', 'set_value_of_active_power',
-                          'gearbox_oil_temperature', 'generatordrive_end_bearing_temperature',
-                          'generatornon_drive_end_bearing_temperature', 'wind_turbine_status', 'wind_turbine_status2',
-                          'cabin_temperature', 'twisted_cable_angle', 'front_back_vibration_of_the_cabin',
-                          'side_to_side_vibration_of_the_cabin', 'actual_torque', 'given_torque',
-                          'clockwise_yaw_count', 'counterclockwise_yaw_count', 'unusable', 'power_curve_available',
-                          'required_gearbox_speed',
-                          'inverter_speed_master_control', 'outside_cabin_temperature', 'main_bearing_temperature',
-                          'main_bearing_temperature_2', 'gearbox_high_speed_shaft_bearing_temperature',
-                          'gearboxmedium_speed_shaftbearing_temperature',
-                          'gearbox_low_speed_shaft_bearing_temperature', 'generator_winding1_temperature',
-                          'generator_winding2_temperature', 'generator_winding3_temperature',
-                          'turbulence_intensity', 'grid_a_phase_current', 'grid_b_phase_current',
-                          'grid_c_phase_current', 'reactive_power', 'param1', 'param2', 'param3', 'param4', 'param5',
-                          'param6', 'param7', 'param8', 'param9', 'param10']
-
-            for col in trans_cols:
+            for col in self.TRANS_COLS:
                 cols_trans_all[col] = read_conf(conf_map, col, '')
 
             return TransParam(read_type=self.transfer_type, read_path=self.read_dir,
@@ -77,13 +101,13 @@ class MinSecTrans(BaseDataTrans):
                               resolve_col_prefix=resolve_col_prefix, need_valid_cols=need_valid_cols,
                               boolean_sec_to_min=boolean_sec_to_min)
 
-    # 第三步 读取 并 保存到临时文件
     def read_and_save_tmp_file(self):
+        """第三步:读取并保存到临时文件"""
         read_and_save_tmp = ReadAndSaveTmp(self.pathsAndTable, self.trans_param)
         read_and_save_tmp.run()
 
-    # 第四步 统计 并 保存到正式文件
     def statistics_and_save_tmp_formal_file(self):
+        """第四步:统计并保存到正式文件"""
         # 保存到正式文件
         statistics_and_save_tmp_formal_file = StatisticsAndSaveTmpFormalFile(self.pathsAndTable, self.trans_param,
                                                                              self.statistics_map,
@@ -91,11 +115,12 @@ class MinSecTrans(BaseDataTrans):
         statistics_and_save_tmp_formal_file.run()
 
     def combine_and_save_formal_file(self):
+        """合并并保存正式文件"""
         combine_and_save_formal_file = CombineAndSaveFormalFile(self.pathsAndTable)
         self.update_files = combine_and_save_formal_file.run()
 
-    # 最后更新执行程度
     def update_exec_progress(self):
+        """最后更新执行进度"""
         all_files = set([os.path.basename(i) for i in self.update_files])
         update_trans_status_success(self.id, len(all_files),
                                     self.statistics_map['time_granularity'],

+ 111 - 45
etl/wind_power/min_sec/ReadAndSaveTmp.py

@@ -1,31 +1,47 @@
 import datetime
 import multiprocessing
+import os
 import traceback
-from os import *
 
 import pandas as pd
 
+from conf.constants import ParallelProcessing
 from etl.common.PathsAndTable import PathsAndTable
 from etl.wind_power.min_sec import TransParam
 from service.trans_conf_service import update_trans_transfer_progress
 from utils.file.trans_methods import read_excel_files, split_array, del_blank, \
     create_file_path, read_file_to_df, valid_eval
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info, debug, error
 from utils.systeminfo.sysinfo import use_files_get_max_cpu_count, get_dir_size
 
 
 class ReadAndSaveTmp(object):
+    """读取并保存临时文件类"""
 
     def __init__(self, pathsAndTable: PathsAndTable, trans_param: TransParam):
+        """
+        初始化读取并保存临时文件类
+        
+        Args:
+            pathsAndTable: 路径和表对象
+            trans_param: 转换参数对象
+        """
         self.pathsAndTable = pathsAndTable
         self.trans_param = trans_param
         self.exist_wind_names = multiprocessing.Manager().list()
         self.lock = multiprocessing.Manager().Lock()
         self.file_lock = multiprocessing.Manager().dict()
 
-    def _save_to_tmp_csv_by_name(self, df, name):
+    def _save_to_tmp_csv_by_name(self, df: pd.DataFrame, name: str):
+        """
+        根据风机名称保存到临时CSV文件
+        
+        Args:
+            df: 数据帧
+            name: 风机名称
+        """
         save_name = str(name) + '.csv'
-        save_path = path.join(self.pathsAndTable.get_read_tmp_path(), save_name)
+        save_path = os.path.join(self.pathsAndTable.get_read_tmp_path(), save_name)
         create_file_path(save_path, is_file_path=True)
 
         with self.lock:
@@ -41,7 +57,13 @@ class ReadAndSaveTmp(object):
             else:
                 df.to_csv(save_path, index=False, encoding='utf8')
 
-    def save_merge_data(self, file_path):
+    def save_merge_data(self, file_path: str):
+        """
+        保存合并数据
+        
+        Args:
+            file_path: 文件路径
+        """
         df = self.read_excel_to_df(file_path)
         if self.trans_param.wind_name_exec:
             if valid_eval(self.trans_param.wind_name_exec):
@@ -67,7 +89,7 @@ class ReadAndSaveTmp(object):
                         else:
                             contains_name = False
                             self.exist_wind_names.append(exist_name)
-                        save_path = path.join(merge_path, csv_name)
+                        save_path = os.path.join(merge_path, csv_name)
                         now_df = df[df['wind_turbine_number'] == wind_name][['time_stamp', col]]
                         if contains_name:
                             now_df.to_csv(save_path, index=False, encoding='utf-8', mode='a',
@@ -75,7 +97,16 @@ class ReadAndSaveTmp(object):
                         else:
                             now_df.to_csv(save_path, index=False, encoding='utf-8')
 
-    def trans_df_cols(self, df):
+    def trans_df_cols(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        转换数据帧列名
+        
+        Args:
+            df: 数据帧
+        
+        Returns:
+            转换后的数据帧
+        """
         if self.trans_param.is_vertical_table:
             pass
         else:
@@ -120,8 +151,13 @@ class ReadAndSaveTmp(object):
 
         return df
 
-    def df_save_to_tmp_file(self, df=pd.DataFrame()):
-
+    def df_save_to_tmp_file(self, df: pd.DataFrame = pd.DataFrame()):
+        """
+        保存数据帧到临时文件
+        
+        Args:
+            df: 数据帧
+        """
         df = self.trans_df_cols(df)
 
         df = del_blank(df, ['wind_turbine_number'])
@@ -133,19 +169,34 @@ class ReadAndSaveTmp(object):
 
         self.save_to_tmp_csv(df)
 
-    def save_to_tmp_csv(self, df):
+    def save_to_tmp_csv(self, df: pd.DataFrame):
+        """
+        保存到临时CSV文件
+        
+        Args:
+            df: 数据帧
+        """
         names = set(df['wind_turbine_number'].values)
         if names:
-            trans_print("开始保存", str(names), "到临时文件", df.shape)
+            debug("开始保存", str(names), "到临时文件", df.shape)
 
             for name in names:
                 self._save_to_tmp_csv_by_name(df[df['wind_turbine_number'] == name], name)
             del df
-            trans_print("保存", str(names), "到临时文件成功, 风机数量", len(names))
-
-    def merge_df(self, dir_path):
+            debug("保存", str(names), "到临时文件成功, 风机数量", len(names))
+
+    def merge_df(self, dir_path: str) -> pd.DataFrame:
+        """
+        合并数据帧
+        
+        Args:
+            dir_path: 目录路径
+        
+        Returns:
+            合并后的数据帧
+        """
         all_files = read_excel_files(dir_path)
-        wind_turbine_number = path.basename(dir_path)
+        wind_turbine_number = os.path.basename(dir_path)
         df = pd.DataFrame()
         for file in all_files:
             now_df = read_file_to_df(file)
@@ -161,8 +212,13 @@ class ReadAndSaveTmp(object):
         return df
 
     def read_file_and_save_tmp(self):
+        """
+        读取文件并保存到临时文件
+        """
         all_files = read_excel_files(self.pathsAndTable.get_excel_tmp_path())
         split_count = use_files_get_max_cpu_count(all_files)
+        # 限制最大进程数
+        split_count = min(split_count, ParallelProcessing.MAX_PROCESSES)
         all_arrays = split_array(all_files, split_count)
 
         if self.trans_param.merge_columns:
@@ -172,7 +228,7 @@ class ReadAndSaveTmp(object):
                         pool.starmap(self.save_merge_data, [(ar,) for ar in arr])
 
                 except Exception as e:
-                    trans_print(traceback.format_exc())
+                    error(traceback.format_exc())
                     message = "整理临时文件,系统返回错误:" + str(e)
                     raise ValueError(message)
 
@@ -180,28 +236,28 @@ class ReadAndSaveTmp(object):
                                                round(20 + 20 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
 
-            dirs = [path.join(self.pathsAndTable.get_merge_tmp_path(), dir_name) for dir_name in
-                    listdir(self.pathsAndTable.get_merge_tmp_path())]
-            dir_total_size = get_dir_size(dirs[0])
-            # split_count = max_file_size_get_max_cpu_count(dir_total_size, memory_percent=1 / 12, cpu_percent=1 / 10)
-            split_count = 2
-            all_arrays = split_array(dirs, split_count)
-            for index, arr in enumerate(all_arrays):
-                try:
-                    with multiprocessing.Pool(split_count) as pool:
-                        pool.starmap(self.merge_df, [(ar,) for ar in arr])
-
-                except Exception as e:
-                    trans_print(traceback.format_exc())
-                    message = "整理临时文件,系统返回错误:" + str(e)
-                    raise ValueError(message)
-
-                update_trans_transfer_progress(self.pathsAndTable.id,
-                                               round(20 + 30 * (index + 1) / len(all_arrays), 2),
-                                               self.pathsAndTable.save_db)
+            dirs = [os.path.join(self.pathsAndTable.get_merge_tmp_path(), dir_name) for dir_name in
+                    os.listdir(self.pathsAndTable.get_merge_tmp_path())]
+            if dirs:
+                dir_total_size = get_dir_size(dirs[0])
+                # NOTE(review): dir_total_size is a size in BYTES, not a worker
+                # count — min(bytes, MAX_PROCESSES) is almost always MAX_PROCESSES.
+                # Cap the pool by the number of directories to process instead.
+                split_count = min(max(1, len(dirs)), ParallelProcessing.MAX_PROCESSES)
+                all_arrays = split_array(dirs, split_count)
+                for index, arr in enumerate(all_arrays):
+                    try:
+                        with multiprocessing.Pool(split_count) as pool:
+                            pool.starmap(self.merge_df, [(ar,) for ar in arr])
+
+                    except Exception as e:
+                        error(traceback.format_exc())
+                        message = "整理临时文件,系统返回错误:" + str(e)
+                        raise ValueError(message)
+
+                    update_trans_transfer_progress(self.pathsAndTable.id,
+                                                   round(20 + 30 * (index + 1) / len(all_arrays), 2),
+                                                   self.pathsAndTable.save_db)
 
         else:
-
             for index, arr in enumerate(all_arrays):
                 try:
                     with multiprocessing.Pool(split_count) as pool:
@@ -209,7 +265,7 @@ class ReadAndSaveTmp(object):
                     for df in dfs:
                         self.df_save_to_tmp_file(df)
                 except Exception as e:
-                    trans_print(traceback.format_exc())
+                    error(traceback.format_exc())
                     message = "整理临时文件,系统返回错误:" + str(e)
                     raise ValueError(message)
 
@@ -217,8 +273,16 @@ class ReadAndSaveTmp(object):
                                                round(20 + 30 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
 
-    def read_excel_to_df(self, file_path):
-
+    def read_excel_to_df(self, file_path: str) -> pd.DataFrame:
+        """
+        读取Excel文件到数据帧
+        
+        Args:
+            file_path: 文件路径
+        
+        Returns:
+            数据帧
+        """
         read_cols = [v.split(",")[0] for k, v in self.trans_param.cols_tran.items() if v and not v.startswith("$")]
 
         trans_dict = {}
@@ -300,7 +364,7 @@ class ReadAndSaveTmp(object):
 
             for k, v in trans_dict.items():
                 if k.startswith("$file"):
-                    file = ".".join(path.basename(file_path).split(".")[0:-1])
+                    file = ".".join(os.path.basename(file_path).split(".")[0:-1])
                     if k == "$file":
                         ks = k.split("|")
                         bool_contains = False
@@ -337,7 +401,7 @@ class ReadAndSaveTmp(object):
                     datas = str(k.split(",")[1].replace("$file_date", "").replace("[", "").replace("]", "")).split(":")
                     if len(datas) != 2:
                         raise Exception("字段映射出现错误 :" + str(trans_dict))
-                    file = ".".join(path.basename(file_path).split(".")[0:-1])
+                    file = ".".join(os.path.basename(file_path).split(".")[0:-1])
                     date_str = str(file[int(datas[0]):int(datas[1])]).strip()
                     df[v] = df[k.split(",")[0]].apply(lambda x: date_str + " " + str(x))
 
@@ -351,8 +415,8 @@ class ReadAndSaveTmp(object):
                     if not bool_contains:
                         cengshu = int(str(ks[0].replace("$folder", "").replace("[", "").replace("]", "")))
                         for i in range(cengshu):
-                            folder = path.dirname(folder)
-                        df[v] = str(str(folder).split(sep)[-1]).strip()
+                            folder = os.path.dirname(folder)
+                        df[v] = str(str(folder).split(os.sep)[-1]).strip()
                 elif k.startswith("$sheet_name"):
                     df[v] = df['sheet_name']
 
@@ -374,9 +438,11 @@ class ReadAndSaveTmp(object):
             return df
 
     def run(self):
-        trans_print("开始保存数据到临时文件")
+        """
+        """
+        info("开始保存数据到临时文件")
         begin = datetime.datetime.now()
         self.read_file_and_save_tmp()
         update_trans_transfer_progress(self.pathsAndTable.id, 50,
                                        self.pathsAndTable.save_db)
-        trans_print("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin)
+        info("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin)

+ 53 - 40
etl/wind_power/min_sec/StatisticsAndSaveTmpFormalFile.py

@@ -5,15 +5,16 @@ from os import path
 import numpy as np
 import pandas as pd
 
+from conf.constants import DataProcessing, ParallelProcessing, Types
 from etl.common.PathsAndTable import PathsAndTable
 from etl.wind_power.min_sec import TransParam
 from etl.wind_power.min_sec.ClassIdentifier import ClassIdentifier
 from etl.wind_power.min_sec.FilterValidData import FilterValidData
 from service.trans_conf_service import update_trans_transfer_progress
 from utils.conf.read_conf import read_conf
-from utils.df_utils.util import get_time_space
-from utils.file.trans_methods import create_file_path, read_excel_files, read_file_to_df, split_array
-from utils.log.trans_log import trans_print
+from utils.df_utils.util import estimate_time_interval as get_time_space
+from utils.file.trans_methods import create_file_path, read_excel_files, read_file_to_df
+from utils.log.trans_log import debug, error
 from utils.systeminfo.sysinfo import use_files_get_max_cpu_count
 
 exec("import math")
@@ -74,13 +75,12 @@ class StatisticsAndSaveTmpFormalFile(object):
             self.trans_param.wind_col_trans).fillna(df['wind_turbine_number'])
         wind_col_name = str(df['wind_turbine_number'].values[0])
 
-        not_double_cols = ['wind_turbine_number', 'wind_turbine_name', 'time_stamp', 'param6', 'param7', 'param8',
-                           'param9', 'param10']
+        not_double_cols = DataProcessing.NOT_DOUBLE_COLS
 
         # 删除 有功功率 和 风速均为空的情况
         df.dropna(subset=['active_power', 'wind_velocity'], how='any', inplace=True)
-        trans_print(origin_wind_name, wind_col_name, "删除有功功率和风速有空的情况后:", df.shape)
-        df.replace(np.nan, -999999999, inplace=True)
+        debug(origin_wind_name, wind_col_name, "删除有功功率和风速有空的情况后:", df.shape)
+        df.replace(np.nan, DataProcessing.NAN_REPLACE_VALUE, inplace=True)
         number_cols = df.select_dtypes(include=['number']).columns.tolist()
         for col in df.columns:
             if col not in not_double_cols and col not in number_cols:
@@ -88,8 +88,8 @@ class StatisticsAndSaveTmpFormalFile(object):
                     df[col] = pd.to_numeric(df[col], errors='coerce')
                     # 删除包含NaN的行(即那些列A转换失败的行)
                     df = df.dropna(subset=[col])
-                    trans_print(origin_wind_name, wind_col_name, "删除非数值列名:", col)
-        df.replace(-999999999, np.nan, inplace=True)
+                    debug(origin_wind_name, wind_col_name, "删除非数值列名:", col)
+        df.replace(DataProcessing.NAN_REPLACE_VALUE, np.nan, inplace=True)
 
         df.drop_duplicates(['wind_turbine_number', 'time_stamp'], keep='first', inplace=True)
 
@@ -102,40 +102,40 @@ class StatisticsAndSaveTmpFormalFile(object):
         # 删除每行有空值的行(2025-3-24)
         # origin_count = df.shape[0]
         # df = df.dropna()
-        # trans_print(f'原始数据量:{origin_count},去除na后数据量:{df.shape[0]}')
+        # trans_print(f"原始数据量:{origin_count},去除na后数据量:{df.shape[0]}")
 
         # 如果秒级有可能合并到分钟级
         # TODO add 秒转分钟
         if self.trans_param.boolean_sec_to_min:
             df['time_stamp'] = df['time_stamp'].apply(lambda x: x + pd.Timedelta(minutes=(10 - x.minute % 10) % 10))
-            df['time_stamp'] = df['time_stamp'].dt.floor('10T')
+            df['time_stamp'] = df['time_stamp'].dt.floor(DataProcessing.TIME_INTERVAL)
             df = df.groupby(['wind_turbine_number', 'time_stamp']).mean().reset_index()
-        trans_print('有功功率前10个', df.head(10)['active_power'].values)
+        debug('有功功率前10个', df.head(10)['active_power'].values)
         power_df = df[df['active_power'] > 0]
-        trans_print(origin_wind_name, wind_col_name, "功率大于0的数量:", power_df.shape)
+        debug(origin_wind_name, wind_col_name, "功率大于0的数量:", power_df.shape)
         power = power_df.sample(int(power_df.shape[0] / 100))['active_power'].median()
 
-        trans_print(origin_wind_name, wind_col_name, '有功功率,中位数', power)
-        if power > 100000:
+        debug(origin_wind_name, wind_col_name, '有功功率,中位数', power)
+        if power > DataProcessing.POWER_UNIT_THRESHOLD:
             df['active_power'] = df['active_power'] / 1000
-        ## 做数据检测前,羡强行处理有功功率
+        # 做数据检测前,先强行处理有功功率
         # df = df[df['active_power'] < 50000]
 
         rated_power_and_cutout_speed_tuple = read_conf(self.rated_power_and_cutout_speed_map, str(wind_col_name))
         if rated_power_and_cutout_speed_tuple is None:
-            rated_power_and_cutout_speed_tuple = (None, None)
-            trans_print(origin_wind_name, '未从平台匹配到额定功率')
+            # rated_power_and_cutout_speed_tuple = (None, None)
+            error(origin_wind_name, '未从平台匹配到额定功率')
         else:
-            trans_print(origin_wind_name, '过滤数据前数据大小', df.shape)
-            trans_print(origin_wind_name, '额定功率', rated_power_and_cutout_speed_tuple[0])
+            debug(origin_wind_name, '过滤数据前数据大小', df.shape)
+            debug(origin_wind_name, '额定功率', rated_power_and_cutout_speed_tuple[0])
             # trans_print(origin_wind_name, '\n', df.head(10))
             filter_valid_data = FilterValidData(df, rated_power_and_cutout_speed_tuple[0])
             try:
                 df = filter_valid_data.run()
             except:
-                trans_print(origin_wind_name, '过滤数据异常', filename)
+                error(origin_wind_name, '过滤数据异常', filename)
                 raise
-            trans_print(origin_wind_name, '过滤数据后数据大小', df.shape)
+            debug(origin_wind_name, '过滤数据后数据大小', df.shape)
 
             # 如果有需要处理的,先进行代码处理,在进行打标签
             # exec_code = get_trans_exec_code(self.paths_and_table.exec_id, self.paths_and_table.read_type)
@@ -147,10 +147,10 @@ class StatisticsAndSaveTmpFormalFile(object):
             if power_df.shape[0] == 0:
                 df.loc[:, 'lab'] = -1
             else:
-                class_identifiler = ClassIdentifier(wind_turbine_number=origin_wind_name, origin_df=df,
-                                                    rated_power=rated_power_and_cutout_speed_tuple[0],
-                                                    cut_out_speed=rated_power_and_cutout_speed_tuple[1])
-                df = class_identifiler.run()
+                class_identifier = ClassIdentifier(wind_turbine_number=origin_wind_name, origin_df=df,
+                                                   rated_power=rated_power_and_cutout_speed_tuple[0],
+                                                   cut_out_speed=rated_power_and_cutout_speed_tuple[1])
+                df = class_identifier.run()
 
             del power_df
 
@@ -163,7 +163,7 @@ class StatisticsAndSaveTmpFormalFile(object):
             df['year_month'] = df[['year', 'month']].apply(lambda x: str(x['year']) + str(x['month']).zfill(2), axis=1)
             cols = df.columns
 
-            if self.paths_and_table.read_type == 'second':
+            if self.paths_and_table.read_type == Types.SECOND:
                 type_col = 'year_month'
             else:
                 type_col = 'year'
@@ -185,29 +185,42 @@ class StatisticsAndSaveTmpFormalFile(object):
             self.set_statistics_data(df)
 
             del df
-            trans_print("保存" + str(wind_col_name) + "成功")
+            debug("保存" + str(wind_col_name) + "成功")
 
-    def mutiprocessing_to_save_file(self):
+    def multiprocessing_to_save_file(self):
         # 开始保存到正式文件
         all_tmp_files = read_excel_files(self.paths_and_table.get_read_tmp_path())
-        # split_count = self.pathsAndTable.multi_pool_count
-        split_count = use_files_get_max_cpu_count(all_tmp_files)
-        all_arrays = split_array(all_tmp_files, split_count)
+
+        if not all_tmp_files:
+            debug("没有临时文件需要处理")
+            return
+
+        # 计算最佳进程数
+        max_processes = use_files_get_max_cpu_count(all_tmp_files)
+        max_processes = min(max_processes, len(all_tmp_files), ParallelProcessing.MAX_PROCESSES)  # 限制最大进程数
 
         try:
-            for index, arr in enumerate(all_arrays):
-                with multiprocessing.Pool(split_count) as pool:
-                    pool.starmap(self.save_to_csv, [(i,) for i in arr])
-                update_trans_transfer_progress(self.paths_and_table.id,
-                                               round(50 + 15 * (index + 1) / len(all_arrays), 2),
-                                               self.paths_and_table.save_db)
+            # 创建一个进程池处理所有文件
+            with multiprocessing.Pool(max_processes) as pool:
+                # 分批次处理并更新进度
+                batch_size = max(1, len(all_tmp_files) // ParallelProcessing.MAX_BATCHES)  # 批次数上限由 MAX_BATCHES 控制
+
+                for i in range(0, len(all_tmp_files), batch_size):
+                    batch_files = all_tmp_files[i:i + batch_size]
+                    pool.starmap(self.save_to_csv, [(file,) for file in batch_files])
+
+                    # 更新进度
+                    progress = 50 + 15 * (i + len(batch_files)) / len(all_tmp_files)
+                    update_trans_transfer_progress(self.paths_and_table.id,
+                                                   round(progress, 2),
+                                                   self.paths_and_table.save_db)
 
         except Exception as e:
-            trans_print(traceback.format_exc())
+            error(traceback.format_exc())
             message = "保存文件错误,系统返回错误:" + str(e)
             raise ValueError(message)
 
     def run(self):
-        self.mutiprocessing_to_save_file()
+        self.multiprocessing_to_save_file()
         update_trans_transfer_progress(self.paths_and_table.id, 65,
                                        self.paths_and_table.save_db)

+ 42 - 7
etl/wind_power/min_sec/TransParam.py

@@ -1,23 +1,58 @@
 # -*- coding: utf-8 -*-
 # @Time    : 2024/5/16
 # @Author  : 魏志亮
+from typing import Optional, Dict, List
 
 
 class TransParam(object):
+    """转换参数类
+    
+    存储数据转换过程中的各种参数配置
+    """
 
-    def __init__(self, read_type=None, read_path=None, cols_tran=dict(),
-                 wind_name_exec=str(), is_vertical_table=False, vertical_cols=list(), vertical_key=None,
-                 vertical_value=None, index_cols=list(), merge_columns=False, resolve_col_prefix=None,
-                 need_valid_cols=True, wind_col_trans: dict = None, boolean_sec_to_min=False):
+    def __init__(self, 
+                 read_type: Optional[str] = None, 
+                 read_path: Optional[str] = None, 
+                 cols_tran: Dict[str, str] = None,
+                 wind_name_exec: str = "", 
+                 is_vertical_table: bool = False, 
+                 vertical_cols: List[str] = None,
+                 vertical_key: Optional[str] = None,
+                 vertical_value: Optional[str] = None, 
+                 index_cols: List[str] = None, 
+                 merge_columns: bool = False, 
+                 resolve_col_prefix: Optional[str] = None,
+                 need_valid_cols: bool = True, 
+                 wind_col_trans: Optional[Dict[str, str]] = None, 
+                 boolean_sec_to_min: bool = False):
+        """
+        初始化转换参数
+        
+        Args:
+            read_type: 读取类型,如 'second' 或 'minute'
+            read_path: 读取路径
+            cols_tran: 列名转换映射
+            wind_name_exec: 风机名称处理表达式
+            is_vertical_table: 是否为垂直表
+            vertical_cols: 垂直表列名列表
+            vertical_key: 垂直表键列
+            vertical_value: 垂直表值列
+            index_cols: 索引列列表
+            merge_columns: 是否合并列
+            resolve_col_prefix: 列名前缀解析表达式
+            need_valid_cols: 是否需要验证列
+            wind_col_trans: 风机列转换映射
+            boolean_sec_to_min: 是否将秒级数据转换为分钟级
+        """
         self.read_type = read_type
         self.read_path = read_path
-        self.cols_tran = cols_tran
+        self.cols_tran = cols_tran or {}
         self.is_vertical_table = is_vertical_table
         self.wind_name_exec = wind_name_exec
-        self.vertical_cols = vertical_cols
+        self.vertical_cols = vertical_cols or []
         self.vertical_key = vertical_key
         self.vertical_value = vertical_value
-        self.index_cols = index_cols
+        self.index_cols = index_cols or []
         self.merge_columns = merge_columns
         self.resolve_col_prefix = resolve_col_prefix
         self.need_valid_cols = need_valid_cols

+ 53 - 18
etl/wind_power/wave/WaveTrans.py

@@ -1,14 +1,16 @@
 import json
 import multiprocessing
 import traceback
+from typing import Tuple
 
+from conf.constants import ParallelProcessing, Types
 from service.plt_service import get_all_wind
 from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
     update_trans_status_success, update_trans_status_error
 from service.trans_service import get_wave_conf, save_df_to_db, get_or_create_wave_table, \
     get_wave_data, delete_exist_wave_data
 from utils.file.trans_methods import *
-from utils.log.trans_log import set_trance_id
+from utils.log.trans_log import set_trance_id, info, error
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 
 exec("from os.path import *")
@@ -16,8 +18,17 @@ exec("import re")
 
 
 class WaveTrans(object):
-
-    def __init__(self, id, wind_farm_code, read_dir):
+    """波形数据转换类"""
+
+    def __init__(self, id: int, wind_farm_code: str, read_dir: str):
+        """
+        初始化波形数据转换类
+        
+        Args:
+            id: 任务ID
+            wind_farm_code: 风电场编码
+            read_dir: 读取目录
+        """
         self.id = id
         self.wind_farm_code = wind_farm_code
         self.read_dir = read_dir
@@ -28,11 +39,28 @@ class WaveTrans(object):
         self.max_date = None
         self.data_count = 0
 
-    def get_data_exec(self, func_code, filepath, measupoint_names: set):
+    def get_data_exec(self, func_code: str, filepath: str, measupoint_names: List[str]) -> Optional[Tuple]:
+        """
+        执行数据获取函数
+        
+        Args:
+            func_code: 函数代码
+            filepath: 文件路径
+            measupoint_names: 测量点名称列表
+        
+        Returns:
+            数据元组
+        """
         exec(func_code)
         return locals()['get_data'](filepath, measupoint_names)
 
-    def del_exists_data(self, df):
+    def del_exists_data(self, df: pd.DataFrame):
+        """
+        删除已存在的数据
+        
+        Args:
+            df: 数据帧
+        """
         min_date, max_date = df['time_stamp'].min(), df['time_stamp'].max()
         db_df = get_wave_data(self.wind_farm_code + '_wave', min_date, max_date)
 
@@ -44,13 +72,17 @@ class WaveTrans(object):
             delete_exist_wave_data(self.wind_farm_code + "_wave", ids)
 
     def run(self):
+        """运行波形数据转换"""
         update_trans_status_running(self.id)
         trance_id = '-'.join([self.wind_farm_code, 'wave'])
         set_trance_id(trance_id)
         all_files = read_files(self.read_dir, ['txt', 'csv'])
         update_trans_transfer_progress(self.id, 5)
+
         # 最大取系统cpu的 1/2
         split_count = get_available_cpu_count_with_percent(1 / 2)
+        # 限制最大进程数
+        split_count = min(split_count, ParallelProcessing.MAX_PROCESSES)
 
         all_wind, _ = get_all_wind(self.wind_farm_code, False)
 
@@ -58,11 +90,11 @@ class WaveTrans(object):
 
         wave_conf = get_wave_conf(self.wind_farm_code)
 
-        base_param_exec = wave_conf['base_param_exec']
+        base_param_exec = wave_conf.get('base_param_exec', '')
         map_dict = {}
         if base_param_exec:
             base_param_exec = base_param_exec.replace('\r\n', '\n').replace('\t', '    ')
-            trans_print(base_param_exec)
+            info(base_param_exec)
             if 'import ' in base_param_exec:
                 raise Exception("方法不支持import方法")
 
@@ -72,23 +104,26 @@ class WaveTrans(object):
 
         wind_turbine_name_set = set()
 
-        all_array = split_array(all_files, split_count * 10)
+        # 按进程数的10倍切分文件,减少进度更新与池创建的开销
+        batch_size = split_count * 10
+        all_array = split_array(all_files, batch_size)
         total_index = len(all_array)
+
         for index, now_array in enumerate(all_array):
             index_begin = datetime.datetime.now()
             with multiprocessing.Pool(split_count) as pool:
                 try:
                     file_datas = pool.starmap(self.get_data_exec,
                                               [(base_param_exec, i, list(map_dict.keys())) for i in now_array])
-                    trans_print(f'总数:{len(now_array)},返回个数{len(file_datas)}')
+                    info(f'总数:{len(now_array)},返回个数{len(file_datas)}')
                 except Exception as e:
                     message = str(e)
-                    trans_print(traceback.format_exc())
+                    error(traceback.format_exc())
                     update_trans_status_error(self.id, message[0:len(message) if len(message) < 100 else 100])
                     raise e
 
             update_trans_transfer_progress(self.id, 20 + int(index / total_index * 60))
-            trans_print("读取文件耗时:", datetime.datetime.now() - self.begin)
+            info("读取文件耗时:", datetime.datetime.now() - self.begin)
 
             result_list = list()
             for file_data in file_datas:
@@ -96,7 +131,7 @@ class WaveTrans(object):
                     wind_turbine_name, time_stamp, sampling_frequency, rotational_speed, mesure_point_name, type, mesure_data = \
                         file_data[0], file_data[1], file_data[2], file_data[3], file_data[4], file_data[5], file_data[6]
 
-                    if mesure_point_name in map_dict.keys():
+                    if mesure_point_name in map_dict:
                         wind_turbine_name_set.add(wind_turbine_name)
                         if self.min_date is None or self.min_date > time_stamp:
                             self.min_date = time_stamp
@@ -109,7 +144,7 @@ class WaveTrans(object):
                              mesure_data])
 
             if result_list:
-                self.data_count = self.data_count + len(result_list)
+                self.data_count += len(result_list)
                 df = pd.DataFrame(result_list,
                                   columns=['wind_turbine_name', 'time_stamp', 'rotational_speed', 'sampling_frequency',
                                            'mesure_point_name', 'type', 'mesure_data'])
@@ -118,16 +153,16 @@ class WaveTrans(object):
                 df.dropna(subset=['mesure_point_name'], inplace=True)
                 df['wind_turbine_number'] = df['wind_turbine_name'].map(all_wind).fillna(df['wind_turbine_name'])
 
+                # 将测量数据逐行序列化为JSON字符串,便于入库
                 df['mesure_data'] = df['mesure_data'].apply(lambda x: json.dumps(x))
 
                 df.sort_values(by=['time_stamp', 'mesure_point_name'], inplace=True)
                 # self.del_exists_data(df)
                 save_df_to_db(self.wind_farm_code + '_wave', df, batch_count=400)
-            trans_print(f"总共{total_index}组,当前{index + 1}", "本次写入耗时:", datetime.datetime.now() - index_begin,
-                        "总耗时:", datetime.datetime.now() - self.begin)
+            info(f"总共{total_index}组,当前{index + 1}", "本次写入耗时:", datetime.datetime.now() - index_begin,
+                 "总耗时:", datetime.datetime.now() - self.begin)
 
-        update_trans_status_success(self.id, len(wind_turbine_name_set), None,
+        update_trans_status_success(self.id, len(wind_turbine_name_set), Types.WAVE,
                                     self.min_date, self.max_date, self.data_count)
 
-        # update_trans_status_success(self.id)
-        trans_print("总耗时:", datetime.datetime.now() - self.begin)
+        info("总耗时:", datetime.datetime.now() - self.begin)

+ 3 - 3
service/common_connect.py

@@ -1,5 +1,5 @@
-from utils.db.ConnectMysql import ConnectMysql
+from utils.db.ConnectMysql import MySQLDatabase
 
-plt = ConnectMysql("plt")
+plt = MySQLDatabase("plt")
 
-trans = ConnectMysql("trans")
+trans = MySQLDatabase("trans")

+ 6 - 2
service/trans_conf_service.py

@@ -4,6 +4,7 @@
 from datetime import datetime
 
 from service.common_connect import trans
+from utils.log.trans_log import info
 
 
 def update_timeout_trans_data():
@@ -46,6 +47,7 @@ def update_trans_status_error(id, message="", save_db=True):
 
         message = message if len(message) <= 200 else message[0:200]
         trans.execute(exec_sql, (message, id))
+    info("执行失败:", message)
 
 
 def update_trans_status_success(id, wind_count=0, time_granularity=0,
@@ -70,14 +72,16 @@ def update_trans_status_success(id, wind_count=0, time_granularity=0,
             trans.execute(exec_sql, (wind_count, time_granularity, id))
 
 
-def update_trans_transfer_progress(id,  transfer_progress=0, save_db=True):
-    print(id,  transfer_progress)
+def update_trans_transfer_progress(id, transfer_progress=0, save_db=True):
+    print(id, transfer_progress)
     if save_db:
         exec_sql = """
         update data_transfer set transfer_progress =%s where id = %s 
         """
         trans.execute(exec_sql, (int(transfer_progress), id))
 
+    info('当前进度:', transfer_progress)
+
 
 def get_now_running_count():
     query_running_sql = """

+ 95 - 72
service/trans_service.py

@@ -9,53 +9,65 @@ import pandas as pd
 from service.common_connect import trans
 from service.trans_conf_service import create_wave_table
 from utils.file.trans_methods import split_array
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info, error
 
 
-def get_min_sec_conf(field_code, trans_type) -> dict:
-    query_sql = "SELECT * FROM trans_conf where wind_code = %s and type = %s and status = 1"
-    res = trans.execute(query_sql, (field_code, trans_type))
+def get_config(table_name, field_code, trans_type=None, field_name='wind_code', status=1) -> dict:
+    """
+    通用配置获取函数
+    
+    Args:
+        table_name: 表名
+        field_code: 字段值
+        trans_type: 类型参数
+        field_name: 字段名,默认为wind_code
+        status: 状态值,默认为1
+        
+    Returns:
+        配置字典
+    """
+    if table_name == 'warn_fault_conf':
+        types = list()
+        if trans_type == 'fault':
+            types.append(1)
+        elif trans_type == 'warn':
+            types.append(2)
+        else:
+            error(f"未找到{trans_type}告警/故障的配置")
+            raise ValueError(f"未找到{trans_type}告警/故障的配置")
+        types.append(3)
+        query_sql = f"SELECT * FROM {table_name} where {field_name} = %s and type in %s and status = %s"
+        params = (field_code, types, status)
+    elif table_name == 'trans_conf' and field_name == 'wind_name':
+        query_sql = f"SELECT * FROM {table_name} where {field_name} = %s and type = %s and status = %s"
+        params = (field_code, trans_type, status)
+    elif table_name == 'trans_conf':
+        query_sql = f"SELECT * FROM {table_name} where {field_name} = %s and type = %s and status = %s"
+        params = (field_code, trans_type, status)
+    else:
+        query_sql = f"SELECT * FROM {table_name} where {field_name} = %s and status = %s"
+        params = (field_code, status)
+
+    res = trans.execute(query_sql, params)
     if type(res) == tuple or type(res) == str:
         return None
     return res[0]
 
 
-def get_min_sec_conf_test(field_code, trans_type) -> dict:
-    query_sql = "SELECT * FROM trans_conf where wind_name = %s and type = %s and status = 1"
-    res = trans.execute(query_sql, (field_code, trans_type))
-    print(res)
-    if type(res) == tuple or type(res) == str:
-        return None
-    return res[0]
+def get_min_sec_conf(field_code, trans_type) -> dict:
+    return get_config('trans_conf', field_code, trans_type)
 
 
-def get_fault_warn_conf(field_code, trans_type) -> dict:
-    types = list()
-    if trans_type == 'fault':
-        types.append(1)
-    elif trans_type == 'warn':
-        types.append(2)
-    else:
-        trans_print(f"未找到{trans_type}告警/故障的配置")
-        raise ValueError(f"未找到{trans_type}告警/故障的配置")
+def get_min_sec_conf_test(field_code, trans_type) -> dict:
+    return get_config('trans_conf', field_code, trans_type, field_name='wind_name')
 
-    types.append(3)
 
-    query_sql = "SELECT * FROM warn_fault_conf where wind_code = %s and type in %s and status = 1"
-    res = trans.execute(query_sql, (field_code, types))
-    print(res)
-    if type(res) == tuple or type(res) == str:
-        return None
-    return res[0]
+def get_fault_warn_conf(field_code, trans_type) -> dict:
+    return get_config('warn_fault_conf', field_code, trans_type)
 
 
 def get_wave_conf(field_code) -> dict:
-    query_sql = "SELECT * FROM wave_conf where wind_code = %s and status = 1"
-    res = trans.execute(query_sql, (field_code))
-    print(res)
-    if type(res) == tuple or type(res) == str:
-        return None
-    return res[0]
+    return get_config('wave_conf', field_code)
 
 
 def creat_min_sec_table(table_name, trans_type, wind_farm_name='', use_tidb=False):
@@ -64,7 +76,7 @@ def creat_min_sec_table(table_name, trans_type, wind_farm_name='', use_tidb=Fals
     """
     count = trans.execute(exists_table_sql)[0]['count']
     if count > 0:
-        trans_print(f"{table_name}已存在")
+        info(f"{table_name}已存在")
 
     if trans_type == 'second':
         add_key = 'KEY `year_month` (`year_month`)'
@@ -197,52 +209,63 @@ def drop_exists_data(table_name, wind_turbine_number, min_date, max_date):
     """
 
     count = trans.execute(sql)
-    trans_print(f"删除数据{count}条,{table_name},{wind_turbine_number},{min_date},{max_date}")
-
+    info(f"删除数据{count}条,{table_name},{wind_turbine_number},{min_date},{max_date}")
 
-def save_scada_file_to_db(table_name, file: str, wind_turbine_number, date_str, batch_count=100000, use_tidb=False):
-    base_name = path.basename(file)
-    df = pd.read_csv(file)
-    # if use_tidb:
-    #     min_date = df['time_stamp'].min()
-    #     max_date = df['time_stamp'].max()
-    #     # drop_exists_data(table_name, wind_turbine_number, min_date, max_date)
-    # else:
-    #     add_or_remove_partation(table_name, date_str, wind_turbine_number)
-
-    add_or_remove_partation(table_name, date_str, wind_turbine_number)
 
+def save_data_to_db(table_name: str, data, batch_count=100000, wind_turbine_number=None, date_str=None, file_name=None):
+    """
+    通用数据保存函数
+    
+    Args:
+        table_name: 表名
+        data: 数据,可以是DataFrame或文件路径
+        batch_count: 批处理大小
+        wind_turbine_number: 风机编号
+        date_str: 日期字符串
+        file_name: 文件名
+        
+    Returns:
+        None
+    """
     try:
-        trans_print(f"保存{table_name},{base_name},{wind_turbine_number},数据:{df.shape[0]}")
-        trans.execute_df_save(df, table_name, batch_count)
-        trans_print(f"保存到{table_name},{base_name},{wind_turbine_number} 成功,总条数:{df.shape[0]}")
+        # 处理数据
+        if isinstance(data, str):
+            # 从文件读取数据
+            df = pd.read_csv(data)
+            file_name = file_name or path.basename(data)
+        else:
+            # 直接使用DataFrame
+            df = data
+
+        # 处理分区
+        if wind_turbine_number and date_str:
+            add_or_remove_partation(table_name, date_str, wind_turbine_number)
+
+        # 保存数据
+        if wind_turbine_number:
+            trans.execute_df_save(df, table_name, batch_count)
+            info(f"保存到{table_name},{file_name},{wind_turbine_number} 成功,总条数:{df.shape[0]}")
+        else:
+            trans.execute_df_save(df, table_name, batch_count)
+            info(f"保存到{table_name}成功,总条数:{df.shape[0]}")
     except Exception as e:
-        trans_print(traceback.format_exc())
-        message = base_name + str(e)
+        if file_name:
+            message = file_name + str(e)
+        else:
+            message = str(e)
         raise Exception(message)
 
 
+def save_scada_file_to_db(table_name, file: str, wind_turbine_number, date_str, batch_count=100000, use_tidb=False):
+    save_data_to_db(table_name, file, batch_count, wind_turbine_number, date_str)
+
+
 def save_file_to_db(table_name: str, file: str, batch_count=100000):
-    base_name = path.basename(file)
-    try:
-        df = pd.read_csv(file)
-        trans_print(f"保存{table_name},总条数:{df.shape[0]}")
-        trans.execute_df_save(df, table_name, batch_count)
-        trans_print(f"保存到{table_name}成功,总条数:{df.shape[0]}")
-    except Exception as e:
-        trans_print(traceback.format_exc())
-        message = base_name + str(e)
-        raise Exception(message)
+    save_data_to_db(table_name, file, batch_count)
 
 
-def save_df_to_db(table_name: str, df: pd.DataFrame(), batch_count=100000):
-    try:
-        trans_print(f"保存{table_name},总条数:{df.shape[0]}")
-        trans.execute_df_save(df, table_name, batch_count)
-        trans_print(f"保存到{table_name}成功,总条数:{df.shape[0]}")
-    except Exception as e:
-        trans_print(traceback.format_exc())
-        raise Exception(str(e))
+def save_df_to_db(table_name: str, df: pd.DataFrame, batch_count=100000):
+    save_data_to_db(table_name, df, batch_count)
 
 
 def batch_statistics(table_name):
@@ -251,7 +274,7 @@ def batch_statistics(table_name):
         res = trans.execute(query_sql)
         return res[0]
     except:
-        trans_print(traceback.format_exc())
+        error(traceback.format_exc())
         return None
 
 
@@ -319,7 +342,7 @@ def get_trans_exec_code(id, query_type):
     if type(res) == tuple or type(res) == str:
         return None
     exec_code = res[0]['exec_code']
-    trans_print("任务ID", id, '类型', type, '获取到执行代码:', exec_code)
+    info("任务ID", id, '类型', type, '获取到执行代码:', exec_code)
     return exec_code
 
 

+ 4 - 2
utils/common.py

@@ -1,3 +1,5 @@
-excel_types = ['xls', 'xlsx', 'xlsm', 'xlsb', 'odf', 'ods', 'csv', 'csv.gz']
+from conf.constants import FileTypes
 
-zip_types = ['rar', 'zip']
+excel_types = FileTypes.EXCEL_TYPES
+
+zip_types = FileTypes.ZIP_TYPES

+ 137 - 12
utils/conf/read_conf.py

@@ -1,22 +1,147 @@
 # -*- coding: utf-8 -*-
 # @Time    : 2024/6/7
 # @Author  : 魏志亮
+import os
 
 import yaml
+from typing import Any, Optional, Dict
 
 
-def yaml_conf(path, encoding='utf-8'):
-    with open(path, 'r', encoding=encoding) as f:
-        data = yaml.safe_load(f)
-    return data
+def load_yaml_config(file_path: str, encoding: str = 'utf-8') -> Dict[str, Any]:
+    """
+    加载YAML配置文件
+    
+    Args:
+        file_path: YAML文件路径
+        encoding: 文件编码,默认为utf-8
+        
+    Returns:
+        解析后的配置字典
+        
+    Raises:
+        FileNotFoundError: 文件不存在时抛出
+        yaml.YAMLError: YAML解析错误时抛出
+    """
+    try:
+        with open(file_path, 'r', encoding=encoding) as f:
+            data = yaml.safe_load(f)
+            # 确保返回字典类型,防止YAML文件为空时返回None
+            return data if isinstance(data, dict) else {}
+    except FileNotFoundError:
+        raise FileNotFoundError(f"配置文件不存在: {file_path}")
+    except yaml.YAMLError as e:
+        raise yaml.YAMLError(f"YAML解析错误: {e}")
 
 
-def read_conf(dict_conf, col, default_value=None):
-    if col in dict_conf:
-        res = dict_conf[col]
-        if res is None and default_value is not None:
-            return default_value
-        return res
-    else:
-        return default_value
+def get_config_value(config: Dict[str, Any], key: str, default: Optional[Any] = None) -> Any:
+    """
+    从配置字典中安全地获取值
+    
+    Args:
+        config: 配置字典
+        key: 配置键名
+        default: 默认值,当键不存在或值为None时返回
+        
+    Returns:
+        配置值或默认值
+    """
+    # 处理config为None的情况
+    if config is None:
+        return default
+    
+    # 支持嵌套键,如 "database.host"
+    keys = key.split('.')
+    value = config
+    
+    for k in keys:
+        if isinstance(value, dict) and k in value:
+            value = value[k]
+        else:
+            value = None
+            break
+    
+    # 如果值为None且提供了默认值,返回默认值
+    if value is None and default is not None:
+        return default
+    
+    return value
 
+
+def merge_configs(base_config: Dict[str, Any], override_config: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    合并配置字典
+    
+    Args:
+        base_config: 基础配置
+        override_config: 覆盖配置
+        
+    Returns:
+        合并后的配置
+    """
+    result = base_config.copy()
+    
+    for key, value in override_config.items():
+        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
+            # 递归合并嵌套字典
+            result[key] = merge_configs(result[key], value)
+        else:
+            # 直接覆盖
+            result[key] = value
+    
+    return result
+
+
+def load_config_with_env(file_path: str, encoding: str = 'utf-8') -> Dict[str, Any]:
+    """
+    加载配置文件并支持环境变量覆盖
+    
+    Args:
+        file_path: YAML文件路径
+        encoding: 文件编码,默认为utf-8
+        
+    Returns:
+        解析后的配置字典
+    """
+    # 加载基础配置
+    base_config = load_yaml_config(file_path, encoding)
+    
+    # 检查是否有环境变量覆盖
+    env_prefix = "ETL_"
+    override_config = {}
+    
+    for key, value in os.environ.items():
+        if key.startswith(env_prefix):
+            # 转换环境变量名到配置键名
+            config_key = key[len(env_prefix):].lower().replace('_', '.')
+            
+            # 解析值
+            if value.lower() == 'true':
+                parsed_value = True
+            elif value.lower() == 'false':
+                parsed_value = False
+            elif value.isdigit():
+                parsed_value = int(value)
+            elif '.' in value and all(part.isdigit() for part in value.split('.')):
+                parsed_value = float(value)
+            else:
+                parsed_value = value
+            
+            # 构建嵌套配置
+            keys = config_key.split('.')
+            current = override_config
+            for k in keys[:-1]:
+                if k not in current:
+                    current[k] = {}
+                current = current[k]
+            current[keys[-1]] = parsed_value
+    
+    # 合并配置
+    if override_config:
+        base_config = merge_configs(base_config, override_config)
+    
+    return base_config
+
+
+# 为了保持向后兼容,保留原函数名(可选)
+yaml_conf = load_yaml_config
+read_conf = get_config_value

+ 231 - 41
utils/db/ConnectMysql.py

@@ -1,56 +1,246 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/6/7
+# @Author  : 魏志亮
+
+import os
 import traceback
-from os import *
+from typing import Any, Dict, List, Tuple, Union
 
 import pandas as pd
 import pymysql
 from pymysql.cursors import DictCursor
 from sqlalchemy import create_engine
+from sqlalchemy.engine import Engine
+
+from utils.conf.read_conf import load_yaml_config
+from utils.log.trans_log import error, info, debug
+
+
+class MySQLDatabase:
+    """MySQL数据库连接管理类"""
+
+    # 类级别的引擎缓存,避免重复创建
+    _engine_cache = {}
+
+    def __init__(self, connection_name: str):
+        """
+        初始化MySQL数据库连接
+        
+        Args:
+            connection_name: 配置文件中对应的连接名称
+        """
+        # 获取配置文件路径
+        config_path = os.environ.get('ETL_CONF')
+        if not config_path:
+            raise ValueError("环境变量 ETL_CONF 未设置")
+
+        # 加载配置
+        self.yaml_data = load_yaml_config(config_path)
+        self.connection_name = connection_name
+
+        # 验证配置是否存在
+        if connection_name not in self.yaml_data:
+            raise KeyError(f"配置中不存在连接名称: {connection_name}")
+
+        self.config = self.yaml_data[connection_name]
+        self.database = self.config.get('database', '')
+
+        # 验证必要配置项
+        required_keys = ['host', 'user', 'password', 'database']
+        missing_keys = [key for key in required_keys if key not in self.config]
+        if missing_keys:
+            raise KeyError(f"连接配置缺少必要项: {missing_keys}")
+
+    def get_connection(self) -> pymysql.Connection:
+        """
+        从连接池中获取一个连接
+        
+        Returns:
+            pymysql连接对象
+        """
+        # 创建连接配置副本,避免修改原配置
+        conn_config = self.config.copy()
+        # 移除可能不需要的配置项(如果有)
+        conn_config.pop('charset', None)  # pymysql连接时charset参数可能会冲突
+
+        return pymysql.connect(
+            cursorclass=DictCursor,
+            charset='utf8mb4',
+            **conn_config
+        )
+
+    def execute_query(self, sql: str, params: Union[Tuple, List, Dict] = None) -> List[Dict[str, Any]]:
+        """
+        执行SQL查询并返回结果
+        
+        Args:
+            sql: SQL语句
+            params: SQL参数,可以是元组、列表或字典
+            
+        Returns:
+            查询结果列表,每个元素为字典形式
+            
+        Raises:
+            Exception: SQL执行错误时抛出
+        """
+        params = params or ()
+        conn = None
+        cursor = None
+
+        try:
+            conn = self.get_connection()
+            cursor = conn.cursor()
+
+            # 执行SQL
+            cursor.execute(sql, params)
+            debug("开始执行SQL:\n", cursor._executed)
 
-from utils.conf.read_conf import yaml_conf
-from utils.log.trans_log import trans_print
+            # 提交事务
+            conn.commit()
 
+            # 获取结果
+            result = cursor.fetchall()
+            return result
 
-class ConnectMysql:
+        except Exception as e:
+            error(f"执行SQL出错: {sql}")
+            error(f"错误信息: {e}")
+            error(traceback.format_exc())
 
-    def __init__(self, connet_name):
-        self.yaml_data = yaml_conf(environ.get('ETL_CONF'))
-        self.connet_name = connet_name
-        self.config = self.yaml_data[self.connet_name]
-        self.database = self.config['database']
+            if conn:
+                conn.rollback()
+            raise e
 
-    # 从连接池中获取一个连接
-    def get_conn(self):
-        return pymysql.connect(**self.config)
+        finally:
+            # 确保资源被释放
+            if cursor:
+                cursor.close()
+            if conn:
+                conn.close()
 
-    # 使用连接执行sql
-    def execute(self, sql, params=tuple()):
+    def execute_update(self, sql: str, params: Union[Tuple, List, Dict] = None) -> int:
+        """
+        执行更新操作(INSERT, UPDATE, DELETE)
+        
+        Args:
+            sql: SQL语句
+            params: SQL参数
+            
+        Returns:
+            影响的行数
+        """
+        params = params or ()
+        conn = None
+        cursor = None
 
-        with self.get_conn() as conn:
-            with conn.cursor(cursor=DictCursor) as cursor:
-                try:
-                    cursor.execute(sql, params)
-                    trans_print("开始执行SQL:", cursor._executed)
-                    conn.commit()
-                    result = cursor.fetchall()
-                    return result
-                except Exception as e:
-                    trans_print(f"执行sql:{sql},报错:{e}")
-                    trans_print(traceback.format_exc())
-                    conn.rollback()
-                    raise e
+        try:
+            conn = self.get_connection()
+            cursor = conn.cursor()
 
-    def get_engine(self):
+            cursor.execute(sql, params)
+            debug("开始执行SQL:", cursor._executed)
+
+            conn.commit()
+            return cursor.rowcount
+
+        except Exception as e:
+            error(f"执行更新SQL出错: {sql}")
+            error(f"错误信息: {e}")
+            error(traceback.format_exc())
+
+            if conn:
+                conn.rollback()
+            raise e
+
+        finally:
+            if cursor:
+                cursor.close()
+            if conn:
+                conn.close()
+
+    def get_engine(self) -> Engine:
+        """
+        获取SQLAlchemy引擎,使用缓存避免重复创建
+        
+        Returns:
+            SQLAlchemy引擎对象
+        """
+        # 构建缓存键
         config = self.config
-        username = config['user']
-        password = config['password']
-        host = config['host']
-        port = config['port']
-        dbname = config['database']
-        return create_engine(f'mysql+pymysql://{username}:{password}@{host}:{port}/{dbname}')
-
-    def execute_df_save(self, df, table_name, chunk_size=10000):
-        df.to_sql(table_name, self.get_engine(), index=False, if_exists='append', chunksize=chunk_size)
-
-    def read_sql_to_df(self, sql):
-        df = pd.read_sql_query(sql, self.get_engine())
-        return df
+        cache_key = f"{config['host']}:{config['port']}:{config['user']}:{config['database']}"
+
+        # 检查缓存中是否已有引擎
+        if cache_key not in self._engine_cache:
+            username = config['user']
+            password = config['password']
+            host = config['host']
+            port = config['port']
+            dbname = config['database']
+
+            # 构建连接URL
+            connection_url = f'mysql+pymysql://{username}:{password}@{host}:{port}/{dbname}?charset=utf8mb4'
+
+            # 创建引擎并缓存
+            self._engine_cache[cache_key] = create_engine(
+                connection_url,
+                pool_size=10,  # 增加连接池大小
+                pool_recycle=3600,
+                pool_pre_ping=True,  # 连接池预ping,确保连接有效
+                echo=False  # 设置为True可打印SQL日志
+            )
+
+        return self._engine_cache[cache_key]
+
+    def save_dataframe(self, df: pd.DataFrame, table_name: str, chunk_size: int = 10000,
+                       if_exists: str = 'append') -> None:
+        """
+        将DataFrame保存到数据库表
+        
+        Args:
+            df: pandas DataFrame对象
+            table_name: 目标表名
+            chunk_size: 每批写入的行数
+            if_exists: 表存在时的处理方式:'fail', 'replace', 'append'
+        """
+        try:
+            df.to_sql(
+                table_name,
+                self.get_engine(),
+                index=False,
+                if_exists=if_exists,
+                chunksize=chunk_size,
+                method='multi'  # 使用多值插入提高性能
+            )
+            info(f"成功保存 {len(df)} 条数据到表 {table_name}")
+
+        except Exception as e:
+            error(f"保存DataFrame到表 {table_name} 失败: {e}")
+            error(traceback.format_exc())
+            raise e
+
+    def read_sql_to_dataframe(self, sql: str) -> pd.DataFrame:
+        """
+        执行SQL查询并返回DataFrame
+        
+        Args:
+            sql: SQL查询语句
+            
+        Returns:
+            查询结果的DataFrame
+        """
+        try:
+            df = pd.read_sql_query(sql, self.get_engine())
+            debug(f"查询返回 {len(df)} 行数据")
+            return df
+
+        except Exception as e:
+            error(f"执行SQL查询失败: {sql}")
+            error(f"错误信息: {e}")
+            error(traceback.format_exc())
+            raise e
+
    # Backward-compatible aliases for the pre-refactor method names, so
    # existing callers of the old ConnectMysql API keep working unchanged.
    get_conn = get_connection
    execute = execute_query
    execute_df_save = save_dataframe
    read_sql_to_df = read_sql_to_dataframe

+ 6 - 6
utils/db/ConnectMysql_tidb_fix.py

@@ -8,7 +8,7 @@ from pymysql.cursors import DictCursor
 from sqlalchemy import create_engine
 
 from utils.conf.read_conf import yaml_conf
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import error, debug
 
 
 class ConnectMysql:
@@ -30,13 +30,13 @@ class ConnectMysql:
             with conn.cursor(cursor=DictCursor) as cursor:
                 try:
                     cursor.execute(sql, params)
-                    trans_print("开始执行SQL:", cursor._executed)
+                    debug("开始执行SQL:", cursor._executed)
                     conn.commit()
                     result = cursor.fetchall()
                     return result
                 except Exception as e:
-                    trans_print(f"执行sql:{sql},报错:{e}")
-                    trans_print(traceback.format_exc())
+                    error(f"执行sql:{sql},报错:{e}")
+                    error(traceback.format_exc())
                     conn.rollback()
                     raise e
 
@@ -66,10 +66,10 @@ class ConnectMysql:
                     df.to_sql(table_name, engine, if_exists='append', index=False, chunksize=chunksize)
                 except Exception as e:
                     retry_count += 1
-                    trans_print(f" 第 {retry_count} 次重试, 错误: {str(e)}")
+                    error(f" 第 {retry_count} 次重试, 错误: {str(e)}")
                     time.sleep(5 * retry_count)  # 指数退避
                     if retry_count == max_retries:
-                        trans_print(f"处理失败: {str(e)}")
+                        error(f"处理失败: {str(e)}")
                         raise
         except Exception as e:
             engine.dispose()

+ 2 - 3
utils/df_utils/util.py

@@ -6,7 +6,7 @@ import datetime
 import pandas as pd
 
 
-def get_time_space(df, time_str):
+def estimate_time_interval(df, time_str):
     """
     :return: 查询时间间隔
     """
@@ -15,7 +15,6 @@ def get_time_space(df, time_str):
     df1['chazhi'] = df1[time_str].shift(-1) - df1[time_str]
     result = df1.sample(int(df1.shape[0] / 100))['chazhi'].value_counts().idxmax().seconds
     del df1
-    print(datetime.datetime.now() - begin)
     return result
 
 
@@ -46,7 +45,7 @@ def calculate_time_difference(now: datetime.datetime, date: datetime.datetime):
 if __name__ == '__main__':
     df = pd.read_csv(r"D:\data\清理数据\密马风电场\test_11_test\minute\WOG00469.csv")
     df['time_stamp'] = pd.to_datetime(df['time_stamp'])
-    space = get_time_space(df, 'time_stamp')
+    space = estimate_time_interval(df, 'time_stamp')
     min = df['time_stamp'].min()
     max = df['time_stamp'].max()
     result = get_time_space_count(min, max, space)

+ 139 - 38
utils/file/trans_methods.py

@@ -6,25 +6,35 @@ import datetime
 import os
 import shutil
 import warnings
+from typing import List, Dict, Optional
 
 import chardet
 import pandas as pd
 
-from utils.common import excel_types, zip_types
-from utils.log.trans_log import trans_print
+from conf.constants import FileTypes
+from utils.log.trans_log import error, debug
 
 warnings.filterwarnings("ignore")
 
 
 # 获取文件编码
-def detect_file_encoding(filename):
+def detect_file_encoding(filename: str) -> str:
+    """
+    检测文件编码
+    
+    Args:
+        filename: 文件路径
+    
+    Returns:
+        检测到的编码
+    """
     # 读取文件的前1000个字节(足够用于大多数编码检测)
     with open(filename, 'rb') as f:
         rawdata = f.read(1000)
     result = chardet.detect(rawdata)
     encoding = result['encoding']
 
-    trans_print("文件类型:", filename, encoding)
+    debug("文件类型:", filename, encoding)
 
     if encoding is None:
         encoding = 'gb18030'
@@ -35,19 +45,52 @@ def detect_file_encoding(filename):
     return 'gb18030'
 
 
-def del_blank(df=pd.DataFrame(), cols=list()):
def del_blank(df: Optional[pd.DataFrame] = None, cols: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Strip leading/trailing whitespace from the given object-dtype columns, in place.

    Args:
        df: DataFrame to clean; an empty DataFrame is used when omitted.
        cols: column names to process; names missing from ``df`` and
            non-object (non-string) columns are silently skipped.

    Returns:
        The same DataFrame, with the listed string columns stripped.
    """
    # A fresh frame per call fixes the shared-mutable-default pitfall of the
    # previous `df=pd.DataFrame()` signature (one instance reused across calls).
    if df is None:
        df = pd.DataFrame()
    if cols is None:
        cols = []
    for col in cols:
        if col in df.columns and df[col].dtype == object:
            df[col] = df[col].str.strip()
    return df
 
 
 # 切割数组到多个数组
-def split_array(array, num):
def split_array(array: List, num: int) -> List[List]:
    """
    Split a list into consecutive chunks of length ``num``.

    Args:
        array: source list.
        num: chunk length; the final chunk may be shorter.

    Returns:
        List of chunks, preserving element order.
    """
    chunks = []
    for start in range(0, len(array), num):
        chunks.append(array[start:start + num])
    return chunks
 
 
-def find_read_header(file_path, trans_cols, resolve_col_prefix=None):
+def find_read_header(file_path: str, trans_cols: List[str], resolve_col_prefix: Optional[str] = None) -> Optional[int]:
+    """
+    查找文件的表头行
+    
+    Args:
+        file_path: 文件路径
+        trans_cols: 要匹配的列名列表
+        resolve_col_prefix: 列名前缀解析表达式
+    
+    Returns:
+        表头行索引
+    """
     df = read_file_to_df(file_path, nrows=20)
     df.reset_index(inplace=True)
     count = 0
@@ -59,7 +102,7 @@ def find_read_header(file_path, trans_cols, resolve_col_prefix=None):
 
     for col in trans_cols:
         if col in df_cols:
-            count = count + 1
+            count += 1
             if count >= 2:
                 header = 0
                 break
@@ -73,7 +116,7 @@ def find_read_header(file_path, trans_cols, resolve_col_prefix=None):
             values = row.values
         for col in trans_cols:
             if col in values:
-                count = count + 1
+                count += 1
                 if count > 2:
                     header = index + 1
                     return header
@@ -82,30 +125,44 @@ def find_read_header(file_path, trans_cols, resolve_col_prefix=None):
 
 
 # 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), trans_cols=None, nrows=None, not_find_header='raise',
-                    resolve_col_prefix=None):
+def read_file_to_df(file_path: str, read_cols: Optional[List[str]] = None, trans_cols: Optional[List[str]] = None,
+                    nrows: Optional[int] = None, not_find_header: str = 'raise',
+                    resolve_col_prefix: Optional[str] = None) -> pd.DataFrame:
+    """
+    读取文件到数据帧
+    
+    Args:
+        file_path: 文件路径
+        read_cols: 要读取的列列表
+        trans_cols: 要匹配的列名列表
+        nrows: 读取的行数
+        not_find_header: 未找到表头时的处理方式
+        resolve_col_prefix: 列名前缀解析表达式
+    
+    Returns:
+        读取的数据帧
+    """
     begin = datetime.datetime.now()
-    trans_print('开始读取文件', file_path)
+    debug('开始读取文件', file_path)
     header = 0
-    find_cols = list()
     if trans_cols:
         header = find_read_header(file_path, trans_cols, resolve_col_prefix)
-        trans_print(os.path.basename(file_path), "读取第", header, "行")
+        debug(os.path.basename(file_path), "读取第", header, "行")
         if header is None:
             if not_find_header == 'raise':
                 message = '未匹配到开始行,请检查并重新指定'
-                trans_print(message)
+                debug(message)
                 raise Exception(message)
             elif not_find_header == 'ignore':
                 pass
 
-    # read_cols.extend(find_cols)
     df = pd.DataFrame()
     if header is not None:
         try:
-            if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
+            file_path_lower = str(file_path).lower()
+            if file_path_lower.endswith("csv") or file_path_lower.endswith("gz"):
                 encoding = detect_file_encoding(file_path)
-                end_with_gz = str(file_path).lower().endswith("gz")
+                end_with_gz = file_path_lower.endswith("gz")
                 if read_cols:
                     if end_with_gz:
                         df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip',
@@ -115,7 +172,6 @@ def read_file_to_df(file_path, read_cols=list(), trans_cols=None, nrows=None, no
                         df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header,
                                          on_bad_lines='warn', nrows=nrows)
                 else:
-
                     if end_with_gz:
                         df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header, nrows=nrows)
                     else:
@@ -135,16 +191,25 @@ def read_file_to_df(file_path, read_cols=list(), trans_cols=None, nrows=None, no
                     now_df['sheet_name'] = sheet_name
                     df = pd.concat([df, now_df])
                 xls.close()
-            trans_print('文件读取成功:', file_path, '数据数量:', df.shape, '耗时:', datetime.datetime.now() - begin)
+            debug('文件读取成功:', file_path, '数据数量:', df.shape, '耗时:', datetime.datetime.now() - begin)
         except Exception as e:
-            trans_print('读取文件出错', file_path, str(e))
+            error('读取文件出错', file_path, str(e))
             message = '文件:' + os.path.basename(file_path) + ',' + str(e)
             raise ValueError(message)
 
     return df
 
 
-def __build_directory_dict(directory_dict, path, filter_types=None):
+def __build_directory_dict(directory_dict: Dict[str, List[str]], path: str,
+                           filter_types: Optional[List[str]] = None) -> None:
+    """
+    构建目录文件字典
+    
+    Args:
+        directory_dict: 目录文件字典
+        path: 目录路径
+        filter_types: 文件类型过滤器
+    """
     # 遍历目录下的所有项
     for item in os.listdir(path):
         item_path = os.path.join(path, item)
@@ -156,18 +221,31 @@ def __build_directory_dict(directory_dict, path, filter_types=None):
 
             if filter_types is None or len(filter_types) == 0:
                 directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
+            else:
+                # 获取文件扩展名
+                ext = os.path.splitext(item_path)[1].lstrip('.').lower()
+                if ext in filter_types and "~$" not in item_path:
                     directory_dict[path].append(item_path)
 
 
 # 读取路径下所有的excel文件
-def read_excel_files(read_path, filter_types=None):
+def read_excel_files(read_path: str, filter_types: Optional[List[str]] = None) -> List[str]:
+    """
+    读取路径下所有的Excel文件
+    
+    Args:
+        read_path: 读取路径
+        filter_types: 文件类型过滤器
+    
+    Returns:
+        文件路径列表
+    """
     if not os.path.exists(read_path):
         return []
 
     if filter_types is None:
-        filter_types = ['xls', 'xlsx', 'csv', 'gz']
+        # filter_types = ['xls', 'xlsx', 'csv', 'gz']
+        filter_types = FileTypes.EXCEL_TYPES
     if os.path.isfile(read_path):
         return [read_path]
 
@@ -178,10 +256,20 @@ def read_excel_files(read_path, filter_types=None):
 
 
 # 读取路径下所有的文件
-def read_files(read_path, filter_types=None):
+def read_files(read_path: str, filter_types: Optional[List[str]] = None) -> List[str]:
+    """
+    读取路径下所有的文件
+    
+    Args:
+        read_path: 读取路径
+        filter_types: 文件类型过滤器
+    
+    Returns:
+        文件路径列表
+    """
     if filter_types is None:
-        filter_types = [i for i in excel_types]
-        filter_types.extend(zip_types)
+        filter_types = list(FileTypes.EXCEL_TYPES)
+        filter_types.extend(FileTypes.ZIP_TYPES)
     if os.path.isfile(read_path):
         return [read_path]
     directory_dict = {}
@@ -190,10 +278,15 @@ def read_files(read_path, filter_types=None):
     return [path1 for paths in directory_dict.values() for path1 in paths if path1]
 
 
-def copy_to_new(from_path, to_path):
-    is_file = False
-    if to_path.count('.') > 0:
-        is_file = True
+def copy_to_new(from_path: str, to_path: str) -> None:
+    """
+    复制文件到新路径
+    
+    Args:
+        from_path: 源文件路径
+        to_path: 目标文件路径
+    """
+    is_file = '.' in to_path
 
     create_file_path(to_path, is_file_path=is_file)
 
@@ -201,11 +294,13 @@ def copy_to_new(from_path, to_path):
 
 
 # 创建路径
-def create_file_path(read_path, is_file_path=False):
+def create_file_path(read_path: str, is_file_path: bool = False) -> None:
     """
     创建路径
-    :param read_path:创建文件夹的路径
-    :param is_file_path: 传入的path是否包含具体的文件名
+    
+    Args:
+        read_path: 创建文件夹的路径
+        is_file_path: 传入的path是否包含具体的文件名
     """
     if is_file_path:
         read_path = os.path.dirname(read_path)
@@ -214,9 +309,15 @@ def create_file_path(read_path, is_file_path=False):
         os.makedirs(read_path, exist_ok=True)
 
 
-def valid_eval(eval_str):
+def valid_eval(eval_str: str) -> bool:
     """
     验证 eval 是否包含非法的参数
+    
+    Args:
+        eval_str: 要验证的表达式
+    
+    Returns:
+        是否合法
     """
     safe_param = ["column", "wind_name", "df", "error_time", "str", "int"]
     eval_str_names = [node.id for node in ast.walk(ast.parse(eval_str)) if isinstance(node, ast.Name)]

+ 0 - 202
utils/file/trans_methods.py_1

@@ -1,202 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Time    : 2024/5/16
-# @Author  : 魏志亮
-import datetime
-from os import *
-import shutil
-import warnings
-
-import chardet
-import pandas as pd
-
-from utils.log.trans_log import trans_print
-
-warnings.filterwarnings("ignore")
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    trans_print("文件类型:", filename, encoding)
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding.lower() in ['utf-8', 'ascii', 'utf8']:
-        return 'utf-8'
-
-    return 'gb18030'
-
-
-def del_blank(df=pd.DataFrame(), cols=list()):
-    for col in cols:
-        if df[col].dtype == object:
-            df[col] = df[col].str.strip()
-    return df
-
-
-# 切割数组到多个数组
-def split_array(array, num):
-    return [array[i:i + num] for i in range(0, len(array), num)]
-
-
-def find_read_header(file_path, trans_cols):
-    df = read_file_to_df(file_path, nrows=20)
-    count = 0
-    header = None
-    for col in trans_cols:
-        if col in df.columns:
-            count = count + 1
-            if count >= 2:
-                header = 0
-                break
-
-    count = 0
-
-    values = list()
-    for index, row in df.iterrows():
-        values = list(row.values)
-        if type(row.name) == tuple:
-            values.extend(list(row.name))
-        for col in trans_cols:
-            if col in values:
-                count = count + 1
-                if count > 2:
-                    header = index + 1
-                    break
-
-    read_cols = []
-    for col in values:
-        if col in trans_cols:
-            read_cols.append(col)
-
-    return header, read_cols
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), trans_cols=None, nrows=None):
-    begin = datetime.datetime.now()
-    trans_print('开始读取文件', file_path)
-    header = 0
-    find_cols = list()
-    if trans_cols:
-        header, find_cols = find_read_header(file_path, trans_cols)
-        trans_print(path.basename(file_path), "读取第", header, "行")
-        if header is None:
-            message = '未匹配到开始行,请检查并重新指定'
-            trans_print(message)
-            raise Exception(message)
-
-    read_cols.extend(find_cols)
-
-    try:
-        df = pd.DataFrame()
-        if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-            encoding = detect_file_encoding(file_path)
-            end_with_gz = str(file_path).lower().endswith("gz")
-            if read_cols:
-                if end_with_gz:
-                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header,
-                                     nrows=nrows)
-                else:
-                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header,
-                                     on_bad_lines='warn', nrows=nrows)
-            else:
-
-                if end_with_gz:
-                    df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header, nrows=nrows)
-                else:
-                    df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn', nrows=nrows)
-
-        else:
-            xls = pd.ExcelFile(file_path, engine="calamine")
-            # 获取所有的sheet名称
-            sheet_names = xls.sheet_names
-            for sheet_name in sheet_names:
-                if read_cols:
-                    now_df = pd.read_excel(xls, sheet_name=sheet_name, header=header, usecols=read_cols, nrows=nrows)
-                else:
-                    now_df = pd.read_excel(xls, sheet_name=sheet_name, header=header, nrows=nrows)
-
-                now_df['sheet_name'] = sheet_name
-                df = pd.concat([df, now_df])
-            xls.close()
-        trans_print('文件读取成功:', file_path, '数据数量:', df.shape, '耗时:', datetime.datetime.now() - begin)
-    except Exception as e:
-        trans_print('读取文件出错', file_path, str(e))
-        message = '文件:' + path.basename(file_path) + ',' + str(e)
-        raise ValueError(message)
-
-    return df
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in listdir(path):
-        item_path = path.join(path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    if path.isfile(read_path):
-        return [read_path]
-
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 读取路径下所有的文件
-def read_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-def copy_to_new(from_path, to_path):
-    is_file = False
-    if to_path.count('.') > 0:
-        is_file = True
-
-    create_file_path(to_path, is_file_path=is_file)
-
-    shutil.copy(from_path, to_path)
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    if is_file_path:
-        path = path.dirname(path)
-
-    if not path.exists(path):
-        makedirs(path, exist_ok=True)
-
-
-if __name__ == '__main__':
-    datas = read_excel_files(r"D:\data\清理数据\招远风电场\WOF053600062-WOB000009_ZYFDC000012\minute")
-    for data in datas:
-        print(data)
-
-    print("*" * 20)
-
-    datas = read_excel_files(r"D:\data\清理数据\招远风电场\WOF053600062-WOB000009_ZYFDC000012\minute\WOG00066.csv.gz")
-    for data in datas:
-        print(data)

+ 99 - 25
utils/log/trans_log.py

@@ -7,6 +7,7 @@ import logging
 import sys
 from os import *
 
+from conf.constants import Log
 from utils.conf.read_conf import read_conf, yaml_conf
 
 
@@ -26,34 +27,107 @@ class ContextFilter(logging.Filter):
         return True
 
 
-logger = logging.getLogger("etl_tools")
-logger.setLevel(logging.INFO)
-stout_handle = logging.StreamHandler(sys.stdout)
-stout_handle.setFormatter(
-    logging.Formatter("%(asctime)s-%(trace_id)s: %(message)s"))
-stout_handle.setLevel(logging.INFO)
-stout_handle.addFilter(ContextFilter())
-logger.addHandler(stout_handle)
+# 初始化日志配置
def init_logger():
    """
    Build and return the shared "etl_tools" logger.

    Console handler (INFO+) is always attached. File handlers -- a daily
    ``<date>.log`` (INFO+) and ``<date>.error.log`` (ERROR+) under the
    configured ``log_path_dir`` -- are best-effort: if they cannot be created,
    logging falls back to console only.

    Returns:
        The configured logging.Logger instance.
    """
    logger = logging.getLogger("etl_tools")
    # Logger passes everything through; each handler filters by its own level.
    logger.setLevel(logging.DEBUG)

    # Re-initialisation (e.g. repeated imports in tests) must not stack handlers.
    if logger.handlers:
        logger.handlers.clear()

    formatter = logging.Formatter("%(asctime)s-%(levelname)s-%(trace_id)s: %(message)s")

    stout_handle = logging.StreamHandler(sys.stdout)
    stout_handle.setFormatter(formatter)
    stout_handle.setLevel(logging.INFO)
    stout_handle.addFilter(ContextFilter())
    logger.addHandler(stout_handle)

    try:
        config_path = environ.get('ETL_CONF')
        if config_path:
            config = yaml_conf(config_path)
            log_path_dir = read_conf(config, 'log_path_dir', Log.DEFAULT_LOG_PATH)
        else:
            log_path_dir = Log.DEFAULT_LOG_PATH

        log_path = log_path_dir + sep + Log.LOG_FILE_PREFIX + (environ['env'] if 'env' in environ else 'dev')
        file_path = path.join(log_path)

        if not path.exists(file_path):
            makedirs(file_path, exist_ok=True)

        # Daily log file (INFO and above).
        file_name = file_path + sep + str(datetime.date.today()) + '.log'
        file_handler = logging.FileHandler(file_name, encoding='utf-8')
        file_handler.setFormatter(formatter)
        file_handler.setLevel(logging.INFO)
        file_handler.addFilter(ContextFilter())
        logger.addHandler(file_handler)

        # Separate error log file (ERROR and above).
        error_file_name = file_path + sep + str(datetime.date.today()) + '.error.log'
        error_file_handler = logging.FileHandler(error_file_name, encoding='utf-8')
        error_file_handler.setFormatter(formatter)
        error_file_handler.setLevel(logging.ERROR)
        error_file_handler.addFilter(ContextFilter())
        logger.addHandler(error_file_handler)
    except Exception as e:
        # Best-effort file logging: keep running with console only, but make
        # the failure visible instead of silently swallowing it (the previous
        # `pass` left missing log files unexplained).
        logger.warning("日志文件初始化失败,仅使用控制台日志: %s", e)

    return logger
+
+
# Module-level shared logger, configured once when this module is first imported.
logger = init_logger()
+
+
def trans_print(*args, level: str = 'info'):
    """
    Join the arguments with two spaces and emit them through the module logger.

    Args:
        *args: values to log; each is converted with str().
        level: one of 'debug', 'info', 'warning', 'error'; any other value
            falls back to info.
    """
    message = "  ".join(str(a) for a in args)

    dispatch = {
        'debug': logger.debug,
        'info': logger.info,
        'warning': logger.warning,
        'error': logger.error,
    }
    # Unknown levels degrade to info, matching the original elif chain.
    dispatch.get(level, logger.info)(message)
+
+
def debug(*args):
    """Emit *args* as a DEBUG-level log entry."""
    trans_print(*args, level='debug')


def info(*args):
    """Emit *args* as an INFO-level log entry."""
    trans_print(*args, level='info')


def warning(*args):
    """Emit *args* as a WARNING-level log entry."""
    trans_print(*args, level='warning')


def error(*args):
    """Emit *args* as an ERROR-level log entry."""
    trans_print(*args, level='error')

+ 113 - 26
utils/systeminfo/sysinfo.py

@@ -1,13 +1,21 @@
-from os import *
+import os
+from typing import List
 
 import psutil
 
-from utils.log.trans_log import trans_print
+from conf.constants import ParallelProcessing
+from utils.log.trans_log import info, debug
 
 
-def print_memory_usage(detail=""):
+def print_memory_usage(detail: str = "") -> None:
+    """
+    打印内存使用情况
+    
+    Args:
+        detail: 详细信息
+    """
     # 获取当前进程ID
-    pid = getpid()
+    pid = os.getpid()
     # 获取进程信息
     py = psutil.Process(pid)
     # 获取内存信息
@@ -21,34 +29,85 @@ def print_memory_usage(detail=""):
     memory_usage_rss_mb = memory_usage_rss / (1024 ** 2)
     memory_usage_vms_mb = memory_usage_vms / (1024 ** 2)
 
-    trans_print(f"{detail},Memory usage (RSS): {memory_usage_rss_mb:.2f} MB")
-    trans_print(f"{detail},Memory usage (VMS): {memory_usage_vms_mb:.2f} MB")
+    debug(f"{detail},Memory usage (RSS): {memory_usage_rss_mb:.2f} MB")
+    debug(f"{detail},Memory usage (VMS): {memory_usage_vms_mb:.2f} MB")
 
 
-def get_cpu_count():
+def get_cpu_count() -> int:
+    """
+    获取CPU核心数
+    
+    Returns:
+        CPU核心数
+    """
     return psutil.cpu_count()
 
 
-def get_available_cpu_count_with_percent(percent: float = 1):
+def get_available_cpu_count_with_percent(percent: float = 1) -> int:
+    """
+    根据百分比获取可用CPU数
+    
+    Args:
+        percent: CPU使用百分比
+    
+    Returns:
+        可用CPU数
+    """
     cpu_count = get_cpu_count()
     return int(cpu_count * percent)
 
 
-def get_file_size(file_path):
-    return path.getsize(file_path)
-
-
-def get_dir_size(dir_path):
-    return sum(get_file_size(path.join(dir_path, file)) for file in listdir(dir_path) if
-               path.isfile(path.join(dir_path, file)))
-
-
-def get_available_memory_with_percent(percent: float = 1):
+def get_file_size(file_path: str) -> int:
+    """
+    获取文件大小
+    
+    Args:
+        file_path: 文件路径
+    
+    Returns:
+        文件大小(字节)
+    """
+    return os.path.getsize(file_path)
+
+
+def get_dir_size(dir_path: str) -> int:
+    """
+    获取目录大小
+    
+    Args:
+        dir_path: 目录路径
+    
+    Returns:
+        目录大小(字节)
+    """
+    return sum(get_file_size(os.path.join(dir_path, file)) for file in os.listdir(dir_path) if
+               os.path.isfile(os.path.join(dir_path, file)))
+
+
+def get_available_memory_with_percent(percent: float = 1) -> int:
+    """
+    根据百分比获取可用内存
+    
+    Args:
+        percent: 内存使用百分比
+    
+    Returns:
+        可用内存(字节)
+    """
     memory_info = psutil.virtual_memory()
     return int(memory_info.available * percent)
 
 
-def get_max_file_size(file_paths: list[str]):
+def get_max_file_size(file_paths: List[str]) -> int:
+    """
+    获取文件列表中的最大文件大小
+    
+    Args:
+        file_paths: 文件路径列表
+    
+    Returns:
+        最大文件大小(字节)
+    """
     max_size = 0
     for file_path in file_paths:
         file_size = get_file_size(file_path)
@@ -57,11 +116,25 @@ def get_max_file_size(file_paths: list[str]):
     return max_size
 
 
-def use_files_get_max_cpu_count(file_paths: list[str], memory_percent: float = 1 / 12, cpu_percent: float = 2 / 5):
+def use_files_get_max_cpu_count(file_paths: List[str], memory_percent: float = 1 / 12,
+                                cpu_percent: float = 2 / 5) -> int:
+    """
+    根据文件大小和内存情况计算最大进程数
+    
+    Args:
+        file_paths: 文件路径列表
+        memory_percent: 内存使用百分比
+        cpu_percent: CPU使用百分比
+    
+    Returns:
+        最大进程数
+    """
     max_file_size = get_max_file_size(file_paths)
     free_memory = get_available_memory_with_percent(memory_percent)
     count = int(free_memory / max_file_size)
     max_cpu_count = get_available_cpu_count_with_percent(cpu_percent)
+    # 限制最大进程数
+    max_cpu_count = min(max_cpu_count, ParallelProcessing.MAX_PROCESSES)
     result = count if count <= max_cpu_count else max_cpu_count
     if result == 0:
         result = 1
@@ -69,21 +142,35 @@ def use_files_get_max_cpu_count(file_paths: list[str], memory_percent: float = 1
     if result > len(file_paths):
         result = len(file_paths)
 
-    trans_print("总文件数:", len(file_paths), ",获取最大文件大小:", str(round(max_file_size / 2 ** 20, 2)) + "M",
-                "可用内存:", str(get_available_memory_with_percent(1) / 2 ** 20) + "M",
-                "总CPU数:", get_cpu_count(), "CPU使用比例:", round(cpu_percent, 2), "CPU可用数量:", max_cpu_count,
-                ",最终确定使用进程数:", result)
+    info("总文件数:", len(file_paths), ",获取最大文件大小:", str(round(max_file_size / 2 ** 20, 2)) + "M",
+         "可用内存:", str(get_available_memory_with_percent(1) / 2 ** 20) + "M",
+         "总CPU数:", get_cpu_count(), "CPU使用比例:", round(cpu_percent, 2), "CPU可用数量:", max_cpu_count,
+         ",最终确定使用进程数:", result)
     return result
 
 
-def max_file_size_get_max_cpu_count(max_file_size, memory_percent: float = 1 / 6, cpu_percent: float = 2 / 5):
+def max_file_size_get_max_cpu_count(max_file_size: int, memory_percent: float = 1 / 6,
+                                    cpu_percent: float = 2 / 5) -> int:
+    """
+    根据最大文件大小和内存情况计算最大进程数
+    
+    Args:
+        max_file_size: 最大文件大小
+        memory_percent: 内存使用百分比
+        cpu_percent: CPU使用百分比
+    
+    Returns:
+        最大进程数
+    """
     free_memory = get_available_memory_with_percent(memory_percent)
     count = int(free_memory / max_file_size)
     max_cpu_count = get_available_cpu_count_with_percent(cpu_percent)
+    # 限制最大进程数
+    max_cpu_count = min(max_cpu_count, ParallelProcessing.MAX_PROCESSES)
     result = count if count <= max_cpu_count else max_cpu_count
     if result == 0:
         result = 1
-    trans_print(",获取最大文件大小:", str(round(max_file_size / 2 ** 20, 2)) + "M",
+    info(",获取最大文件大小:", str(round(max_file_size / 2 ** 20, 2)) + "M",
                 "可用内存:", str(get_available_memory_with_percent(1) / 2 ** 20) + "M",
                 "总CPU数:", get_cpu_count(), "CPU使用比例:", round(cpu_percent, 2), "CPU可用数量:", max_cpu_count,
                 ",最终确定使用进程数:", result)

+ 0 - 0
utils/tmp_util/__init__.py


+ 0 - 37
utils/tmp_util/合并文件.py

@@ -1,37 +0,0 @@
-import multiprocessing
-
-read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/收资数据/整改复核数据/2025年06月19日16时17分41秒'
-
-import os
-import pandas as pd
-
-# 获取文件夹下所有文件的路径
-file_paths = [os.path.join(read_dir, file) for file in os.listdir(read_dir) if
-              os.path.isfile(os.path.join(read_dir, file))]
-
-
-def read_and_save(wind_no, files, save_dir):
-    # 读取文件
-    df = pd.concat([pd.read_csv(file) for file in files])
-
-    # 保存文件
-    df.to_csv(os.path.join(save_dir, f'{wind_no}.csv'), index=False, encoding='utf-8')
-
-
-if __name__ == '__main__':
-
-    wind_dicts = dict()
-
-    save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/收资数据/整改复核数据/合并202506191654'
-
-    os.makedirs(save_dir, exist_ok=True)
-
-    for file in os.listdir(read_dir):
-        wind_no = file.split('(')[0]
-        if wind_no not in wind_dicts:
-            wind_dicts[wind_no] = [os.path.join(read_dir, file)]
-        else:
-            wind_dicts[wind_no].append(os.path.join(read_dir, file))
-
-    with multiprocessing.Pool(20) as pool:
-        pool.starmap(read_and_save, [(key, files, save_dir) for key, files in wind_dicts.items()])

+ 0 - 100
utils/tmp_util/整理INSERT到批量INSERT.py

@@ -1,100 +0,0 @@
-# coding=utf-8
-
-
-import re
-from collections import defaultdict
-
-import pymysql
-
-
-def read_sql_inserts(file_path):
-    """生成器函数,逐行读取INSERT语句"""
-    with open(file_path, 'r', encoding='utf-8') as f:
-        for line in f:
-            line = line.strip()
-            if line.startswith('INSERT INTO'):
-                yield line
-
-
-def process_large_sql_file(input_file, batch_size=10000):
-    table_data = defaultdict(lambda: {
-        'columns': None,
-        'value_rows': []
-    })
-
-    insert_pattern = re.compile(
-        r'INSERT\s+INTO\s+`?([a-zA-Z_][a-zA-Z0-9_]*)`?\s*\((.*?)\)\s*VALUES\s*\((.*?)\);',
-        re.IGNORECASE
-    )
-
-    # 使用生成器处理
-    for insert_stmt in read_sql_inserts(input_file):
-        match = insert_pattern.match(insert_stmt)
-        if match:
-            table_name = match.group(1)
-            columns = match.group(2)
-            values = match.group(3)
-
-            if table_data[table_name]['columns'] is None:
-                table_data[table_name]['columns'] = columns
-
-            table_data[table_name]['value_rows'].append(values)
-
-    # 生成批量INSERT语句
-    batch_inserts = {}
-    for table_name, data in table_data.items():
-        columns = data['columns']
-        value_rows = data['value_rows']
-
-        for i in range(0, len(value_rows), batch_size):
-            batch_values = value_rows[i:i + batch_size]
-            batch_insert = f"INSERT INTO `{table_name}` ({columns}) VALUES\n"
-            batch_insert += ",\n".join([f"({values})" for values in batch_values])
-            batch_insert += ";"
-
-            if table_name not in batch_inserts:
-                batch_inserts[table_name] = []
-            batch_inserts[table_name].append(batch_insert)
-
-    return batch_inserts
-
-
-def execute_batch_inserts(db_config, batch_inserts):
-    """直接执行批量INSERT到数据库"""
-    connection = pymysql.connect(**db_config)
-    try:
-        with connection.cursor() as cursor:
-            for table_name, inserts in batch_inserts.items():
-                for index, insert_sql in enumerate(inserts):
-                    cursor.execute(insert_sql)
-                    print(f"表 {table_name},共 {len(inserts)} 个, 第 {index + 1} 个批量INSERT语句执行成功")
-        connection.commit()
-    finally:
-        connection.close()
-
-
-# 数据库配置
-db_config = {
-    'host': '192.168.50.235',
-    'user': 'root',
-    'password': 'admin123456',
-    'db': 'wtlivedb_1',
-    'charset': 'utf8mb4'
-}
-
-"""
-移除INSERT 语句 其他的就是建表语句了
-cat file |grep -v 'INSERT ' > create_talbe.sql
-下面是 INSERT 转化为  BATCH INSERT 的脚本
-"""
-
-if __name__ == "__main__":
-    input_file = "wtlivedb.sql"
-
-    # 使用
-    batch_inserts = process_large_sql_file("input.sql")
-    execute_batch_inserts(db_config, batch_inserts)
-
-    # 打印统计信息
-    for table_name, inserts in batch_inserts.items():
-        print(f"表 '{table_name}': {len(inserts)} 个批量INSERT语句")

+ 0 - 87
utils/tmp_util/神木_完整度_10分.py

@@ -1,87 +0,0 @@
-# coding=utf-8
-
-import datetime
-import multiprocessing
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(__file__).split("utils")[0])
-
-import pandas as pd
-
-from utils.file.trans_methods import read_file_to_df, read_excel_files
-
-
-def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
-    """
-    获取俩个时间之间的个数
-    :return: 查询时间间隔
-    """
-    delta = end_time - start_time
-    total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
-
-    return abs(int(total_seconds / time_space)) + 1
-
-
-def save_percent(value, save_decimal=7):
-    return round(value, save_decimal) * 100
-
-
-def read_and_select(file_path, read_cols_bak):
-    try:
-        read_cols = read_cols_bak[0:len(read_cols_bak)]
-        result_df = pd.DataFrame()
-        df = read_file_to_df(file_path, read_cols=read_cols)
-        wind_name = df['名称'].values[0]
-        df['时间'] = pd.to_datetime(df['时间'])
-        count = get_time_space_count(df['时间'].min(), df['时间'].max(), 600)
-        repeat_time_count = df.shape[0] - len(df['时间'].unique())
-        print(wind_name, count, repeat_time_count)
-        result_df['风机号'] = [wind_name]
-        result_df['重复率'] = [save_percent(repeat_time_count / count)]
-        result_df['重复次数'] = [repeat_time_count]
-        result_df['总记录数'] = [count]
-
-        read_cols.remove('名称')
-        for read_col in read_cols:
-
-            if read_col != '时间':
-                df[read_col] = pd.to_numeric(df[read_col], errors='coerce')
-            else:
-                df[read_col] = pd.to_datetime(df[read_col], errors='coerce')
-
-        group_df = df.groupby(by=['名称']).count()
-        group_df.reset_index(inplace=True)
-        count_df = pd.DataFrame(group_df)
-        total_count = count_df[read_cols].values[0].sum()
-        print(wind_name, total_count, count * len(read_cols))
-        result_df['平均缺失率,单位%'] = [save_percent(1 - total_count / (count * len(read_cols)))]
-        result_df['缺失数值'] = [
-            '-'.join([f'{col_name}_{str(count - i)}' for col_name, i in zip(read_cols, count_df[read_cols].values[0])])]
-        del group_df
-
-        error_fengsu_count = df.query("(风速 < 0) | (风速 > 80)").shape[0]
-        error_yougong_gonglv = df.query("(发电机有功功率 < -200) | (发电机有功功率 > 2500)").shape[0]
-
-        result_df['平均异常率'] = [save_percent((error_fengsu_count + error_yougong_gonglv) / (2 * count))]
-    except Exception as e:
-        print(file_path)
-        raise e
-
-    return result_df
-
-
-if __name__ == '__main__':
-    read_cols_str = '名称,时间,发电机有功功率,发电机转速,发电机驱动端轴承温度,发电机非驱动端轴承温度,发电机定子U相线圈温度,发电机定子V相线圈温度,发电机定子W相线圈温度,实际扭矩,设定扭矩,仪表盘风速,舱内温度,控制柜内温度,舱外温度,风向,风速,机舱风向夹角,1#桨叶片角度,1#桨设定角度,2#桨叶片角度,2#桨设定角度,3#桨叶片角度,3#桨设定角度,1#桨电机温度,2#桨电机温度,3#桨电机温度,轮毂内温度,齿轮箱油泵吸油口油压,齿轮箱分配器位置油压,偏航液压刹车系统蓄能罐压力,主轴转速,齿轮箱油路入口温度,齿轮箱中间轴驱动端轴承温度,齿轮箱中间轴非驱动端轴承温度,齿轮箱油池温度,主轴承外圈温度,可利用率,机舱位置,总扭缆角度'
-    read_cols = [i for i in read_cols_str.split(",") if i]
-    read_dir = r'D:\data\tmp_data\10分'
-
-    files = read_excel_files(read_dir)
-
-    with multiprocessing.Pool(4) as pool:
-        dfs = pool.starmap(read_and_select, [(os.path.join(read_dir, i), read_cols) for i in files])
-
-    df = pd.concat(dfs, ignore_index=True)
-    df.sort_values(by=['风机号'], inplace=True)
-
-    df.to_csv("神木风电场-10分钟.csv", encoding='utf8', index=False)

+ 0 - 90
utils/tmp_util/神木_完整度_1分.py

@@ -1,90 +0,0 @@
-# coding=utf-8
-
-import datetime
-import multiprocessing
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(__file__).split("utils")[0])
-
-import pandas as pd
-
-from utils.file.trans_methods import read_file_to_df, read_excel_files
-
-
-def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
-    """
-    获取俩个时间之间的个数
-    :return: 查询时间间隔
-    """
-    delta = end_time - start_time
-    total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
-
-    return abs(int(total_seconds / time_space)) + 1
-
-
-def save_percent(value, save_decimal=7):
-    return round(value, save_decimal) * 100
-
-
-def read_and_select(file_path):
-    try:
-        result_df = pd.DataFrame()
-        df = read_file_to_df(file_path)
-        read_cols_bak = df.columns.tolist()
-
-        wind_name = df['名称'].values[0]
-        df['时间'] = pd.to_datetime(df['时间'])
-        count = get_time_space_count(df['时间'].min(), df['时间'].max(), 60)
-        repeat_time_count = df.shape[0] - len(df['时间'].unique())
-        print(wind_name, count, repeat_time_count)
-        result_df['风机号'] = [wind_name]
-        result_df['重复率'] = [save_percent(repeat_time_count / count)]
-        result_df['重复次数'] = [repeat_time_count]
-        result_df['总记录数'] = [count]
-
-        read_cols_bak.remove('名称')
-        read_cols = list()
-        for read_col in read_cols_bak:
-
-            if read_col == '时间':
-                df[read_col] = pd.to_datetime(df[read_col], errors='coerce')
-                read_cols.append(read_col)
-            else:
-                df[read_col] = pd.to_numeric(df[read_col], errors='coerce')
-                if not df[read_col].isnull().all():
-                    read_cols.append(read_col)
-
-        group_df = df.groupby(by=['名称']).count()
-        group_df.reset_index(inplace=True)
-        count_df = pd.DataFrame(group_df)
-        total_count = count_df[read_cols].values[0].sum()
-        print(wind_name, total_count, count * len(read_cols))
-        result_df['平均缺失率,单位%'] = [save_percent(1 - total_count / (count * len(read_cols)))]
-        result_df['缺失数值'] = [
-            '-'.join([f'{col_name}_{str(count - i)}' for col_name, i in zip(read_cols, count_df[read_cols].values[0])])]
-        del group_df
-
-        error_fengsu_count = df.query("(风速 < 0) | (风速 > 80)").shape[0]
-        error_yougong_gonglv = df.query("(发电机有功功率 < -200) | (发电机有功功率 > 2500)").shape[0]
-
-        result_df['平均异常率'] = [save_percent((error_fengsu_count + error_yougong_gonglv) / (2 * count))]
-    except Exception as e:
-        print(file_path)
-        raise e
-
-    return result_df
-
-
-if __name__ == '__main__':
-    read_dir = r'D:\data\tmp_data\1分\远景1min'
-
-    files = read_excel_files(read_dir)
-
-    with multiprocessing.Pool(4) as pool:
-        dfs = pool.map(read_and_select, files)
-
-    df = pd.concat(dfs, ignore_index=True)
-    df.sort_values(by=['风机号'], inplace=True)
-
-    df.to_csv("神木风电场-1分钟.csv", encoding='utf8', index=False)

+ 0 - 18
utils/tmp_util/获取台账所有wind表信息.py

@@ -1,18 +0,0 @@
-import sys
-from os import path, environ
-
-env = 'dev'
-if len(sys.argv) >= 2:
-    env = sys.argv[1]
-
-conf_path = path.abspath(__file__).split("energy-data-trans")[0] + f"/energy-data-trans/conf/etl_config_{env}.yaml"
-environ['ETL_CONF'] = conf_path
-environ['env'] = env
-
-from service.common_connect import plt
-
-tables = 'wind_company,wind_engine_group,wind_engine_mill,wind_exception_count,wind_field,wind_field_batch,wind_field_contract,wind_field_resource,wind_relation'
-
-for table in tables.split(','):
-    df = plt.read_sql_to_df(f"select * from {table}")
-    df.to_csv(table + '.csv', encoding='utf8', index=False)

+ 0 - 76
utils/tmp_util/表添加列.py

@@ -1,76 +0,0 @@
-import os
-import sys
-
-env = 'prod'
-if len(sys.argv) >= 2:
-    env = sys.argv[1]
-
-conf_path = os.path.abspath(__file__).split("energy-data-trans")[0] + f"/energy-data-trans/conf/etl_config_{env}.yaml"
-os.environ['ETL_CONF'] = conf_path
-os.environ['env'] = env
-
-db_last = ''
-if env != 'dev':
-    db_last = db_last + '_' + env
-
-query_sql = f"""
-SELECT
-	t.TABLE_NAME
-FROM
-	information_schema.`TABLES` t
-WHERE
-	t.TABLE_SCHEMA = 'energy_data{db_last}'
-AND t.TABLE_NAME LIKE 'WOF%%_minute'
-AND t.TABLE_NAME NOT IN (
-	SELECT
-		table_name
-	FROM
-		information_schema.`COLUMNS` a
-	WHERE
-		a.TABLE_SCHEMA = 'energy_data{db_last}'
-	AND a.TABLE_NAME LIKE 'WOF%%_minute'
-	AND a.COLUMN_NAME = 'main_bearing_temperature_2'
-)
-"""
-
-
-def get_table_count(table_name):
-    query_begin = time.time()
-    query_sql = f"""
-    select count(1) as count from {table_name}
-    """
-    print(table_name, '统计条数耗时', time.time() - query_begin, trans.execute(query_sql)[0]['count'])
-
-
-def get_update_sql(table_name):
-    update_sql = f"""
-        ALTER TABLE {table_name}
-        ADD COLUMN `main_bearing_temperature_2` double DEFAULT NULL COMMENT '主轴承轴承温度2', 
-        ADD COLUMN `grid_a_phase_current` double DEFAULT NULL COMMENT '电网A相电流',
-        ADD COLUMN `grid_b_phase_current` double DEFAULT NULL COMMENT '电网B相电流',
-        ADD COLUMN `grid_c_phase_current` double DEFAULT NULL COMMENT '电网C相电流',
-        ADD COLUMN `reactive_power` double DEFAULT NULL COMMENT '无功功率';
-        """
-    return update_sql
-
-
-if __name__ == '__main__':
-    from service.common_connect import trans
-
-    # tables = trans.execute(query_sql)
-    # print(tables)
-
-    tables = list()
-    tables.append({'TABLE_NAME': 'WOF093400005_minute'})
-
-    import time
-
-    begin_time = time.time()
-    for table in tables:
-        table_name = '`' + table['TABLE_NAME'] + '`'
-        get_table_count(table_name)
-        update_time = time.time()
-        trans.execute(get_update_sql(table_name))
-        print(table_name, '更新耗时', time.time() - update_time)
-
-    print(len(tables), '张表,总耗时:', time.time() - begin_time)

+ 0 - 49
utils/tmp_util/表添加注释.py

@@ -1,49 +0,0 @@
-import os
-import sys
-
-env = 'tidbprod'
-if len(sys.argv) >= 2:
-    env = sys.argv[1]
-
-conf_path = os.path.abspath(__file__).split("energy-data-trans")[0] + f"/energy-data-trans/conf/etl_config_{env}.yaml"
-os.environ['ETL_CONF'] = conf_path
-os.environ['env'] = env
-
-from service.common_connect import trans, plt
-
-
-def get_all_tables():
-    query_sql = f"""
-    
-    SELECT 
-        t.TABLE_NAME
-    FROM
-        information_schema.`TABLES` t
-    WHERE
-        t.TABLE_SCHEMA = 'energy_data_prod'
-"""
-
-    return trans.execute(query_sql)
-
-
-def get_all_wind_company():
-    query_sql = "SELECT t.field_code,t.field_name FROM wind_field t where t.del_state = 0"
-    datas = plt.execute(query_sql)
-    result_dict = dict()
-    for data in datas:
-        result_dict[data['field_code']] = data['field_name']
-
-    return result_dict
-
-
-if __name__ == '__main__':
-    code_name_dict = get_all_wind_company()
-    tables = get_all_tables()
-    for table in tables:
-        table_name = table['TABLE_NAME']
-
-        if table_name.startswith('WOF'):
-            field_code = table_name.split('_')[0].split('-')[0]
-            if field_code in code_name_dict.keys():
-                update_sql = f"ALTER TABLE `{table_name}` COMMENT = '{code_name_dict[field_code]}'"
-                trans.execute(update_sql)

+ 0 - 27
utils/tmp_util/颗粒度变大.py

@@ -1,27 +0,0 @@
-import os
-
-import pandas as pd
-
-
-def trans_time_granularity(read_dir: str, save_dir: str, time_str: str, time_granularity: str, group_by: list):
-    for root, dirs, files in os.walk(read_dir):
-        for file in files:
-            file_path = os.path.join(root, file)
-            df = pd.read_csv(file_path)
-            # df = df.drop(index=0)
-            df[time_str] = pd.to_datetime(df[time_str], errors='coerce')
-            df[time_str] = df[time_str].dt.ceil(time_granularity)
-            groupby_df = df.groupby(group_by).mean(numeric_only=True).reset_index()
-
-            save_file = file_path.replace(read_dir, save_dir)
-            if not os.path.exists(os.path.dirname(save_file)):
-                os.makedirs(os.path.dirname(save_file))
-
-            groupby_df.to_csv(save_file, index=False, encoding='utf-8')
-
-
-if __name__ == '__main__':
-    read_dir = r'D:\data\tmp_data\龙源\minute'
-    save_dir = r'D:\data\tmp_data\龙源\minute12'
-
-    trans_time_granularity(read_dir, save_dir, 'time_stamp', '20min', ['time_stamp'])

+ 98 - 53
utils/zip/unzip.py

@@ -1,17 +1,27 @@
 # -*- coding: utf-8 -*-
 # @Time    : 2024/5/17
 # @Author  : 魏志亮
+import os
 import traceback
 import zipfile
-from os import *
+from typing import Tuple, Optional
 
 import rarfile
 
-from utils.file.trans_methods import detect_file_encoding
-from utils.log.trans_log import trans_print, logger
+from utils.file.trans_methods import detect_file_encoding, create_file_path
+from utils.log.trans_log import debug, error
 
 
-def __support_gbk(zip_file: zipfile.ZipFile):
+def __support_gbk(zip_file: zipfile.ZipFile) -> zipfile.ZipFile:
+    """
+    支持GBK编码的zip文件
+    
+    Args:
+        zip_file: ZipFile对象
+    
+    Returns:
+        处理后的ZipFile对象
+    """
     name_to_info = zip_file.NameToInfo
     # copy map first
     for name, info in name_to_info.copy().items():
@@ -23,18 +33,31 @@ def __support_gbk(zip_file: zipfile.ZipFile):
     return zip_file
 
 
-def unzip(zip_filepath, dest_path):
+def unzip(zip_filepath: str, dest_path: str) -> Tuple[bool, Optional[Exception]]:
+    """
+    解压zip文件
+    
+    Args:
+        zip_filepath: zip文件路径
+        dest_path: 解压目标路径
+    
+    Returns:
+        (是否成功, 失败时的异常对象;成功时为 None)
+    """
     # 解压zip文件
     is_success = True
-    trans_print('开始读取文件:', zip_filepath)
-    trans_print("解压到:", dest_path)
+    debug('开始读取文件:', zip_filepath)
+    debug("解压到:", dest_path)
+
+    # 确保目标路径存在
+    create_file_path(dest_path)
 
     try:
         if detect_file_encoding(zip_filepath).startswith("gb"):
             try:
-                with __support_gbk(zipfile.ZipFile(zip_filepath, 'r'))  as zip_ref:
+                with __support_gbk(zipfile.ZipFile(zip_filepath, 'r')) as zip_ref:
                     zip_ref.extractall(dest_path)
-            except:
+            except Exception:
                 with zipfile.ZipFile(zip_filepath, 'r') as zip_ref:
                     zip_ref.extractall(dest_path)
         else:
@@ -42,46 +65,60 @@ def unzip(zip_filepath, dest_path):
                 zip_ref.extractall(dest_path)
 
     except zipfile.BadZipFile as e:
-        trans_print(traceback.format_exc())
+        error(traceback.format_exc())
+        is_success = False
+        error('不是zip文件:', zip_filepath)
+        return is_success, e
+    except Exception as e:
+        error(traceback.format_exc())
         is_success = False
-        trans_print('不是zip文件:', zip_filepath)
+        error('解压文件出错:', zip_filepath, str(e))
         return is_success, e
 
     # 遍历解压后的文件
-    dest_path = dest_path
-    trans_print('解压再次读取', dest_path)
+    debug('解压再次读取', dest_path)
     if is_success:
-        for root, dirs, files in walk(dest_path):
+        for root, dirs, files in os.walk(dest_path):
             for file in files:
-                file_path = path.join(root, file)
+                file_path = os.path.join(root, file)
                 # 检查文件是否是zip文件
                 if file_path.endswith('.zip'):
                     if file_path.endswith('.csv.zip'):
-                        rename(file_path, file_path.replace(".csv.zip", ".csv.gz"))
+                        os.rename(file_path, file_path.replace(".csv.zip", ".csv.gz"))
                     else:
                         # 如果是,递归解压
-                        unzip(file_path, dest_path + sep + get_desc_path(str(file)))
-                        # 删除已解压的zip文件(可选)
-                        remove(file_path)
-                    # 检查文件是否是zip文件
-                if file_path.endswith('.rar'):
+                        unzip(file_path, os.path.join(dest_path, get_desc_path(str(file))))
+                        # 删除已解压的zip文件
+                        os.remove(file_path)
+                # 检查文件是否是rar文件
+                elif file_path.endswith('.rar'):
                     # 如果是,递归解压
-                    unrar(file_path, dest_path + sep + get_desc_path(str(file)))
-                    # 删除已解压的zip文件(可选)
-                    remove(file_path)
-
-    return is_success, ''
-
-
-def unrar(rar_file_path, dest_dir):
-    # 检查目标目录是否存在,如果不存在则创建
-    # 解压zip文件
+                    unrar(file_path, os.path.join(dest_path, get_desc_path(str(file))))
+                    # 删除已解压的rar文件
+                    os.remove(file_path)
+
+    return is_success, None
+
+
+def unrar(rar_file_path: str, dest_dir: str) -> Tuple[bool, Optional[Exception]]:
+    """
+    解压rar文件
+    
+    Args:
+        rar_file_path: rar文件路径
+        dest_dir: 解压目标目录
+    
+    Returns:
+        (是否成功, 失败时的异常对象;成功时为 None)
+    """
+    # 解压rar文件
     is_success = True
-    trans_print('开始读取文件:', rar_file_path)
+    debug('开始读取文件:', rar_file_path)
     dest_path = dest_dir
-    trans_print("解压到:", dest_path)
-    if not path.exists(dest_path):
-        makedirs(dest_path)
+    debug("解压到:", dest_path)
+
+    # 确保目标路径存在
+    create_file_path(dest_path)
 
     try:
         # 打开RAR文件
@@ -91,33 +128,41 @@ def unrar(rar_file_path, dest_dir):
                 # 解压文件到目标目录
                 rf.extract(member, dest_path)
     except Exception as e:
-        trans_print(traceback.format_exc())
-        logger.exception(e)
+        error(traceback.format_exc())
         is_success = False
-        trans_print('不是rar文件:', rar_file_path)
+        error('不是rar文件:', rar_file_path)
         return is_success, e
 
     # 遍历解压后的文件
-    print('解压再次读取', dest_path)
+    debug('解压再次读取', dest_path)
     if is_success:
-        for root, dirs, files in walk(dest_path):
+        for root, dirs, files in os.walk(dest_path):
             for file in files:
-                file_path = path.join(root, file)
-                # 检查文件是否是zip文件
+                file_path = os.path.join(root, file)
+                # 检查文件是否是rar文件
                 if file_path.endswith('.rar'):
                     # 如果是,递归解压
                     unrar(file_path, get_desc_path(file_path))
-                    # 删除已解压的zip文件(可选)
-                    remove(file_path)
+                    # 删除已解压的rar文件
+                    os.remove(file_path)
 
-                if file_path.endswith('.zip'):
+                elif file_path.endswith('.zip'):
                     # 如果是,递归解压
                     unzip(file_path, get_desc_path(file_path))
-                    # 删除已解压的zip文件(可选)
-                    remove(file_path)
-
-    return is_success, ''
-
-
-def get_desc_path(path):
-    return path[0:path.rfind(".")]
+                    # 删除已解压的zip文件
+                    os.remove(file_path)
+
+    return is_success, None
+
+
+def get_desc_path(file_path: str) -> str:
+    """
+    获取去除最后一个扩展名后的文件路径(仅去掉最后一个 ".",例如 a/b.tar.gz -> a/b.tar)
+    
+    Args:
+        file_path: 文件路径
+    
+    Returns:
+        去除扩展名的路径
+    """
+    return file_path[0:file_path.rfind(".")]

+ 0 - 0
wind_farm/CGN/__init__.py


+ 0 - 83
wind_farm/CGN/minute_data.py

@@ -1,83 +0,0 @@
-import datetime
-import logging
-import os
-
-import pandas as pd
-import sys
-from sqlalchemy import create_engine
-
-# 更新为第三方数据源
-engine = create_engine('mysql+pymysql://root:admin123456@192.168.50.235:30306/appoint')
-
-base_dir = r'/data/logs/104'
-save_dir = base_dir + os.sep + 'minute'
-log_dir = base_dir + os.sep + 'logs' + os.sep + 'minute'
-
-wind_farm_code_dict = {
-    '风场编号1': '山西风场',
-    '风场编号2': '桂林风场'
-}
-
-
-def create_dir(save_dir, is_file=False):
-    if is_file:
-        save_dir = os.path.dirname(save_dir)
-    os.makedirs(save_dir, exist_ok=True)
-
-
-def init_log():
-    logger = logging.getLogger("104data")
-    logger.setLevel(logging.INFO)
-    stout_handle = logging.StreamHandler(sys.stdout)
-    stout_handle.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    stout_handle.setLevel(logging.INFO)
-    logger.addHandler(stout_handle)
-    create_dir(log_dir)
-    file_name = log_dir + os.sep + datetime.datetime.now().strftime('%Y%m') + '-info.log'
-    file_handler = logging.FileHandler(file_name, encoding='utf-8')
-    file_handler.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    file_handler.setLevel(logging.INFO)
-    logger.addHandler(file_handler)
-
-    file_name = log_dir + os.sep + datetime.datetime.now().strftime('%Y%m') + '-error.log'
-    file_handler = logging.FileHandler(file_name, encoding='utf-8')
-    file_handler.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    file_handler.setLevel(logging.ERROR)
-    logger.addHandler(file_handler)
-
-    return logger
-
-
-logger = init_log()
-
-
-def info_print(*kwargs):
-    message = " ".join([str(i) for i in kwargs])
-    logger.info(message)
-
-
-def error_print(*kwargs):
-    message = " ".join([str(i) for i in kwargs])
-    logger.error(message)
-
-
-def get_data_and_save_file(df_sql, save_path):
-    info_print(df_sql)
-    df = pd.read_sql_query(df_sql, engine)
-    info_print(df.shape)
-
-
-if __name__ == '__main__':
-    info_print("开始执行")
-    begin = datetime.datetime.now()
-    yestoday = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime('%Y%m%d')
-    yestoday_sql = f"select * from information_schema.TABLES where TABLE_NAME = {yestoday}"
-
-    get_data_and_save_file(yestoday_sql,
-                           os.path.join(save_dir, wind_farm_code_dict['风场编号1'], yestoday[0:4], yestoday[0:6],
-                                        f'{yestoday}.csv.gz'))
-
-    info_print("执行结束,总耗时:", datetime.datetime.now() - begin)

+ 0 - 83
wind_farm/CGN/purge_history_data.py

@@ -1,83 +0,0 @@
-import datetime
-import logging
-import os
-import sys
-
-import pandas as pd
-from sqlalchemy import create_engine, text
-
-engine = create_engine('mysql+pymysql://root:admin123456@192.168.50.235:30306/appoint')
-
-base_dir = r'/data/logs/104'
-log_dir = base_dir + os.sep + 'logs' + os.sep + 'delete'
-
-
-def create_dir(save_dir, is_file=False):
-    if is_file:
-        save_dir = os.path.dirname(save_dir)
-    os.makedirs(save_dir, exist_ok=True)
-
-
-def init_log():
-    logger = logging.getLogger("104data")
-    logger.setLevel(logging.INFO)
-    stout_handle = logging.StreamHandler(sys.stdout)
-    stout_handle.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    stout_handle.setLevel(logging.INFO)
-    logger.addHandler(stout_handle)
-    create_dir(log_dir)
-    file_name = log_dir + os.sep + datetime.datetime.now().strftime('%Y%m') + '-info.log'
-    file_handler = logging.FileHandler(file_name, encoding='utf-8')
-    file_handler.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    file_handler.setLevel(logging.INFO)
-    logger.addHandler(file_handler)
-
-    file_name = log_dir + os.sep + datetime.datetime.now().strftime('%Y%m') + '-error.log'
-    file_handler = logging.FileHandler(file_name, encoding='utf-8')
-    file_handler.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    file_handler.setLevel(logging.ERROR)
-    logger.addHandler(file_handler)
-
-    return logger
-
-
-logger = init_log()
-
-
-def info_print(*kwargs):
-    message = " ".join([str(i) for i in kwargs])
-    logger.info(message)
-
-
-def error_print(*kwargs):
-    message = " ".join([str(i) for i in kwargs])
-    logger.error(message)
-
-
-def drop_table(lastdays):
-    # 构建查询语句
-    query = text(
-        f"SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA='appoint' AND TABLE_NAME like '{lastdays}%'")
-    table_df = pd.read_sql(query, engine)
-
-    info_print('查询到表', table_df['TABLE_NAME'].values)
-    for table_name in table_df['TABLE_NAME'].values:
-        # 构建删除表的SQL语句
-        drop_query = text(f"DROP TABLE {table_name}")
-        # 执行删除操作
-        with engine.connect() as connection:
-            connection.execute(drop_query)
-
-        info_print(f"Table {table_name} deleted")
-
-
-if __name__ == '__main__':
-    info_print("开始执行")
-    begin = datetime.datetime.now()
-    lastdays = (datetime.datetime.now() - datetime.timedelta(days=8)).strftime('%Y%m%d')
-    print(lastdays)
-    drop_table(lastdays)
-    info_print("执行结束,总耗时:", datetime.datetime.now() - begin)

+ 0 - 173
wind_farm/CGN/second_data.py

@@ -1,173 +0,0 @@
-import datetime
-import json
-import logging
-import multiprocessing
-import os
-import traceback
-
-import sys
-
-import numpy as np
-import pandas as pd
-from sqlalchemy import create_engine
-
-engine = create_engine('mysql+pymysql://root:admin123456@192.168.50.235:30306/appoint')
-
-base_dir = r'/data/logs/104'
-save_dir = base_dir + os.sep + 'second'
-log_dir = base_dir + os.sep + 'logs' + os.sep + 'second'
-
-def create_dir(save_dir, is_file=False):
-    if is_file:
-        save_dir = os.path.dirname(save_dir)
-    os.makedirs(save_dir, exist_ok=True)
-
-
-def init_log():
-    logger = logging.getLogger("104data")
-    logger.setLevel(logging.INFO)
-    stout_handle = logging.StreamHandler(sys.stdout)
-    stout_handle.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    stout_handle.setLevel(logging.INFO)
-    logger.addHandler(stout_handle)
-    create_dir(log_dir)
-    file_name = log_dir + os.sep + datetime.datetime.now().strftime('%Y%m') + '-info.log'
-    file_handler = logging.FileHandler(file_name, encoding='utf-8')
-    file_handler.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    file_handler.setLevel(logging.INFO)
-    logger.addHandler(file_handler)
-
-    file_name = log_dir + os.sep + datetime.datetime.now().strftime('%Y%m') + '-error.log'
-    file_handler = logging.FileHandler(file_name, encoding='utf-8')
-    file_handler.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    file_handler.setLevel(logging.ERROR)
-    logger.addHandler(file_handler)
-
-    return logger
-
-
-logger = init_log()
-
-
-def get_all_mesurement_conf():
-    sql = "select * from measurement_conf "
-    return pd.read_sql(sql, engine)
-
-
-def get_all_mesurepoint_conf():
-    sql = "select * from measurepoint_conf t where t.status = 1"
-    return pd.read_sql(sql, engine)
-
-
-def df_value_to_dict(df, key='col1', value='col2'):
-    """
-    :param df: dataframe
-    :param key: 字典的key,如果重复,则返回
-    :param value: 字典的value
-    :return:
-    """
-    result_dict = dict()
-    for k, v in zip(df[key], df[value]):
-        if k in result_dict.keys():
-            if type(result_dict[k]) == list:
-                result_dict[k].append(v)
-            else:
-                result_dict[k] = [result_dict[k]]
-                result_dict[k].append(v)
-        else:
-            result_dict[k] = v
-
-    return result_dict
-
-
-def info_print(*kwargs):
-    message = " ".join([str(i) for i in kwargs])
-    logger.info(message)
-
-
-def error_print(*kwargs):
-    message = " ".join([str(i) for i in kwargs])
-    logger.error(message)
-
-
-def exists_table(table_name):
-    sql = f"SELECT * FROM information_schema.tables WHERE table_schema = 'appoint' AND table_name = '{table_name}'"
-    info_print(sql)
-    table_df = pd.read_sql_query(sql, engine)
-    if table_df.empty:
-        return False
-    return True
-
-
-def get_data_and_save_file(table_name, save_path, measurepoint_use_dict):
-    if not exists_table(table_name):
-        error_print(f"{table_name} 表不存在")
-    else:
-        df_sql = f"SELECT * FROM {table_name}"
-        info_print(df_sql)
-        df = pd.read_sql_query(df_sql, engine)
-        info_print(df.shape)
-
-        data_dict = dict()
-        for receive_time, information_object_data in zip(df['receive_time'],
-                                                         df['information_object_data']):
-
-            json_data = json.loads(information_object_data)
-            for k, v in json_data.items():
-                k = int(k)
-                wind_num = k // 103 + 1
-                mesurepoint_num = k % 103
-
-                if wind_num not in data_dict.keys():
-                    data_dict[wind_num] = dict()
-
-                if receive_time not in data_dict[wind_num].keys():
-                    data_dict[wind_num][receive_time] = dict()
-
-                if mesurepoint_num in measurepoint_use_dict.keys():
-                    data_dict[wind_num][receive_time][mesurepoint_num] = v
-
-        datas = list()
-        for wind_num, data in data_dict.items():
-            for receive_time, mesurepoint_data in data.items():
-                data = [wind_num, receive_time]
-                for point_num in measurepoint_use_dict.keys():
-                    data.append(mesurepoint_data[point_num] if point_num in mesurepoint_data.keys() else np.nan)
-                if len(data) > 2:
-                    datas.append(data)
-
-        cols = ['风机编号', '时间']
-        cols.extend(measurepoint_use_dict.values())
-        result_df = pd.DataFrame(data=datas, columns=cols)
-        result_df.sort_values(by=['风机编号', '时间'])
-        create_dir(save_path, True)
-        result_df.to_csv(save_path, encoding='utf8', index=False, compression='gzip')
-        info_print("文件", save_path, '保存成功')
-
-
-if __name__ == '__main__':
-    info_print("开始执行")
-    begin = datetime.datetime.now()
-    try:
-        measurepoint_conf_df = get_all_mesurepoint_conf()
-        measurepoint_use_dict = df_value_to_dict(measurepoint_conf_df, 'id', 'name')
-
-        yestoday = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime('%Y%m%d')
-
-        measurement_conf_df = get_all_mesurement_conf()
-        tables = list()
-        for id, measurement_wind_field in zip(measurement_conf_df['id'], measurement_conf_df['measurement_wind_field']):
-            tables.append(
-                (f'{yestoday}_{id}', os.path.join(save_dir, measurement_wind_field, yestoday[0:4], yestoday[0:6],
-                                                  yestoday + '.csv.gz')))
-
-        with multiprocessing.Pool(len(tables)) as pool:
-            pool.starmap(get_data_and_save_file, [(t[0], t[1], measurepoint_use_dict) for t in tables])
-    except Exception as e:
-        error_print(traceback.format_exc())
-        raise e
-
-    info_print("执行结束,总耗时:", datetime.datetime.now() - begin)

+ 0 - 0
wind_farm/__init__.py