Browse Source

优化项目

wzl 1 month ago
parent
commit
038509a11a
45 changed files with 1720 additions and 1613 deletions
  1. 9 9
      app_run.py
  2. 97 0
      conf/constants.py
  3. 10 10
      conf/etl_config_dev.yaml
  4. 3 3
      etl/common/ArchiveFile.py
  5. 39 27
      etl/common/BaseDataTrans.py
  6. 3 3
      etl/common/ClearData.py
  7. 117 44
      etl/common/CombineAndSaveFormalFile.py
  8. 96 20
      etl/common/PathsAndTable.py
  9. 47 22
      etl/common/SaveToDb.py
  10. 43 18
      etl/common/UnzipAndRemove.py
  11. 3 3
      etl/wind_power/fault_warn/FaultWarnTrans.py
  12. 4 4
      etl/wind_power/laser/LaserTrans.py
  13. 6 6
      etl/wind_power/min_sec/ClassIdentifier.py
  14. 51 26
      etl/wind_power/min_sec/MinSecTrans.py
  15. 111 45
      etl/wind_power/min_sec/ReadAndSaveTmp.py
  16. 53 40
      etl/wind_power/min_sec/StatisticsAndSaveTmpFormalFile.py
  17. 42 7
      etl/wind_power/min_sec/TransParam.py
  18. 53 18
      etl/wind_power/wave/WaveTrans.py
  19. 3 3
      service/common_connect.py
  20. 6 2
      service/trans_conf_service.py
  21. 95 72
      service/trans_service.py
  22. 4 2
      utils/common.py
  23. 137 12
      utils/conf/read_conf.py
  24. 231 41
      utils/db/ConnectMysql.py
  25. 6 6
      utils/db/ConnectMysql_tidb_fix.py
  26. 2 3
      utils/df_utils/util.py
  27. 139 38
      utils/file/trans_methods.py
  28. 0 202
      utils/file/trans_methods.py_1
  29. 99 25
      utils/log/trans_log.py
  30. 113 26
      utils/systeminfo/sysinfo.py
  31. 0 0
      utils/tmp_util/__init__.py
  32. 0 37
      utils/tmp_util/合并文件.py
  33. 0 100
      utils/tmp_util/整理INSERT到批量INSERT.py
  34. 0 87
      utils/tmp_util/神木_完整度_10分.py
  35. 0 90
      utils/tmp_util/神木_完整度_1分.py
  36. 0 18
      utils/tmp_util/获取台账所有wind表信息.py
  37. 0 76
      utils/tmp_util/表添加列.py
  38. 0 49
      utils/tmp_util/表添加注释.py
  39. 0 27
      utils/tmp_util/颗粒度变大.py
  40. 98 53
      utils/zip/unzip.py
  41. 0 0
      wind_farm/CGN/__init__.py
  42. 0 83
      wind_farm/CGN/minute_data.py
  43. 0 83
      wind_farm/CGN/purge_history_data.py
  44. 0 173
      wind_farm/CGN/second_data.py
  45. 0 0
      wind_farm/__init__.py

+ 9 - 9
app_run.py

@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 # @Time    : 2024/6/11
 # @Time    : 2024/6/11
 # @Author  : 魏志亮
 # @Author  : 魏志亮
+import os
 import sys
 import sys
-from os import *
 
 
 from utils.conf.read_conf import yaml_conf, read_conf
 from utils.conf.read_conf import yaml_conf, read_conf
 
 
@@ -11,7 +11,7 @@ def get_exec_data(run_count=1):
     now_run_count = get_now_running_count()
     now_run_count = get_now_running_count()
     data = None
     data = None
     if now_run_count >= run_count:
     if now_run_count >= run_count:
-        trans_print(f"当前有{now_run_count}个任务在执行")
+        info(f"当前有{now_run_count}个任务在执行")
     else:
     else:
         data = get_batch_exec_data()
         data = get_batch_exec_data()
     return data
     return data
@@ -22,7 +22,7 @@ def run(save_db=True, run_count=1, yaml_config=None, step=0, end=999):
     data = get_exec_data(run_count)
     data = get_exec_data(run_count)
 
 
     if data is None:
     if data is None:
-        trans_print("没有需要执行的任务")
+        info("没有需要执行的任务")
         return
         return
 
 
     exec_process = None
     exec_process = None
@@ -55,14 +55,14 @@ if __name__ == "__main__":
     if env.endswith(".yaml"):
     if env.endswith(".yaml"):
         conf_path = env
         conf_path = env
     else:
     else:
-        conf_path = path.abspath(f"./conf/etl_config_{env}.yaml")
+        conf_path = os.path.abspath(f"./conf/etl_config_{env}.yaml")
 
 
-    environ["ETL_CONF"] = conf_path
+    os.environ["ETL_CONF"] = conf_path
     yaml_config = yaml_conf(conf_path)
     yaml_config = yaml_conf(conf_path)
-    environ["env"] = env
+    os.environ["env"] = env
     run_count = int(read_conf(yaml_config, "run_batch_count", 1))
     run_count = int(read_conf(yaml_config, "run_batch_count", 1))
 
 
-    from utils.log.trans_log import trans_print
+    from utils.log.trans_log import info
     from service.trans_conf_service import (
     from service.trans_conf_service import (
         update_timeout_trans_data,
         update_timeout_trans_data,
         get_now_running_count,
         get_now_running_count,
@@ -73,7 +73,7 @@ if __name__ == "__main__":
     from etl.wind_power.laser.LaserTrans import LaserTrans
     from etl.wind_power.laser.LaserTrans import LaserTrans
     from etl.wind_power.wave.WaveTrans import WaveTrans
     from etl.wind_power.wave.WaveTrans import WaveTrans
 
 
-    trans_print("所有请求参数:", sys.argv, "env:", env, "最大可执行个数:", run_count)
-    trans_print("配置文件路径:", environ.get("ETL_CONF"))
+    info("所有请求参数:", sys.argv, "env:", env, "最大可执行个数:", run_count)
+    info("配置文件路径:", os.environ.get("ETL_CONF"))
 
 
     run(run_count=run_count, yaml_config=yaml_config, step=0)
     run(run_count=run_count, yaml_config=yaml_config, step=0)

+ 97 - 0
conf/constants.py

@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2026/3/12
+# @Author  : 系统自动生成
+# 项目常量定义
+
+
+# 文件类型常量
+class FileTypes:
+    """文件类型常量"""
+    # Excel相关文件类型
+    EXCEL_TYPES = ['xls', 'xlsx', 'xlsm', 'xlsb', 'odf', 'ods', 'csv', 'csv.gz']
+    # 压缩文件类型
+    ZIP_TYPES = ['rar', 'zip']
+
+
+# 数据处理常量
+class DataProcessing:
+    """数据处理常量"""
+    # 时间戳列名
+    TIME_STAMP_COLUMN = 'time_stamp'
+    # NaN替换值
+    NAN_REPLACE_VALUE = -999999999
+    # 有功功率单位判断阈值
+    POWER_UNIT_THRESHOLD = 100000
+    # 时间间隔
+    TIME_INTERVAL = '10T'
+    # 非数值列
+    NOT_DOUBLE_COLS = ['wind_turbine_number', 'wind_turbine_name', 'time_stamp', 
+                       'param6', 'param7', 'param8', 'param9', 'param10']
+
+
+# 并行处理常量
+class ParallelProcessing:
+    """并行处理常量"""
+    # 最大进程数
+    MAX_PROCESSES = 8
+    # 最大批次数
+    MAX_BATCHES = 10
+    # CPU使用百分比
+    CPU_USAGE_PERCENT = 2 / 3
+
+
+# 数据库常量
+class Database:
+    """数据库常量"""
+    # 表引擎
+    TABLE_ENGINE = 'InnoDB'
+    # 默认字符集
+    DEFAULT_CHARSET = 'utf8mb4'
+    # 批处理大小
+    BATCH_SIZE = 100000
+
+
+# 日志常量
+class Log:
+    """日志常量"""
+    # 默认日志路径
+    DEFAULT_LOG_PATH = "/data/logs"
+    # 日志文件名前缀
+    LOG_FILE_PREFIX = "etl_tools_"
+
+
+# 路径常量
+class Paths:
+    """路径常量"""
+    # 临时文件基础路径
+    DEFAULT_TMP_BASE_PATH = "/tmp"
+    # 归档路径
+    DEFAULT_ARCHIVE_PATH = "/tmp/archive"
+
+
+# 状态常量
+class Status:
+    """状态常量"""
+    # 成功状态
+    SUCCESS = 1
+    # 错误状态
+    ERROR = 0
+    # 运行状态
+    RUNNING = 2
+
+
+# 类型常量
+class Types:
+    """类型常量"""
+    # 秒级数据
+    SECOND = 'second'
+    # 分钟级数据
+    MINUTE = 'minute'
+    # 故障数据
+    FAULT = 'fault'
+    # 告警数据
+    WARN = 'warn'
+    # 波形数据
+    WAVE = 'wave'
+    # 激光数据
+    LASER = 'laser'

+ 10 - 10
conf/etl_config_dev.yaml

@@ -1,24 +1,24 @@
 plt:
 plt:
-  database: energy_ty
+  database: energy
   host: 192.168.50.233
   host: 192.168.50.233
   password: admin123456
   password: admin123456
   port: 3306
   port: 3306
   user: admin
   user: admin
 
 
-# trans:
-#   database: energy_data
-#   host: 192.168.50.235
-#   password: admin123456
-#   port: 30306
-#   user: root
-
 trans:
 trans:
   database: energy_data
   database: energy_data
-  host: 106.120.102.238
+  host: 192.168.50.235
   password: admin123456
   password: admin123456
-  port: 10336
+  port: 30306
   user: root
   user: root
 
 
+#trans:
+#  database: energy_data
+#  host: 106.120.102.238
+#  password: admin123456
+#  port: 10336
+#  user: root
+
 # 如果要放在原始路径,则配置这个 以下面的名称作为切割点,新建清理数据文件夹
 # 如果要放在原始路径,则配置这个 以下面的名称作为切割点,新建清理数据文件夹
 etl_origin_path_contain: 收资数据
 etl_origin_path_contain: 收资数据
 # 如果单独保存,配置这个路径
 # 如果单独保存,配置这个路径

+ 3 - 3
etl/common/ArchiveFile.py

@@ -3,7 +3,7 @@ import shutil
 
 
 from etl.common.PathsAndTable import PathsAndTable
 from etl.common.PathsAndTable import PathsAndTable
 from service.trans_conf_service import update_archive_success
 from service.trans_conf_service import update_archive_success
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info
 
 
 
 
 class ArchiveFile(object):
 class ArchiveFile(object):
@@ -19,6 +19,6 @@ class ArchiveFile(object):
         if os.path.exists(self.pathsAndTable.get_tmp_formal_path()):
         if os.path.exists(self.pathsAndTable.get_tmp_formal_path()):
             shutil.make_archive(self.pathsAndTable.get_archive_path(), 'zip', self.pathsAndTable.get_tmp_formal_path())
             shutil.make_archive(self.pathsAndTable.get_archive_path(), 'zip', self.pathsAndTable.get_tmp_formal_path())
             update_archive_success(self.exec_id, f"{self.pathsAndTable.get_archive_path()}.zip")
             update_archive_success(self.exec_id, f"{self.pathsAndTable.get_archive_path()}.zip")
-            trans_print(f"文件夹已归档为 {self.pathsAndTable.get_archive_path()}.zip")
+            info(f"文件夹已归档为 {self.pathsAndTable.get_archive_path()}.zip")
         else:
         else:
-            trans_print(f"文件夹 {self.pathsAndTable.get_tmp_formal_path()} 不存在")
+            info(f"文件夹 {self.pathsAndTable.get_tmp_formal_path()} 不存在")

+ 39 - 27
etl/common/BaseDataTrans.py

@@ -10,12 +10,24 @@ from service.plt_service import get_all_wind
 from service.trans_conf_service import update_trans_status_success, update_trans_status_error, \
 from service.trans_conf_service import update_trans_status_success, update_trans_status_error, \
     update_trans_status_running
     update_trans_status_running
 from utils.file.trans_methods import read_excel_files
 from utils.file.trans_methods import read_excel_files
-from utils.log.trans_log import trans_print, set_trance_id
+from utils.log.trans_log import set_trance_id, info, error
 
 
 
 
 class BaseDataTrans(object):
 class BaseDataTrans(object):
-    def __init__(self, data: dict = None, save_db=True, yaml_config=None, step=0, end=999):
-
+    """数据转换基类"""
+
+    def __init__(self, data: dict = None, save_db: bool = True, yaml_config: dict = None, step: int = 0,
+                 end: int = 999):
+        """
+        初始化数据转换基类
+        
+        Args:
+            data: 任务数据字典
+            save_db: 是否保存到数据库
+            yaml_config: YAML配置
+            step: 开始步骤
+            end: 结束步骤
+        """
         self.id = data['id']
         self.id = data['id']
         self.task_name = data['task_name']
         self.task_name = data['task_name']
         self.transfer_type = data['transfer_type']
         self.transfer_type = data['transfer_type']
@@ -37,7 +49,7 @@ class BaseDataTrans(object):
                                                self.wind_farm_name, self.transfer_type, save_db, self.save_zip,
                                                self.wind_farm_name, self.transfer_type, save_db, self.save_zip,
                                                self.yaml_config, self.wind_col_trans)
                                                self.yaml_config, self.wind_col_trans)
         except Exception as e:
         except Exception as e:
-            trans_print(traceback.format_exc())
+            error(traceback.format_exc())
             update_trans_status_error(self.id, str(e), self.save_db)
             update_trans_status_error(self.id, str(e), self.save_db)
             raise e
             raise e
 
 
@@ -94,70 +106,70 @@ class BaseDataTrans(object):
             # 0
             # 0
             if self.step <= now_index <= self.end:
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 begin = datetime.datetime.now()
-                trans_print("开始清理数据,临时文件夹:", self.pathsAndTable.get_tmp_path())
+                info("开始清理数据,临时文件夹:", self.pathsAndTable.get_tmp_path())
                 self.clean_file_and_db()
                 self.clean_file_and_db()
-                trans_print("清理数据结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("清理数据结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
 
             now_index = now_index + 1
             now_index = now_index + 1
             # 1
             # 1
             if self.step <= now_index <= self.end:
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 begin = datetime.datetime.now()
-                trans_print("开始解压移动文件")
+                info("开始解压移动文件")
                 self.unzip_or_remove_to_tmp_dir()
                 self.unzip_or_remove_to_tmp_dir()
-                trans_print("解压移动文件结束:耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("解压移动文件结束:耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
 
             now_index = now_index + 1
             now_index = now_index + 1
             # 2
             # 2
             if self.step <= now_index <= self.end:
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 begin = datetime.datetime.now()
-                trans_print("开始保存数据到临时文件")
+                info("开始保存数据到临时文件")
                 self.read_and_save_tmp_file()
                 self.read_and_save_tmp_file()
-                trans_print("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
 
             now_index = now_index + 1
             now_index = now_index + 1
             # 3
             # 3
             if self.step <= now_index <= self.end:
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 begin = datetime.datetime.now()
-                trans_print("开始保存到临时正式文件")
+                info("开始保存到临时正式文件")
                 self.statistics_and_save_tmp_formal_file()
                 self.statistics_and_save_tmp_formal_file()
-                trans_print("保存到临时正式文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("保存到临时正式文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
 
             now_index = now_index + 1
             now_index = now_index + 1
             # 4
             # 4
             if self.step <= now_index <= self.end:
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 begin = datetime.datetime.now()
-                trans_print("开始保存归档文件")
+                info("开始保存归档文件")
                 self.archive_file()
                 self.archive_file()
-                trans_print("保存到保存归档文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("保存到保存归档文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
 
             now_index = now_index + 1
             now_index = now_index + 1
             # 5
             # 5
             if self.step <= now_index <= self.end:
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 begin = datetime.datetime.now()
-                trans_print("开始保存数据到正式文件")
+                info("开始保存数据到正式文件")
                 self.combine_and_save_formal_file()
                 self.combine_and_save_formal_file()
-                trans_print("保存数据到正式文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("保存数据到正式文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
 
             now_index = now_index + 1
             now_index = now_index + 1
             # 6
             # 6
             if self.step <= now_index <= self.end:
             if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 begin = datetime.datetime.now()
-                trans_print("开始保存到数据库,是否存库:", self.pathsAndTable.save_db)
+                info("开始保存到数据库,是否存库:", self.pathsAndTable.save_db)
                 self.save_to_db()
                 self.save_to_db()
-                trans_print("保存到数据结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
-                            datetime.datetime.now() - total_begin)
+                info("保存到数据结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                     datetime.datetime.now() - total_begin)
 
 
             self.update_exec_progress()
             self.update_exec_progress()
         except Exception as e:
         except Exception as e:
-            trans_print(traceback.format_exc())
+            error(traceback.format_exc())
             update_trans_status_error(self.id, str(e), self.save_db)
             update_trans_status_error(self.id, str(e), self.save_db)
             raise e
             raise e
         finally:
         finally:
             self.pathsAndTable.delete_tmp_files()
             self.pathsAndTable.delete_tmp_files()
-            trans_print("执行结束,总耗时:", str(datetime.datetime.now() - total_begin))
+            info("执行结束,总耗时:", str(datetime.datetime.now() - total_begin))

+ 3 - 3
etl/common/ClearData.py

@@ -2,7 +2,7 @@ import datetime
 
 
 from etl.common.PathsAndTable import PathsAndTable
 from etl.common.PathsAndTable import PathsAndTable
 from service.trans_conf_service import update_trans_transfer_progress
 from service.trans_conf_service import update_trans_transfer_progress
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info
 
 
 
 
 class ClearData(object):
 class ClearData(object):
@@ -19,8 +19,8 @@ class ClearData(object):
         # self.pathsAndTable.delete_batch_files()
         # self.pathsAndTable.delete_batch_files()
 
 
     def run(self):
     def run(self):
-        trans_print("开始清理数据,临时文件夹:", self.pathsAndTable.get_tmp_path())
+        info("开始清理数据,临时文件夹:", self.pathsAndTable.get_tmp_path())
         begin = datetime.datetime.now()
         begin = datetime.datetime.now()
         self.clean_data()
         self.clean_data()
         update_trans_transfer_progress(self.pathsAndTable.id, 5, self.pathsAndTable.save_db)
         update_trans_transfer_progress(self.pathsAndTable.id, 5, self.pathsAndTable.save_db)
-        trans_print("清理数据结束,耗时:", datetime.datetime.now() - begin)
+        info("清理数据结束,耗时:", datetime.datetime.now() - begin)

+ 117 - 44
etl/common/CombineAndSaveFormalFile.py

@@ -1,61 +1,134 @@
 import multiprocessing
 import multiprocessing
 import os
 import os
+from typing import Dict, List, Tuple, Optional
 
 
 import pandas as pd
 import pandas as pd
 
 
+from conf.constants import DataProcessing, ParallelProcessing
 from etl.common.PathsAndTable import PathsAndTable
 from etl.common.PathsAndTable import PathsAndTable
 from utils.file.trans_methods import read_excel_files, read_file_to_df, copy_to_new
 from utils.file.trans_methods import read_excel_files, read_file_to_df, copy_to_new
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info, debug
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 
 
 
 
-class CombineAndSaveFormalFile(object):
+class CombineAndSaveFormalFile:
+    """合并并保存正式文件"""
 
 
-    def __init__(self, pathsAndTable: PathsAndTable):
-        self.pathsAndTable = pathsAndTable
-        self.update_files = multiprocessing.Manager().list()
+    # 常量定义
+    TIME_STAMP_COLUMN = DataProcessing.TIME_STAMP_COLUMN
 
 
-    def combine_and_save(self, file_path, key, exists_file_path):
-        exists_same = False
-        if exists_file_path:
-            exists_same = True
+    def __init__(self, paths_and_table: PathsAndTable):
+        """
+        初始化合并器
+
+        Args:
+            paths_and_table: 路径和表信息对象
+        """
+        self.paths_and_table = paths_and_table
+        self.updated_files = multiprocessing.Manager().list()
+
+    def _merge_dataframes(self, exists_df: pd.DataFrame, now_df: pd.DataFrame) -> pd.DataFrame:
+        """
+        合并两个数据框并去重排序
+
+        Args:
+            exists_df: 已存在的数据框
+            now_df: 当前的数据框
+
+        Returns:
+            合并后的数据框
+        """
+        combined_df = pd.concat([exists_df, now_df])
+        # 去重,保留最新的数据
+        combined_df = combined_df.drop_duplicates(
+            subset=self.TIME_STAMP_COLUMN,
+            keep='last'
+        )
+        # 按时间戳排序
+        return combined_df.sort_values(
+            by=self.TIME_STAMP_COLUMN
+        ).reset_index(drop=True)
+
+    def _save_combined_file(self, file_path: str, key: Tuple[str, str], exists_file_path: Optional[str]) -> None:
+        """
+        保存合并后的文件
+
+        Args:
+            file_path: 新文件路径
+            key: 文件键值 (目录名, 文件名)
+            exists_file_path: 已存在的文件路径,如果为None则表示不存在
+        """
+        has_exists = exists_file_path is not None
+
+        if has_exists:
+            # 合并并保存
             exists_df = read_file_to_df(exists_file_path)
             exists_df = read_file_to_df(exists_file_path)
             now_df = read_file_to_df(file_path)
             now_df = read_file_to_df(file_path)
-            # 合并两个 DataFrame
-            combined_df = pd.concat([exists_df, now_df])
-            # 去重,保留 now_df 的值
-            combined_df = combined_df.drop_duplicates(subset='time_stamp', keep='last')
-            # 按 time_stamp 排序
-            combined_df = combined_df.sort_values(by='time_stamp').reset_index(drop=True)
+            combined_df = self._merge_dataframes(exists_df, now_df)
             combined_df.to_csv(exists_file_path, encoding='utf-8', index=False)
             combined_df.to_csv(exists_file_path, encoding='utf-8', index=False)
-            self.update_files.append(exists_file_path)
+            self.updated_files.append(exists_file_path)
         else:
         else:
-            save_path = str(os.path.join(self.pathsAndTable.get_save_path(), key[0], key[1]))
-            copy_to_new(file_path, save_path)
-            self.update_files.append(save_path)
-        trans_print(f"{key[0]}/{key[1]} {'包含' if exists_same else '不包含'} 相同文件,保存成功")
-
-    def combine_and_save_formal_file(self):
-        exists_files = read_excel_files(self.pathsAndTable.get_save_path())
-        exists_file_maps = dict()
-        for file_path in exists_files:
-            name = (os.path.basename(os.path.dirname(file_path)), os.path.basename(file_path))
-            exists_file_maps[name] = file_path
-
-        new_files = read_excel_files(self.pathsAndTable.get_tmp_formal_path())
-        new_file_maps = dict()
-        for file_path in new_files:
-            name = (os.path.basename(os.path.dirname(file_path)), os.path.basename(file_path))
-            new_file_maps[name] = file_path
-
-        same_keys = list(set(exists_file_maps.keys()).intersection(new_file_maps.keys()))
-        split_count = get_available_cpu_count_with_percent(2 / 3)
-        with multiprocessing.Pool(split_count) as pool:
-            pool.starmap(self.combine_and_save,
-                         [(file_path, key, exists_file_maps[key] if key in same_keys else None) for key, file_path in
-                          new_file_maps.items()])
-
-    def run(self):
+            # 复制新文件
+            save_dir = str(os.path.join(
+                self.paths_and_table.get_save_path(),
+                key[0],
+                key[1]
+            ))
+            copy_to_new(file_path, save_dir)
+            self.updated_files.append(save_dir)
+
+        # 记录日志
+        status = "包含" if has_exists else "不包含"
+        debug(f"{key[0]}/{key[1]} {status} 相同文件,保存成功")
+
+    def _build_file_maps(self, base_path: str) -> Dict[Tuple[str, str], str]:
+        """
+        构建文件映射字典
+
+        Args:
+            base_path: 基础路径
+
+        Returns:
+            文件路径映射字典,键为(目录名, 文件名),值为完整路径
+        """
+        files = read_excel_files(base_path)
+        return {
+            (os.path.basename(os.path.dirname(file_path)), os.path.basename(file_path)): file_path
+            for file_path in files
+        }
+
+    def combine_and_save_formal_file(self) -> None:
+        """合并并保存正式文件的主方法"""
+        # 构建已存在文件和新文件的映射
+        exists_file_maps = self._build_file_maps(self.paths_and_table.get_save_path())
+        new_file_maps = self._build_file_maps(self.paths_and_table.get_tmp_formal_path())
+
+        # 找出相同键的文件
+        same_keys = set(exists_file_maps.keys()) & set(new_file_maps.keys())
+
+        # 准备并行处理参数
+        process_args = [
+            (
+                file_path,
+                key,
+                exists_file_maps.get(key) if key in same_keys else None
+            )
+            for key, file_path in new_file_maps.items()
+        ]
+
+        # 使用并行处理
+        cpu_count = get_available_cpu_count_with_percent(ParallelProcessing.CPU_USAGE_PERCENT)
+        cpu_count = min(cpu_count, ParallelProcessing.MAX_PROCESSES)
+        with multiprocessing.Pool(cpu_count) as pool:
+            pool.starmap(self._save_combined_file, process_args)
+
+    def run(self) -> List[str]:
+        """
+        执行合并操作
+
+        Returns:
+            更新后的文件路径列表
+        """
         self.combine_and_save_formal_file()
         self.combine_and_save_formal_file()
-        print(self.update_files)
-        return list(self.update_files)
+        info(f"共处理了 {len(self.updated_files)} 个文件")
+        return list(self.updated_files)

+ 96 - 20
etl/common/PathsAndTable.py

@@ -1,14 +1,33 @@
 import shutil
 import shutil
 from os import path, sep
 from os import path, sep
 
 
+from conf.constants import Paths
 from service.trans_service import creat_min_sec_table, create_warn_fault_table
 from service.trans_service import creat_min_sec_table, create_warn_fault_table
 from utils.conf.read_conf import *
 from utils.conf.read_conf import *
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info
 
 
 
 
 class PathsAndTable(object):
 class PathsAndTable(object):
-    def __init__(self, id=None, task_name=None, read_dir=None, wind_farm_code=None, wind_farm_name=None,
-                 read_type=None, save_db=True, save_zip=True, yaml_config=None, wind_col_trans=None):
+    """路径和表管理类"""
+
+    def __init__(self, id: int = None, task_name: str = None, read_dir: str = None, wind_farm_code: str = None,
+                 wind_farm_name: str = None, read_type: str = None, save_db: bool = True,
+                 save_zip: bool = True, yaml_config: dict = None, wind_col_trans: dict = None):
+        """
+        初始化路径和表管理类
+        
+        Args:
+            id: 任务ID
+            task_name: 任务名称
+            read_dir: 读取目录
+            wind_farm_code: 风电场编码
+            wind_farm_name: 风电场名称
+            read_type: 读取类型
+            save_db: 是否保存到数据库
+            save_zip: 是否保存为压缩文件
+            yaml_config: YAML配置
+            wind_col_trans: 风机列转换映射
+        """
         self.id = id
         self.id = id
         self.task_name = task_name
         self.task_name = task_name
         self.read_dir = read_dir
         self.read_dir = read_dir
@@ -25,11 +44,11 @@ class PathsAndTable(object):
 
 
         self.use_tidb = read_conf(yaml_config, 'use_tidb', False)
         self.use_tidb = read_conf(yaml_config, 'use_tidb', False)
 
 
-        self.tmp_base_path = read_conf(yaml_config, "tmp_base_path", "/tmp")
+        self.tmp_base_path = read_conf(yaml_config, "tmp_base_path", Paths.DEFAULT_TMP_BASE_PATH)
         if save_path_conf:
         if save_path_conf:
             self.save_path = save_path_conf + sep + self.wind_farm_name
             self.save_path = save_path_conf + sep + self.wind_farm_name
         else:
         else:
-            find_index = read_dir.find(read_conf(yaml_config, 'etl_origin_path_contain', "etl_origin_path_contain"))
+            find_index = read_dir.find(read_conf(yaml_config, 'etl_origin_path_contain', "收资数据"))
             if find_index == -1:
             if find_index == -1:
                 raise Exception("路径未包含原始数据特定字符:" + read_dir)
                 raise Exception("路径未包含原始数据特定字符:" + read_dir)
             self.save_path = read_dir[0:find_index] + sep + "清理数据"
             self.save_path = read_dir[0:find_index] + sep + "清理数据"
@@ -37,48 +56,105 @@ class PathsAndTable(object):
         if self.save_path is None:
         if self.save_path is None:
             raise Exception("未配置保存路径:" + read_dir)
             raise Exception("未配置保存路径:" + read_dir)
 
 
-        self.archive_path = read_conf(yaml_config, "archive_path", "/tmp/archive")
+        self.archive_path = read_conf(yaml_config, "archive_path", Paths.DEFAULT_ARCHIVE_PATH)
 
 
-    def get_save_path(self):
+    def get_save_path(self) -> str:
+        """
+        获取保存路径
+        
+        Returns:
+            保存路径
+        """
         return path.join(self.save_path, self.read_type)
         return path.join(self.save_path, self.read_type)
 
 
-    def get_tmp_path(self):
+    def get_tmp_path(self) -> str:
+        """
+        获取临时路径
+        
+        Returns:
+            临时路径
+        """
         return str(path.join(self.tmp_base_path, str(self.id) + "_" + self.task_name + "_" + self.read_type))
         return str(path.join(self.tmp_base_path, str(self.id) + "_" + self.task_name + "_" + self.read_type))
 
 
-    def get_excel_tmp_path(self):
+    def get_excel_tmp_path(self) -> str:
+        """
+        获取Excel临时路径
+        
+        Returns:
+            Excel临时路径
+        """
         return path.join(self.get_tmp_path(), 'excel_tmp' + sep)
         return path.join(self.get_tmp_path(), 'excel_tmp' + sep)
 
 
-    def get_read_tmp_path(self):
+    def get_read_tmp_path(self) -> str:
+        """
+        获取读取临时路径
+        
+        Returns:
+            读取临时路径
+        """
         return path.join(self.get_tmp_path(), 'read_tmp')
         return path.join(self.get_tmp_path(), 'read_tmp')
 
 
-    def get_merge_tmp_path(self, wind_turbine_number=None):
+    def get_merge_tmp_path(self, wind_turbine_number=None) -> str:
+        """
+        获取合并临时路径
+        
+        Args:
+            wind_turbine_number: 风机编号
+            
+        Returns:
+            合并临时路径
+        """
         if wind_turbine_number is None:
         if wind_turbine_number is None:
             return path.join(self.get_tmp_path(), 'merge_tmp')
             return path.join(self.get_tmp_path(), 'merge_tmp')
         else:
         else:
             return path.join(self.get_tmp_path(), 'merge_tmp', str(wind_turbine_number))
             return path.join(self.get_tmp_path(), 'merge_tmp', str(wind_turbine_number))
 
 
-    def get_tmp_formal_path(self):
+    def get_tmp_formal_path(self) -> str:
+        """
+        获取正式临时路径
+        
+        Returns:
+            正式临时路径
+        """
         return path.join(self.get_tmp_path(), 'formal_tmp')
         return path.join(self.get_tmp_path(), 'formal_tmp')
 
 
-    def get_archive_path(self):
+    def get_archive_path(self) -> str:
+        """
+        获取归档路径
+        
+        Returns:
+            归档路径
+        """
         return path.join(self.archive_path, self.wind_farm_name, self.read_type, f'{self.id}_{self.task_name}')
         return path.join(self.archive_path, self.wind_farm_name, self.read_type, f'{self.id}_{self.task_name}')
 
 
-    def get_table_name(self):
+    def get_table_name(self) -> str:
+        """
+        获取表名
+        
+        Returns:
+            表名
+        """
         return "_".join([self.wind_farm_code, self.read_type])
         return "_".join([self.wind_farm_code, self.read_type])
 
 
-    def delete_tmp_files(self):
-        trans_print("开始删除临时文件夹")
+    def delete_tmp_files(self) -> None:
+        """
+        删除临时文件
+        """
+        info("开始删除临时文件夹")
         if path.exists(self.get_tmp_path()):
         if path.exists(self.get_tmp_path()):
             shutil.rmtree(self.get_tmp_path())
             shutil.rmtree(self.get_tmp_path())
-        trans_print("删除临时文件夹删除成功")
+        info("删除临时文件夹删除成功")
 
 
-    def create_wind_farm_db(self):
+    def create_wind_farm_db(self) -> None:
+        """
+        创建风电场数据库表
+        """
         if self.save_db:
         if self.save_db:
-            trans_print("开始创建表")
+            info("开始创建表")
             if self.read_type in ['second', 'minute']:
             if self.read_type in ['second', 'minute']:
                 creat_min_sec_table(self.get_table_name(), self.read_type, self.wind_farm_name, self.use_tidb)
                 creat_min_sec_table(self.get_table_name(), self.read_type, self.wind_farm_name, self.use_tidb)
             elif self.read_type in ['fault', 'warn']:
             elif self.read_type in ['fault', 'warn']:
                 create_warn_fault_table(self.get_table_name(), self.wind_farm_name, )
                 create_warn_fault_table(self.get_table_name(), self.wind_farm_name, )
             else:
             else:
                 raise Exception("不支持的读取类型:" + self.read_type)
                 raise Exception("不支持的读取类型:" + self.read_type)
-            trans_print("建表结束")
+            info("建表结束")

+ 47 - 22
etl/common/SaveToDb.py

@@ -5,8 +5,7 @@ import traceback
 from etl.common.PathsAndTable import PathsAndTable
 from etl.common.PathsAndTable import PathsAndTable
 from service.trans_conf_service import update_trans_transfer_progress
 from service.trans_conf_service import update_trans_transfer_progress
 from service.trans_service import save_scada_file_to_db, save_file_to_db
 from service.trans_service import save_scada_file_to_db, save_file_to_db
-from utils.file.trans_methods import split_array
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info, error
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 
 
 
 
@@ -25,34 +24,60 @@ class SaveToDb(object):
         all_saved_files = [i for i in all_saved_files if
         all_saved_files = [i for i in all_saved_files if
                            os.path.basename(i).split(".")[0] in self.pathsAndTable.wind_col_trans.keys()]
                            os.path.basename(i).split(".")[0] in self.pathsAndTable.wind_col_trans.keys()]
 
 
+        if not all_saved_files:
+            info("没有文件需要保存到数据库")
+            return
+
         self.pathsAndTable.create_wind_farm_db()
         self.pathsAndTable.create_wind_farm_db()
 
 
-        split_count = get_available_cpu_count_with_percent(percent=2 / 3)
-        split_count = split_count if split_count <= len(all_saved_files) else len(all_saved_files)
-        all_arrays = split_array(all_saved_files, split_count)
+        # 计算最佳进程数
+        max_processes = get_available_cpu_count_with_percent(percent=2 / 3)
+        max_processes = min(max_processes, len(all_saved_files), 10)  # 限制最大进程数为10
+
         try:
         try:
-            for index, arr in enumerate(all_arrays):
-                with multiprocessing.Pool(10) as pool:
-                    if self.pathsAndTable.read_type in ['minute', 'second']:
-                        pool.starmap(save_scada_file_to_db,
-                                     [(self.pathsAndTable.get_table_name(), file,
-                                       self.pathsAndTable.wind_col_trans[os.path.basename(file).split(".")[0]],
-                                       os.path.basename(os.path.dirname(file)),
-                                       self.batch_count,self.pathsAndTable.use_tidb) for file in arr])
-                    else:
-                        pool.starmap(save_file_to_db,
-                                     [(self.pathsAndTable.get_table_name(), file, self.batch_count) for file in arr])
-
-                update_trans_transfer_progress(self.pathsAndTable.id,
-                                               round(70 + 29 * (index + 1) / len(all_arrays), 2),
-                                               self.pathsAndTable.save_db)
+            # 创建一个进程池处理所有文件
+            with multiprocessing.Pool(max_processes) as pool:
+                if self.pathsAndTable.read_type in ['minute', 'second']:
+                    # 准备参数
+                    params = [(self.pathsAndTable.get_table_name(), file,
+                               self.pathsAndTable.wind_col_trans[os.path.basename(file).split(".")[0]],
+                               os.path.basename(os.path.dirname(file)),
+                               self.batch_count, self.pathsAndTable.use_tidb) for file in all_saved_files]
+
+                    # 分批次处理并更新进度
+                    batch_size = max(1, len(params) // 10)  # 最多10个批次
+                    for i in range(0, len(params), batch_size):
+                        batch_params = params[i:i + batch_size]
+                        pool.starmap(save_scada_file_to_db, batch_params)
+
+                        # 更新进度
+                        progress = 70 + 29 * (i + len(batch_params)) / len(params)
+                        update_trans_transfer_progress(self.pathsAndTable.id,
+                                                       round(progress, 2),
+                                                       self.pathsAndTable.save_db)
+
+                else:
+                    # 准备参数
+                    params = [(self.pathsAndTable.get_table_name(), file, self.batch_count) for file in all_saved_files]
+
+                    # 分批次处理并更新进度
+                    batch_size = max(1, len(params) // 10)  # 最多10个批次
+                    for i in range(0, len(params), batch_size):
+                        batch_params = params[i:i + batch_size]
+                        pool.starmap(save_file_to_db, batch_params)
+
+                        # 更新进度
+                        progress = 70 + 29 * (i + len(batch_params)) / len(params)
+                        update_trans_transfer_progress(self.pathsAndTable.id,
+                                                       round(progress, 2),
+                                                       self.pathsAndTable.save_db)
         except Exception as e:
         except Exception as e:
-            trans_print(traceback.format_exc())
+            error(traceback.format_exc())
             message = "保存到数据库错误,系统返回错误:" + str(e)
             message = "保存到数据库错误,系统返回错误:" + str(e)
             raise ValueError(message)
             raise ValueError(message)
 
 
     def run(self):
     def run(self):
         if self.pathsAndTable.save_db:
         if self.pathsAndTable.save_db:
             self.mutiprocessing_to_save_db()
             self.mutiprocessing_to_save_db()
-            update_trans_transfer_progress(self.pathsAndTable.id,  99,
+            update_trans_transfer_progress(self.pathsAndTable.id, 99,
                                            self.pathsAndTable.save_db)
                                            self.pathsAndTable.save_db)

+ 43 - 18
etl/common/UnzipAndRemove.py

@@ -1,54 +1,76 @@
 import multiprocessing
 import multiprocessing
+import os
 import traceback
 import traceback
-from os import *
+from typing import List, Optional
 
 
+from conf.constants import ParallelProcessing
 from etl.common.PathsAndTable import PathsAndTable
 from etl.common.PathsAndTable import PathsAndTable
 from service.trans_conf_service import update_trans_transfer_progress
 from service.trans_conf_service import update_trans_transfer_progress
 from utils.file.trans_methods import read_files, read_excel_files, copy_to_new, split_array
 from utils.file.trans_methods import read_files, read_excel_files, copy_to_new, split_array
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info, error
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 from utils.zip.unzip import unzip, unrar, get_desc_path
 from utils.zip.unzip import unzip, unrar, get_desc_path
 
 
 
 
 class UnzipAndRemove(object):
 class UnzipAndRemove(object):
-    def __init__(self, pathsAndTable: PathsAndTable, filter_types=None):
+    """解压缩并移动文件类"""
+
+    def __init__(self, pathsAndTable: PathsAndTable, filter_types: Optional[List[str]] = None):
+        """
+        初始化解压缩并移动文件类
+        
+        Args:
+            pathsAndTable: 路径和表对象
+            filter_types: 文件类型过滤器
+        """
         self.pathsAndTable = pathsAndTable
         self.pathsAndTable = pathsAndTable
         self.filter_types = filter_types
         self.filter_types = filter_types
 
 
-    def get_and_remove(self, file):
-
+    def get_and_remove(self, file: str) -> None:
+        """
+        解压缩或移动文件到临时路径
+        
+        Args:
+            file: 文件路径
+        """
         to_path = self.pathsAndTable.get_excel_tmp_path()
         to_path = self.pathsAndTable.get_excel_tmp_path()
-        if str(file).endswith("zip"):
-            if str(file).endswith("csv.zip"):
+        file_lower = str(file).lower()
+        if file_lower.endswith("zip"):
+            if file_lower.endswith("csv.zip"):
                 copy_to_new(file, file.replace(self.pathsAndTable.read_dir, to_path).replace("csv.zip", 'csv.gz'))
                 copy_to_new(file, file.replace(self.pathsAndTable.read_dir, to_path).replace("csv.zip", 'csv.gz'))
             else:
             else:
                 desc_path = file.replace(self.pathsAndTable.read_dir, to_path)
                 desc_path = file.replace(self.pathsAndTable.read_dir, to_path)
                 unzip(file, get_desc_path(desc_path))
                 unzip(file, get_desc_path(desc_path))
                 self.pathsAndTable.has_zip = True
                 self.pathsAndTable.has_zip = True
-        elif str(file).endswith("rar"):
+        elif file_lower.endswith("rar"):
             desc_path = file.replace(self.pathsAndTable.read_dir, to_path)
             desc_path = file.replace(self.pathsAndTable.read_dir, to_path)
             is_success, e = unrar(file, get_desc_path(desc_path))
             is_success, e = unrar(file, get_desc_path(desc_path))
             self.pathsAndTable.has_zip = True
             self.pathsAndTable.has_zip = True
-            if not is_success:
-                trans_print(traceback.format_exc())
-                pass
         else:
         else:
             copy_to_new(file, file.replace(self.pathsAndTable.read_dir, to_path))
             copy_to_new(file, file.replace(self.pathsAndTable.read_dir, to_path))
 
 
-    def remove_file_to_tmp_path(self):
+    def remove_file_to_tmp_path(self) -> List[str]:
+        """
+        将文件移动到临时路径
+        
+        Returns:
+            处理后的文件列表
+        """
         # 读取文件
         # 读取文件
         try:
         try:
-            if path.isfile(self.pathsAndTable.read_dir):
+            if os.path.isfile(self.pathsAndTable.read_dir):
                 all_files = [self.pathsAndTable.read_dir]
                 all_files = [self.pathsAndTable.read_dir]
             else:
             else:
                 all_files = read_files(self.pathsAndTable.read_dir)
                 all_files = read_files(self.pathsAndTable.read_dir)
 
 
             # 最大取系统cpu的 三分之二
             # 最大取系统cpu的 三分之二
             split_count = get_available_cpu_count_with_percent(2 / 3)
             split_count = get_available_cpu_count_with_percent(2 / 3)
+            # 限制最大进程数
+            split_count = min(split_count, ParallelProcessing.MAX_PROCESSES)
             all_arrays = split_array(all_files, split_count)
             all_arrays = split_array(all_files, split_count)
 
 
             for index, arr in enumerate(all_arrays):
             for index, arr in enumerate(all_arrays):
-                pool_count = split_count if split_count < len(arr) else len(arr)
+                pool_count = min(split_count, len(arr))
                 with multiprocessing.Pool(pool_count) as pool:
                 with multiprocessing.Pool(pool_count) as pool:
                     pool.starmap(self.get_and_remove, [(i,) for i in arr])
                     pool.starmap(self.get_and_remove, [(i,) for i in arr])
                 update_trans_transfer_progress(self.pathsAndTable.id,
                 update_trans_transfer_progress(self.pathsAndTable.id,
@@ -57,14 +79,17 @@ class UnzipAndRemove(object):
 
 
             all_files = read_excel_files(self.pathsAndTable.get_excel_tmp_path())
             all_files = read_excel_files(self.pathsAndTable.get_excel_tmp_path())
 
 
-            trans_print('读取文件数量:', len(all_files))
+            info('读取文件数量:', len(all_files))
         except Exception as e:
         except Exception as e:
-            trans_print(traceback.format_exc())
+            error(traceback.format_exc())
             message = "读取文件列表错误:" + self.pathsAndTable.read_dir + ",系统返回错误:" + str(e)
             message = "读取文件列表错误:" + self.pathsAndTable.read_dir + ",系统返回错误:" + str(e)
             raise ValueError(message)
             raise ValueError(message)
         return all_files
         return all_files
 
 
-    def run(self):
+    def run(self) -> None:
+        """
+        运行解压缩和移动文件流程
+        """
         self.remove_file_to_tmp_path()
         self.remove_file_to_tmp_path()
-        update_trans_transfer_progress(self.pathsAndTable.id,  20,
+        update_trans_transfer_progress(self.pathsAndTable.id, 20,
                                        self.pathsAndTable.save_db)
                                        self.pathsAndTable.save_db)

+ 3 - 3
etl/wind_power/fault_warn/FaultWarnTrans.py

@@ -10,7 +10,7 @@ from service.trans_service import get_fault_warn_conf, drop_table, create_warn_f
     save_file_to_db
     save_file_to_db
 from utils.conf.read_conf import read_conf
 from utils.conf.read_conf import read_conf
 from utils.file.trans_methods import read_excel_files, read_file_to_df, create_file_path, valid_eval
 from utils.file.trans_methods import read_excel_files, read_file_to_df, create_file_path, valid_eval
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info, error
 
 
 
 
 class FaultWarnTrans(BaseDataTrans):
 class FaultWarnTrans(BaseDataTrans):
@@ -27,14 +27,14 @@ class FaultWarnTrans(BaseDataTrans):
 
 
     # 第三步 读取 并 保存到临时文件
     # 第三步 读取 并 保存到临时文件
     def read_and_save_tmp_file(self):
     def read_and_save_tmp_file(self):
-        trans_print("无需保存临时文件")
+        info("无需保存临时文件")
 
 
     # 读取并保存到临时正式文件
     # 读取并保存到临时正式文件
     def statistics_and_save_tmp_formal_file(self):
     def statistics_and_save_tmp_formal_file(self):
         conf_map = self.get_filed_conf()
         conf_map = self.get_filed_conf()
         if conf_map is None or type(conf_map) == tuple or len(conf_map.keys()) == 0:
         if conf_map is None or type(conf_map) == tuple or len(conf_map.keys()) == 0:
             message = f"未找到{self.id}的{self.transfer_type}配置"
             message = f"未找到{self.id}的{self.transfer_type}配置"
-            trans_print(message)
+            error(message)
             update_trans_status_error(self.id, message, self.save_db)
             update_trans_status_error(self.id, message, self.save_db)
         else:
         else:
 
 

+ 4 - 4
etl/wind_power/laser/LaserTrans.py

@@ -7,11 +7,11 @@ import numpy as np
 import pandas as pd
 import pandas as pd
 
 
 from service.plt_service import get_all_wind
 from service.plt_service import get_all_wind
-from service.trans_service import save_df_to_db
 from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
 from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
     update_trans_status_success
     update_trans_status_success
+from service.trans_service import save_df_to_db
 from utils.file.trans_methods import read_files, read_file_to_df
 from utils.file.trans_methods import read_files, read_file_to_df
-from utils.log.trans_log import set_trance_id, trans_print
+from utils.log.trans_log import set_trance_id, info
 
 
 
 
 class LaserTrans():
 class LaserTrans():
@@ -56,7 +56,7 @@ class LaserTrans():
         trance_id = '-'.join([self.wind_farm_code, 'laser'])
         trance_id = '-'.join([self.wind_farm_code, 'laser'])
         set_trance_id(trance_id)
         set_trance_id(trance_id)
         all_files = read_files(self.read_path, ['csv'])
         all_files = read_files(self.read_path, ['csv'])
-        trans_print(self.wind_farm_code, '获取文件总数为:', len(all_files))
+        info(self.wind_farm_code, '获取文件总数为:', len(all_files))
         pool_count = 8 if len(all_files) > 8 else len(all_files)
         pool_count = 8 if len(all_files) > 8 else len(all_files)
 
 
         with multiprocessing.Pool(pool_count) as pool:
         with multiprocessing.Pool(pool_count) as pool:
@@ -70,7 +70,7 @@ class LaserTrans():
         update_trans_status_success(self.id, len(df['wind_turbine_number'].unique()), None,
         update_trans_status_success(self.id, len(df['wind_turbine_number'].unique()), None,
                                     df['acquisition_time'].min(), df['acquisition_time'].max(), df.shape[0])
                                     df['acquisition_time'].min(), df['acquisition_time'].max(), df.shape[0])
         # update_trans_status_success(self.id)
         # update_trans_status_success(self.id)
-        trans_print(self.wind_farm_code, '执行结束,总耗时:', (datetime.datetime.now() - self.begin))
+        info(self.wind_farm_code, '执行结束,总耗时:', (datetime.datetime.now() - self.begin))
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':

+ 6 - 6
etl/wind_power/min_sec/ClassIdentifier.py

@@ -5,7 +5,7 @@ import numpy as np
 from pandas import DataFrame
 from pandas import DataFrame
 
 
 from utils.file.trans_methods import read_file_to_df
 from utils.file.trans_methods import read_file_to_df
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import error, warning, debug
 
 
 
 
 class ClassIdentifier(object):
 class ClassIdentifier(object):
@@ -35,11 +35,11 @@ class ClassIdentifier(object):
         self.cut_out_speed = cut_out_speed
         self.cut_out_speed = cut_out_speed
 
 
         if self.rated_power is None:
         if self.rated_power is None:
-            trans_print(wind_turbine_number, "WARNING:rated_power配置为空的")
+            warning(wind_turbine_number, "WARNING:rated_power配置为空的")
             self.rated_power = 1500
             self.rated_power = 1500
 
 
         if self.cut_out_speed is None:
         if self.cut_out_speed is None:
-            trans_print(cut_out_speed, "WARNING:cut_out_speed配置为空的")
+            warning(cut_out_speed, "WARNING:cut_out_speed配置为空的")
             self.cut_out_speed = 20
             self.cut_out_speed = 20
 
 
         if file_path is None and origin_df is None:
         if file_path is None and origin_df is None:
@@ -350,12 +350,12 @@ class ClassIdentifier(object):
     def run(self):
     def run(self):
         # Implement your class identification logic here
         # Implement your class identification logic here
         begin = datetime.datetime.now()
         begin = datetime.datetime.now()
-        trans_print("打标签开始,风机号:", self.wind_turbine_number, self.df.shape)
+        debug("打标签开始,风机号:", self.wind_turbine_number, self.df.shape)
         try:
         try:
             df = self.identifier()
             df = self.identifier()
         except Exception as e:
         except Exception as e:
-            trans_print(traceback.format_exc())
+            error(traceback.format_exc())
             message = str(e) + ',风机编号:' + self.wind_turbine_number
             message = str(e) + ',风机编号:' + self.wind_turbine_number
             raise Exception('打标签失败:' + message)
             raise Exception('打标签失败:' + message)
-        trans_print("打标签结束,", df.shape, ",耗时:", datetime.datetime.now() - begin)
+        debug("打标签结束,", df.shape, ",耗时:", datetime.datetime.now() - begin)
         return df
         return df

+ 51 - 26
etl/wind_power/min_sec/MinSecTrans.py

@@ -3,6 +3,7 @@
 # @Author  : 魏志亮
 # @Author  : 魏志亮
 import multiprocessing
 import multiprocessing
 import os.path
 import os.path
+from typing import Optional
 
 
 from etl.common.BaseDataTrans import BaseDataTrans
 from etl.common.BaseDataTrans import BaseDataTrans
 from etl.common.CombineAndSaveFormalFile import CombineAndSaveFormalFile
 from etl.common.CombineAndSaveFormalFile import CombineAndSaveFormalFile
@@ -12,26 +13,67 @@ from etl.wind_power.min_sec.TransParam import TransParam
 from service.trans_conf_service import update_trans_status_success, update_trans_status_error
 from service.trans_conf_service import update_trans_status_success, update_trans_status_error
 from service.trans_service import get_min_sec_conf
 from service.trans_service import get_min_sec_conf
 from utils.conf.read_conf import read_conf
 from utils.conf.read_conf import read_conf
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import error
 
 
 
 
 class MinSecTrans(BaseDataTrans):
 class MinSecTrans(BaseDataTrans):
+    """分钟/秒级数据转换类"""
 
 
-    def __init__(self, data: dict = None, save_db=True, yaml_config=None, step=0, end=999):
+    # 转换列名列表
+    TRANS_COLS = [
+        'wind_turbine_number', 'time_stamp', 'active_power', 'rotor_speed', 'generator_speed',
+        'wind_velocity', 'pitch_angle_blade_1', 'pitch_angle_blade_2', 'pitch_angle_blade_3',
+        'cabin_position', 'true_wind_direction', 'yaw_error1', 'set_value_of_active_power',
+        'gearbox_oil_temperature', 'generatordrive_end_bearing_temperature',
+        'generatornon_drive_end_bearing_temperature', 'wind_turbine_status', 'wind_turbine_status2',
+        'cabin_temperature', 'twisted_cable_angle', 'front_back_vibration_of_the_cabin',
+        'side_to_side_vibration_of_the_cabin', 'actual_torque', 'given_torque',
+        'clockwise_yaw_count', 'counterclockwise_yaw_count', 'unusable', 'power_curve_available',
+        'required_gearbox_speed',
+        'inverter_speed_master_control', 'outside_cabin_temperature', 'main_bearing_temperature',
+        'main_bearing_temperature_2', 'gearbox_high_speed_shaft_bearing_temperature',
+        'gearboxmedium_speed_shaftbearing_temperature',
+        'gearbox_low_speed_shaft_bearing_temperature', 'generator_winding1_temperature',
+        'generator_winding2_temperature', 'generator_winding3_temperature',
+        'turbulence_intensity', 'grid_a_phase_current', 'grid_b_phase_current',
+        'grid_c_phase_current', 'reactive_power', 'param1', 'param2', 'param3', 'param4', 'param5',
+        'param6', 'param7', 'param8', 'param9', 'param10'
+    ]
+
+    def __init__(self, data: dict = None, save_db: bool = True, yaml_config: dict = None, step: int = 0,
+                 end: int = 999):
+        """
+        初始化分钟/秒级数据转换类
+        
+        Args:
+            data: 任务数据字典
+            save_db: 是否保存到数据库
+            yaml_config: YAML配置
+            step: 开始步骤
+            end: 结束步骤
+        """
         super(MinSecTrans, self).__init__(data, save_db, yaml_config, step, end)
         super(MinSecTrans, self).__init__(data, save_db, yaml_config, step, end)
         self.statistics_map = multiprocessing.Manager().dict()
         self.statistics_map = multiprocessing.Manager().dict()
         self.trans_param = self.get_trans_param()
         self.trans_param = self.get_trans_param()
         self.trans_param.wind_col_trans = self.wind_col_trans
         self.trans_param.wind_col_trans = self.wind_col_trans
 
 
     def get_filed_conf(self):
     def get_filed_conf(self):
+        """获取配置"""
         return get_min_sec_conf(self.wind_farm_code, self.transfer_type)
         return get_min_sec_conf(self.wind_farm_code, self.transfer_type)
 
 
-    def get_trans_param(self):
+    def get_trans_param(self) -> Optional[TransParam]:
+        """
+        获取转换参数
+        
+        Returns:
+            TransParam对象
+        """
         conf_map = self.get_filed_conf()
         conf_map = self.get_filed_conf()
         if conf_map is None or type(conf_map) == tuple or len(conf_map.keys()) == 0:
         if conf_map is None or type(conf_map) == tuple or len(conf_map.keys()) == 0:
             message = f"未找到{self.id}的{self.transfer_type}配置"
             message = f"未找到{self.id}的{self.transfer_type}配置"
-            trans_print(message)
+            error(message)
             update_trans_status_error(self.id, message, self.save_db)
             update_trans_status_error(self.id, message, self.save_db)
+            return None
         else:
         else:
             resolve_col_prefix = read_conf(conf_map, 'resolve_col_prefix')
             resolve_col_prefix = read_conf(conf_map, 'resolve_col_prefix')
             wind_name_exec = read_conf(conf_map, 'wind_name_exec', None)
             wind_name_exec = read_conf(conf_map, 'wind_name_exec', None)
@@ -48,25 +90,7 @@ class MinSecTrans(BaseDataTrans):
             boolean_sec_to_min = int(boolean_sec_to_min) == 1
             boolean_sec_to_min = int(boolean_sec_to_min) == 1
 
 
             cols_trans_all = dict()
             cols_trans_all = dict()
-            trans_cols = ['wind_turbine_number', 'time_stamp', 'active_power', 'rotor_speed', 'generator_speed',
-                          'wind_velocity', 'pitch_angle_blade_1', 'pitch_angle_blade_2', 'pitch_angle_blade_3',
-                          'cabin_position', 'true_wind_direction', 'yaw_error1', 'set_value_of_active_power',
-                          'gearbox_oil_temperature', 'generatordrive_end_bearing_temperature',
-                          'generatornon_drive_end_bearing_temperature', 'wind_turbine_status', 'wind_turbine_status2',
-                          'cabin_temperature', 'twisted_cable_angle', 'front_back_vibration_of_the_cabin',
-                          'side_to_side_vibration_of_the_cabin', 'actual_torque', 'given_torque',
-                          'clockwise_yaw_count', 'counterclockwise_yaw_count', 'unusable', 'power_curve_available',
-                          'required_gearbox_speed',
-                          'inverter_speed_master_control', 'outside_cabin_temperature', 'main_bearing_temperature',
-                          'main_bearing_temperature_2', 'gearbox_high_speed_shaft_bearing_temperature',
-                          'gearboxmedium_speed_shaftbearing_temperature',
-                          'gearbox_low_speed_shaft_bearing_temperature', 'generator_winding1_temperature',
-                          'generator_winding2_temperature', 'generator_winding3_temperature',
-                          'turbulence_intensity', 'grid_a_phase_current', 'grid_b_phase_current',
-                          'grid_c_phase_current', 'reactive_power', 'param1', 'param2', 'param3', 'param4', 'param5',
-                          'param6', 'param7', 'param8', 'param9', 'param10']
-
-            for col in trans_cols:
+            for col in self.TRANS_COLS:
                 cols_trans_all[col] = read_conf(conf_map, col, '')
                 cols_trans_all[col] = read_conf(conf_map, col, '')
 
 
             return TransParam(read_type=self.transfer_type, read_path=self.read_dir,
             return TransParam(read_type=self.transfer_type, read_path=self.read_dir,
@@ -77,13 +101,13 @@ class MinSecTrans(BaseDataTrans):
                               resolve_col_prefix=resolve_col_prefix, need_valid_cols=need_valid_cols,
                               resolve_col_prefix=resolve_col_prefix, need_valid_cols=need_valid_cols,
                               boolean_sec_to_min=boolean_sec_to_min)
                               boolean_sec_to_min=boolean_sec_to_min)
 
 
-    # 第三步 读取 并 保存到临时文件
     def read_and_save_tmp_file(self):
     def read_and_save_tmp_file(self):
+        """第三步:读取并保存到临时文件"""
         read_and_save_tmp = ReadAndSaveTmp(self.pathsAndTable, self.trans_param)
         read_and_save_tmp = ReadAndSaveTmp(self.pathsAndTable, self.trans_param)
         read_and_save_tmp.run()
         read_and_save_tmp.run()
 
 
-    # 第四步 统计 并 保存到正式文件
     def statistics_and_save_tmp_formal_file(self):
     def statistics_and_save_tmp_formal_file(self):
+        """第四步:统计并保存到正式文件"""
         # 保存到正式文件
         # 保存到正式文件
         statistics_and_save_tmp_formal_file = StatisticsAndSaveTmpFormalFile(self.pathsAndTable, self.trans_param,
         statistics_and_save_tmp_formal_file = StatisticsAndSaveTmpFormalFile(self.pathsAndTable, self.trans_param,
                                                                              self.statistics_map,
                                                                              self.statistics_map,
@@ -91,11 +115,12 @@ class MinSecTrans(BaseDataTrans):
         statistics_and_save_tmp_formal_file.run()
         statistics_and_save_tmp_formal_file.run()
 
 
     def combine_and_save_formal_file(self):
     def combine_and_save_formal_file(self):
+        """合并并保存正式文件"""
         combine_and_save_formal_file = CombineAndSaveFormalFile(self.pathsAndTable)
         combine_and_save_formal_file = CombineAndSaveFormalFile(self.pathsAndTable)
         self.update_files = combine_and_save_formal_file.run()
         self.update_files = combine_and_save_formal_file.run()
 
 
-    # 最后更新执行程度
     def update_exec_progress(self):
     def update_exec_progress(self):
+        """最后更新执行进度"""
         all_files = set([os.path.basename(i) for i in self.update_files])
         all_files = set([os.path.basename(i) for i in self.update_files])
         update_trans_status_success(self.id, len(all_files),
         update_trans_status_success(self.id, len(all_files),
                                     self.statistics_map['time_granularity'],
                                     self.statistics_map['time_granularity'],

+ 111 - 45
etl/wind_power/min_sec/ReadAndSaveTmp.py

@@ -1,31 +1,47 @@
 import datetime
 import datetime
 import multiprocessing
 import multiprocessing
+import os
 import traceback
 import traceback
-from os import *
 
 
 import pandas as pd
 import pandas as pd
 
 
+from conf.constants import ParallelProcessing
 from etl.common.PathsAndTable import PathsAndTable
 from etl.common.PathsAndTable import PathsAndTable
 from etl.wind_power.min_sec import TransParam
 from etl.wind_power.min_sec import TransParam
 from service.trans_conf_service import update_trans_transfer_progress
 from service.trans_conf_service import update_trans_transfer_progress
 from utils.file.trans_methods import read_excel_files, split_array, del_blank, \
 from utils.file.trans_methods import read_excel_files, split_array, del_blank, \
     create_file_path, read_file_to_df, valid_eval
     create_file_path, read_file_to_df, valid_eval
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info, debug, error
 from utils.systeminfo.sysinfo import use_files_get_max_cpu_count, get_dir_size
 from utils.systeminfo.sysinfo import use_files_get_max_cpu_count, get_dir_size
 
 
 
 
 class ReadAndSaveTmp(object):
 class ReadAndSaveTmp(object):
+    """读取并保存临时文件类"""
 
 
     def __init__(self, pathsAndTable: PathsAndTable, trans_param: TransParam):
     def __init__(self, pathsAndTable: PathsAndTable, trans_param: TransParam):
+        """
+        初始化读取并保存临时文件类
+        
+        Args:
+            pathsAndTable: 路径和表对象
+            trans_param: 转换参数对象
+        """
         self.pathsAndTable = pathsAndTable
         self.pathsAndTable = pathsAndTable
         self.trans_param = trans_param
         self.trans_param = trans_param
         self.exist_wind_names = multiprocessing.Manager().list()
         self.exist_wind_names = multiprocessing.Manager().list()
         self.lock = multiprocessing.Manager().Lock()
         self.lock = multiprocessing.Manager().Lock()
         self.file_lock = multiprocessing.Manager().dict()
         self.file_lock = multiprocessing.Manager().dict()
 
 
-    def _save_to_tmp_csv_by_name(self, df, name):
+    def _save_to_tmp_csv_by_name(self, df: pd.DataFrame, name: str):
+        """
+        根据风机名称保存到临时CSV文件
+        
+        Args:
+            df: 数据帧
+            name: 风机名称
+        """
         save_name = str(name) + '.csv'
         save_name = str(name) + '.csv'
-        save_path = path.join(self.pathsAndTable.get_read_tmp_path(), save_name)
+        save_path = os.path.join(self.pathsAndTable.get_read_tmp_path(), save_name)
         create_file_path(save_path, is_file_path=True)
         create_file_path(save_path, is_file_path=True)
 
 
         with self.lock:
         with self.lock:
@@ -41,7 +57,13 @@ class ReadAndSaveTmp(object):
             else:
             else:
                 df.to_csv(save_path, index=False, encoding='utf8')
                 df.to_csv(save_path, index=False, encoding='utf8')
 
 
-    def save_merge_data(self, file_path):
+    def save_merge_data(self, file_path: str):
+        """
+        保存合并数据
+        
+        Args:
+            file_path: 文件路径
+        """
         df = self.read_excel_to_df(file_path)
         df = self.read_excel_to_df(file_path)
         if self.trans_param.wind_name_exec:
         if self.trans_param.wind_name_exec:
             if valid_eval(self.trans_param.wind_name_exec):
             if valid_eval(self.trans_param.wind_name_exec):
@@ -67,7 +89,7 @@ class ReadAndSaveTmp(object):
                         else:
                         else:
                             contains_name = False
                             contains_name = False
                             self.exist_wind_names.append(exist_name)
                             self.exist_wind_names.append(exist_name)
-                        save_path = path.join(merge_path, csv_name)
+                        save_path = os.path.join(merge_path, csv_name)
                         now_df = df[df['wind_turbine_number'] == wind_name][['time_stamp', col]]
                         now_df = df[df['wind_turbine_number'] == wind_name][['time_stamp', col]]
                         if contains_name:
                         if contains_name:
                             now_df.to_csv(save_path, index=False, encoding='utf-8', mode='a',
                             now_df.to_csv(save_path, index=False, encoding='utf-8', mode='a',
@@ -75,7 +97,16 @@ class ReadAndSaveTmp(object):
                         else:
                         else:
                             now_df.to_csv(save_path, index=False, encoding='utf-8')
                             now_df.to_csv(save_path, index=False, encoding='utf-8')
 
 
-    def trans_df_cols(self, df):
+    def trans_df_cols(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        转换数据帧列名
+        
+        Args:
+            df: 数据帧
+        
+        Returns:
+            转换后的数据帧
+        """
         if self.trans_param.is_vertical_table:
         if self.trans_param.is_vertical_table:
             pass
             pass
         else:
         else:
@@ -120,8 +151,13 @@ class ReadAndSaveTmp(object):
 
 
         return df
         return df
 
 
-    def df_save_to_tmp_file(self, df=pd.DataFrame()):
-
+    def df_save_to_tmp_file(self, df: pd.DataFrame = pd.DataFrame()):
+        """
+        保存数据帧到临时文件
+        
+        Args:
+            df: 数据帧
+        """
         df = self.trans_df_cols(df)
         df = self.trans_df_cols(df)
 
 
         df = del_blank(df, ['wind_turbine_number'])
         df = del_blank(df, ['wind_turbine_number'])
@@ -133,19 +169,34 @@ class ReadAndSaveTmp(object):
 
 
         self.save_to_tmp_csv(df)
         self.save_to_tmp_csv(df)
 
 
-    def save_to_tmp_csv(self, df):
+    def save_to_tmp_csv(self, df: pd.DataFrame):
+        """
+        保存到临时CSV文件
+        
+        Args:
+            df: 数据帧
+        """
         names = set(df['wind_turbine_number'].values)
         names = set(df['wind_turbine_number'].values)
         if names:
         if names:
-            trans_print("开始保存", str(names), "到临时文件", df.shape)
+            debug("开始保存", str(names), "到临时文件", df.shape)
 
 
             for name in names:
             for name in names:
                 self._save_to_tmp_csv_by_name(df[df['wind_turbine_number'] == name], name)
                 self._save_to_tmp_csv_by_name(df[df['wind_turbine_number'] == name], name)
             del df
             del df
-            trans_print("保存", str(names), "到临时文件成功, 风机数量", len(names))
-
-    def merge_df(self, dir_path):
+            debug("保存", str(names), "到临时文件成功, 风机数量", len(names))
+
+    def merge_df(self, dir_path: str) -> pd.DataFrame:
+        """
+        合并数据帧
+        
+        Args:
+            dir_path: 目录路径
+        
+        Returns:
+            合并后的数据帧
+        """
         all_files = read_excel_files(dir_path)
         all_files = read_excel_files(dir_path)
-        wind_turbine_number = path.basename(dir_path)
+        wind_turbine_number = os.path.basename(dir_path)
         df = pd.DataFrame()
         df = pd.DataFrame()
         for file in all_files:
         for file in all_files:
             now_df = read_file_to_df(file)
             now_df = read_file_to_df(file)
@@ -161,8 +212,13 @@ class ReadAndSaveTmp(object):
         return df
         return df
 
 
     def read_file_and_save_tmp(self):
     def read_file_and_save_tmp(self):
+        """
+        读取文件并保存到临时文件
+        """
         all_files = read_excel_files(self.pathsAndTable.get_excel_tmp_path())
         all_files = read_excel_files(self.pathsAndTable.get_excel_tmp_path())
         split_count = use_files_get_max_cpu_count(all_files)
         split_count = use_files_get_max_cpu_count(all_files)
+        # 限制最大进程数
+        split_count = min(split_count, ParallelProcessing.MAX_PROCESSES)
         all_arrays = split_array(all_files, split_count)
         all_arrays = split_array(all_files, split_count)
 
 
         if self.trans_param.merge_columns:
         if self.trans_param.merge_columns:
@@ -172,7 +228,7 @@ class ReadAndSaveTmp(object):
                         pool.starmap(self.save_merge_data, [(ar,) for ar in arr])
                         pool.starmap(self.save_merge_data, [(ar,) for ar in arr])
 
 
                 except Exception as e:
                 except Exception as e:
-                    trans_print(traceback.format_exc())
+                    error(traceback.format_exc())
                     message = "整理临时文件,系统返回错误:" + str(e)
                     message = "整理临时文件,系统返回错误:" + str(e)
                     raise ValueError(message)
                     raise ValueError(message)
 
 
@@ -180,28 +236,28 @@ class ReadAndSaveTmp(object):
                                                round(20 + 20 * (index + 1) / len(all_arrays), 2),
                                                round(20 + 20 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
                                                self.pathsAndTable.save_db)
 
 
-            dirs = [path.join(self.pathsAndTable.get_merge_tmp_path(), dir_name) for dir_name in
-                    listdir(self.pathsAndTable.get_merge_tmp_path())]
-            dir_total_size = get_dir_size(dirs[0])
-            # split_count = max_file_size_get_max_cpu_count(dir_total_size, memory_percent=1 / 12, cpu_percent=1 / 10)
-            split_count = 2
-            all_arrays = split_array(dirs, split_count)
-            for index, arr in enumerate(all_arrays):
-                try:
-                    with multiprocessing.Pool(split_count) as pool:
-                        pool.starmap(self.merge_df, [(ar,) for ar in arr])
-
-                except Exception as e:
-                    trans_print(traceback.format_exc())
-                    message = "整理临时文件,系统返回错误:" + str(e)
-                    raise ValueError(message)
-
-                update_trans_transfer_progress(self.pathsAndTable.id,
-                                               round(20 + 30 * (index + 1) / len(all_arrays), 2),
-                                               self.pathsAndTable.save_db)
+            dirs = [os.path.join(self.pathsAndTable.get_merge_tmp_path(), dir_name) for dir_name in
+                    os.listdir(self.pathsAndTable.get_merge_tmp_path())]
+            if dirs:
+                dir_total_size = get_dir_size(dirs[0])
+                # 限制最大进程数
+                split_count = min(dir_total_size, ParallelProcessing.MAX_PROCESSES)
+                all_arrays = split_array(dirs, split_count)
+                for index, arr in enumerate(all_arrays):
+                    try:
+                        with multiprocessing.Pool(split_count) as pool:
+                            pool.starmap(self.merge_df, [(ar,) for ar in arr])
+
+                    except Exception as e:
+                        error(traceback.format_exc())
+                        message = "整理临时文件,系统返回错误:" + str(e)
+                        raise ValueError(message)
+
+                    update_trans_transfer_progress(self.pathsAndTable.id,
+                                                   round(20 + 30 * (index + 1) / len(all_arrays), 2),
+                                                   self.pathsAndTable.save_db)
 
 
         else:
         else:
-
             for index, arr in enumerate(all_arrays):
             for index, arr in enumerate(all_arrays):
                 try:
                 try:
                     with multiprocessing.Pool(split_count) as pool:
                     with multiprocessing.Pool(split_count) as pool:
@@ -209,7 +265,7 @@ class ReadAndSaveTmp(object):
                     for df in dfs:
                     for df in dfs:
                         self.df_save_to_tmp_file(df)
                         self.df_save_to_tmp_file(df)
                 except Exception as e:
                 except Exception as e:
-                    trans_print(traceback.format_exc())
+                    error(traceback.format_exc())
                     message = "整理临时文件,系统返回错误:" + str(e)
                     message = "整理临时文件,系统返回错误:" + str(e)
                     raise ValueError(message)
                     raise ValueError(message)
 
 
@@ -217,8 +273,16 @@ class ReadAndSaveTmp(object):
                                                round(20 + 30 * (index + 1) / len(all_arrays), 2),
                                                round(20 + 30 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
                                                self.pathsAndTable.save_db)
 
 
-    def read_excel_to_df(self, file_path):
-
+    def read_excel_to_df(self, file_path: str) -> pd.DataFrame:
+        """
+        读取Excel文件到数据帧
+        
+        Args:
+            file_path: 文件路径
+        
+        Returns:
+            数据帧
+        """
         read_cols = [v.split(",")[0] for k, v in self.trans_param.cols_tran.items() if v and not v.startswith("$")]
         read_cols = [v.split(",")[0] for k, v in self.trans_param.cols_tran.items() if v and not v.startswith("$")]
 
 
         trans_dict = {}
         trans_dict = {}
@@ -300,7 +364,7 @@ class ReadAndSaveTmp(object):
 
 
             for k, v in trans_dict.items():
             for k, v in trans_dict.items():
                 if k.startswith("$file"):
                 if k.startswith("$file"):
-                    file = ".".join(path.basename(file_path).split(".")[0:-1])
+                    file = ".".join(os.path.basename(file_path).split(".")[0:-1])
                     if k == "$file":
                     if k == "$file":
                         ks = k.split("|")
                         ks = k.split("|")
                         bool_contains = False
                         bool_contains = False
@@ -337,7 +401,7 @@ class ReadAndSaveTmp(object):
                     datas = str(k.split(",")[1].replace("$file_date", "").replace("[", "").replace("]", "")).split(":")
                     datas = str(k.split(",")[1].replace("$file_date", "").replace("[", "").replace("]", "")).split(":")
                     if len(datas) != 2:
                     if len(datas) != 2:
                         raise Exception("字段映射出现错误 :" + str(trans_dict))
                         raise Exception("字段映射出现错误 :" + str(trans_dict))
-                    file = ".".join(path.basename(file_path).split(".")[0:-1])
+                    file = ".".join(os.path.basename(file_path).split(".")[0:-1])
                     date_str = str(file[int(datas[0]):int(datas[1])]).strip()
                     date_str = str(file[int(datas[0]):int(datas[1])]).strip()
                     df[v] = df[k.split(",")[0]].apply(lambda x: date_str + " " + str(x))
                     df[v] = df[k.split(",")[0]].apply(lambda x: date_str + " " + str(x))
 
 
@@ -351,8 +415,8 @@ class ReadAndSaveTmp(object):
                     if not bool_contains:
                     if not bool_contains:
                         cengshu = int(str(ks[0].replace("$folder", "").replace("[", "").replace("]", "")))
                         cengshu = int(str(ks[0].replace("$folder", "").replace("[", "").replace("]", "")))
                         for i in range(cengshu):
                         for i in range(cengshu):
-                            folder = path.dirname(folder)
-                        df[v] = str(str(folder).split(sep)[-1]).strip()
+                            folder = os.path.dirname(folder)
+                        df[v] = str(str(folder).split(os.sep)[-1]).strip()
                 elif k.startswith("$sheet_name"):
                 elif k.startswith("$sheet_name"):
                     df[v] = df['sheet_name']
                     df[v] = df['sheet_name']
 
 
@@ -374,9 +438,11 @@ class ReadAndSaveTmp(object):
             return df
             return df
 
 
     def run(self):
     def run(self):
-        trans_print("开始保存数据到临时文件")
+        """
+        """
+        info("开始保存数据到临时文件")
         begin = datetime.datetime.now()
         begin = datetime.datetime.now()
         self.read_file_and_save_tmp()
         self.read_file_and_save_tmp()
         update_trans_transfer_progress(self.pathsAndTable.id, 50,
         update_trans_transfer_progress(self.pathsAndTable.id, 50,
                                        self.pathsAndTable.save_db)
                                        self.pathsAndTable.save_db)
-        trans_print("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin)
+        info("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin)

+ 53 - 40
etl/wind_power/min_sec/StatisticsAndSaveTmpFormalFile.py

@@ -5,15 +5,16 @@ from os import path
 import numpy as np
 import numpy as np
 import pandas as pd
 import pandas as pd
 
 
+from conf.constants import DataProcessing, ParallelProcessing, Types
 from etl.common.PathsAndTable import PathsAndTable
 from etl.common.PathsAndTable import PathsAndTable
 from etl.wind_power.min_sec import TransParam
 from etl.wind_power.min_sec import TransParam
 from etl.wind_power.min_sec.ClassIdentifier import ClassIdentifier
 from etl.wind_power.min_sec.ClassIdentifier import ClassIdentifier
 from etl.wind_power.min_sec.FilterValidData import FilterValidData
 from etl.wind_power.min_sec.FilterValidData import FilterValidData
 from service.trans_conf_service import update_trans_transfer_progress
 from service.trans_conf_service import update_trans_transfer_progress
 from utils.conf.read_conf import read_conf
 from utils.conf.read_conf import read_conf
-from utils.df_utils.util import get_time_space
-from utils.file.trans_methods import create_file_path, read_excel_files, read_file_to_df, split_array
-from utils.log.trans_log import trans_print
+from utils.df_utils.util import estimate_time_interval as get_time_space
+from utils.file.trans_methods import create_file_path, read_excel_files, read_file_to_df
+from utils.log.trans_log import debug, error
 from utils.systeminfo.sysinfo import use_files_get_max_cpu_count
 from utils.systeminfo.sysinfo import use_files_get_max_cpu_count
 
 
 exec("import math")
 exec("import math")
@@ -74,13 +75,12 @@ class StatisticsAndSaveTmpFormalFile(object):
             self.trans_param.wind_col_trans).fillna(df['wind_turbine_number'])
             self.trans_param.wind_col_trans).fillna(df['wind_turbine_number'])
         wind_col_name = str(df['wind_turbine_number'].values[0])
         wind_col_name = str(df['wind_turbine_number'].values[0])
 
 
-        not_double_cols = ['wind_turbine_number', 'wind_turbine_name', 'time_stamp', 'param6', 'param7', 'param8',
-                           'param9', 'param10']
+        not_double_cols = DataProcessing.NOT_DOUBLE_COLS
 
 
         # 删除 有功功率 和 风速均为空的情况
         # 删除 有功功率 和 风速均为空的情况
         df.dropna(subset=['active_power', 'wind_velocity'], how='any', inplace=True)
         df.dropna(subset=['active_power', 'wind_velocity'], how='any', inplace=True)
-        trans_print(origin_wind_name, wind_col_name, "删除有功功率和风速有空的情况后:", df.shape)
-        df.replace(np.nan, -999999999, inplace=True)
+        debug(origin_wind_name, wind_col_name, "删除有功功率和风速有空的情况后:", df.shape)
+        df.replace(np.nan, DataProcessing.NAN_REPLACE_VALUE, inplace=True)
         number_cols = df.select_dtypes(include=['number']).columns.tolist()
         number_cols = df.select_dtypes(include=['number']).columns.tolist()
         for col in df.columns:
         for col in df.columns:
             if col not in not_double_cols and col not in number_cols:
             if col not in not_double_cols and col not in number_cols:
@@ -88,8 +88,8 @@ class StatisticsAndSaveTmpFormalFile(object):
                     df[col] = pd.to_numeric(df[col], errors='coerce')
                     df[col] = pd.to_numeric(df[col], errors='coerce')
                     # 删除包含NaN的行(即那些列A转换失败的行)
                     # 删除包含NaN的行(即那些列A转换失败的行)
                     df = df.dropna(subset=[col])
                     df = df.dropna(subset=[col])
-                    trans_print(origin_wind_name, wind_col_name, "删除非数值列名:", col)
-        df.replace(-999999999, np.nan, inplace=True)
+                    debug(origin_wind_name, wind_col_name, "删除非数值列名:", col)
+        df.replace(DataProcessing.NAN_REPLACE_VALUE, np.nan, inplace=True)
 
 
         df.drop_duplicates(['wind_turbine_number', 'time_stamp'], keep='first', inplace=True)
         df.drop_duplicates(['wind_turbine_number', 'time_stamp'], keep='first', inplace=True)
 
 
@@ -102,40 +102,40 @@ class StatisticsAndSaveTmpFormalFile(object):
         # 删除每行有空值的行(2025-3-24)
         # 删除每行有空值的行(2025-3-24)
         # origin_count = df.shape[0]
         # origin_count = df.shape[0]
         # df = df.dropna()
         # df = df.dropna()
-        # trans_print(f'原始数据量:{origin_count},去除na后数据量:{df.shape[0]}')
+        # trans_print(f"原始数据量:{origin_count},去除na后数据量:{df.shape[0]}")
 
 
         # 如果秒级有可能合并到分钟级
         # 如果秒级有可能合并到分钟级
         # TODO add 秒转分钟
         # TODO add 秒转分钟
         if self.trans_param.boolean_sec_to_min:
         if self.trans_param.boolean_sec_to_min:
             df['time_stamp'] = df['time_stamp'].apply(lambda x: x + pd.Timedelta(minutes=(10 - x.minute % 10) % 10))
             df['time_stamp'] = df['time_stamp'].apply(lambda x: x + pd.Timedelta(minutes=(10 - x.minute % 10) % 10))
-            df['time_stamp'] = df['time_stamp'].dt.floor('10T')
+            df['time_stamp'] = df['time_stamp'].dt.floor(DataProcessing.TIME_INTERVAL)
             df = df.groupby(['wind_turbine_number', 'time_stamp']).mean().reset_index()
             df = df.groupby(['wind_turbine_number', 'time_stamp']).mean().reset_index()
-        trans_print('有功功率前10个', df.head(10)['active_power'].values)
+        debug('有功功率前10个', df.head(10)['active_power'].values)
         power_df = df[df['active_power'] > 0]
         power_df = df[df['active_power'] > 0]
-        trans_print(origin_wind_name, wind_col_name, "功率大于0的数量:", power_df.shape)
+        debug(origin_wind_name, wind_col_name, "功率大于0的数量:", power_df.shape)
         power = power_df.sample(int(power_df.shape[0] / 100))['active_power'].median()
         power = power_df.sample(int(power_df.shape[0] / 100))['active_power'].median()
 
 
-        trans_print(origin_wind_name, wind_col_name, '有功功率,中位数', power)
-        if power > 100000:
+        debug(origin_wind_name, wind_col_name, '有功功率,中位数', power)
+        if power > DataProcessing.POWER_UNIT_THRESHOLD:
             df['active_power'] = df['active_power'] / 1000
             df['active_power'] = df['active_power'] / 1000
-        ## 做数据检测前,羡强行处理有功功率
+        # 做数据检测前,羡强行处理有功功率
         # df = df[df['active_power'] < 50000]
         # df = df[df['active_power'] < 50000]
 
 
         rated_power_and_cutout_speed_tuple = read_conf(self.rated_power_and_cutout_speed_map, str(wind_col_name))
         rated_power_and_cutout_speed_tuple = read_conf(self.rated_power_and_cutout_speed_map, str(wind_col_name))
         if rated_power_and_cutout_speed_tuple is None:
         if rated_power_and_cutout_speed_tuple is None:
-            rated_power_and_cutout_speed_tuple = (None, None)
-            trans_print(origin_wind_name, '未从平台匹配到额定功率')
+            # rated_power_and_cutout_speed_tuple = (None, None)
+            error(origin_wind_name, '未从平台匹配到额定功率')
         else:
         else:
-            trans_print(origin_wind_name, '过滤数据前数据大小', df.shape)
-            trans_print(origin_wind_name, '额定功率', rated_power_and_cutout_speed_tuple[0])
+            debug(origin_wind_name, '过滤数据前数据大小', df.shape)
+            debug(origin_wind_name, '额定功率', rated_power_and_cutout_speed_tuple[0])
             # trans_print(origin_wind_name, '\n', df.head(10))
             # trans_print(origin_wind_name, '\n', df.head(10))
             filter_valid_data = FilterValidData(df, rated_power_and_cutout_speed_tuple[0])
             filter_valid_data = FilterValidData(df, rated_power_and_cutout_speed_tuple[0])
             try:
             try:
                 df = filter_valid_data.run()
                 df = filter_valid_data.run()
             except:
             except:
-                trans_print(origin_wind_name, '过滤数据异常', filename)
+                error(origin_wind_name, '过滤数据异常', filename)
                 raise
                 raise
-            trans_print(origin_wind_name, '过滤数据后数据大小', df.shape)
+            debug(origin_wind_name, '过滤数据后数据大小', df.shape)
 
 
             # 如果有需要处理的,先进行代码处理,在进行打标签
             # 如果有需要处理的,先进行代码处理,在进行打标签
             # exec_code = get_trans_exec_code(self.paths_and_table.exec_id, self.paths_and_table.read_type)
             # exec_code = get_trans_exec_code(self.paths_and_table.exec_id, self.paths_and_table.read_type)
@@ -147,10 +147,10 @@ class StatisticsAndSaveTmpFormalFile(object):
             if power_df.shape[0] == 0:
             if power_df.shape[0] == 0:
                 df.loc[:, 'lab'] = -1
                 df.loc[:, 'lab'] = -1
             else:
             else:
-                class_identifiler = ClassIdentifier(wind_turbine_number=origin_wind_name, origin_df=df,
-                                                    rated_power=rated_power_and_cutout_speed_tuple[0],
-                                                    cut_out_speed=rated_power_and_cutout_speed_tuple[1])
-                df = class_identifiler.run()
+                class_identifier = ClassIdentifier(wind_turbine_number=origin_wind_name, origin_df=df,
+                                                   rated_power=rated_power_and_cutout_speed_tuple[0],
+                                                   cut_out_speed=rated_power_and_cutout_speed_tuple[1])
+                df = class_identifier.run()
 
 
             del power_df
             del power_df
 
 
@@ -163,7 +163,7 @@ class StatisticsAndSaveTmpFormalFile(object):
             df['year_month'] = df[['year', 'month']].apply(lambda x: str(x['year']) + str(x['month']).zfill(2), axis=1)
             df['year_month'] = df[['year', 'month']].apply(lambda x: str(x['year']) + str(x['month']).zfill(2), axis=1)
             cols = df.columns
             cols = df.columns
 
 
-            if self.paths_and_table.read_type == 'second':
+            if self.paths_and_table.read_type == Types.SECOND:
                 type_col = 'year_month'
                 type_col = 'year_month'
             else:
             else:
                 type_col = 'year'
                 type_col = 'year'
@@ -185,29 +185,42 @@ class StatisticsAndSaveTmpFormalFile(object):
             self.set_statistics_data(df)
             self.set_statistics_data(df)
 
 
             del df
             del df
-            trans_print("保存" + str(wind_col_name) + "成功")
+            debug("保存" + str(wind_col_name) + "成功")
 
 
-    def mutiprocessing_to_save_file(self):
+    def multiprocessing_to_save_file(self):
         # 开始保存到正式文件
         # 开始保存到正式文件
         all_tmp_files = read_excel_files(self.paths_and_table.get_read_tmp_path())
         all_tmp_files = read_excel_files(self.paths_and_table.get_read_tmp_path())
-        # split_count = self.pathsAndTable.multi_pool_count
-        split_count = use_files_get_max_cpu_count(all_tmp_files)
-        all_arrays = split_array(all_tmp_files, split_count)
+
+        if not all_tmp_files:
+            debug("没有临时文件需要处理")
+            return
+
+        # 计算最佳进程数
+        max_processes = use_files_get_max_cpu_count(all_tmp_files)
+        max_processes = min(max_processes, len(all_tmp_files), ParallelProcessing.MAX_PROCESSES)  # 限制最大进程数
 
 
         try:
         try:
-            for index, arr in enumerate(all_arrays):
-                with multiprocessing.Pool(split_count) as pool:
-                    pool.starmap(self.save_to_csv, [(i,) for i in arr])
-                update_trans_transfer_progress(self.paths_and_table.id,
-                                               round(50 + 15 * (index + 1) / len(all_arrays), 2),
-                                               self.paths_and_table.save_db)
+            # 创建一个进程池处理所有文件
+            with multiprocessing.Pool(max_processes) as pool:
+                # 分批次处理并更新进度
+                batch_size = max(1, len(all_tmp_files) // ParallelProcessing.MAX_BATCHES)  # 最多10个批次
+
+                for i in range(0, len(all_tmp_files), batch_size):
+                    batch_files = all_tmp_files[i:i + batch_size]
+                    pool.starmap(self.save_to_csv, [(file,) for file in batch_files])
+
+                    # 更新进度
+                    progress = 50 + 15 * (i + len(batch_files)) / len(all_tmp_files)
+                    update_trans_transfer_progress(self.paths_and_table.id,
+                                                   round(progress, 2),
+                                                   self.paths_and_table.save_db)
 
 
         except Exception as e:
         except Exception as e:
-            trans_print(traceback.format_exc())
+            error(traceback.format_exc())
             message = "保存文件错误,系统返回错误:" + str(e)
             message = "保存文件错误,系统返回错误:" + str(e)
             raise ValueError(message)
             raise ValueError(message)
 
 
     def run(self):
     def run(self):
-        self.mutiprocessing_to_save_file()
+        self.multiprocessing_to_save_file()
         update_trans_transfer_progress(self.paths_and_table.id, 65,
         update_trans_transfer_progress(self.paths_and_table.id, 65,
                                        self.paths_and_table.save_db)
                                        self.paths_and_table.save_db)

+ 42 - 7
etl/wind_power/min_sec/TransParam.py

@@ -1,23 +1,58 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 # @Time    : 2024/5/16
 # @Time    : 2024/5/16
 # @Author  : 魏志亮
 # @Author  : 魏志亮
+from typing import Optional, Dict, List
 
 
 
 
 class TransParam(object):
 class TransParam(object):
+    """转换参数类
+    
+    存储数据转换过程中的各种参数配置
+    """
 
 
-    def __init__(self, read_type=None, read_path=None, cols_tran=dict(),
-                 wind_name_exec=str(), is_vertical_table=False, vertical_cols=list(), vertical_key=None,
-                 vertical_value=None, index_cols=list(), merge_columns=False, resolve_col_prefix=None,
-                 need_valid_cols=True, wind_col_trans: dict = None, boolean_sec_to_min=False):
+    def __init__(self, 
+                 read_type: Optional[str] = None, 
+                 read_path: Optional[str] = None, 
+                 cols_tran: Dict[str, str] = None,
+                 wind_name_exec: str = "", 
+                 is_vertical_table: bool = False, 
+                 vertical_cols: List[str] = None,
+                 vertical_key: Optional[str] = None,
+                 vertical_value: Optional[str] = None, 
+                 index_cols: List[str] = None, 
+                 merge_columns: bool = False, 
+                 resolve_col_prefix: Optional[str] = None,
+                 need_valid_cols: bool = True, 
+                 wind_col_trans: Optional[Dict[str, str]] = None, 
+                 boolean_sec_to_min: bool = False):
+        """
+        初始化转换参数
+        
+        Args:
+            read_type: 读取类型,如 'second' 或 'minute'
+            read_path: 读取路径
+            cols_tran: 列名转换映射
+            wind_name_exec: 风机名称处理表达式
+            is_vertical_table: 是否为垂直表
+            vertical_cols: 垂直表列名列表
+            vertical_key: 垂直表键列
+            vertical_value: 垂直表值列
+            index_cols: 索引列列表
+            merge_columns: 是否合并列
+            resolve_col_prefix: 列名前缀解析表达式
+            need_valid_cols: 是否需要验证列
+            wind_col_trans: 风机列转换映射
+            boolean_sec_to_min: 是否将秒级数据转换为分钟级
+        """
         self.read_type = read_type
         self.read_type = read_type
         self.read_path = read_path
         self.read_path = read_path
-        self.cols_tran = cols_tran
+        self.cols_tran = cols_tran or {}
         self.is_vertical_table = is_vertical_table
         self.is_vertical_table = is_vertical_table
         self.wind_name_exec = wind_name_exec
         self.wind_name_exec = wind_name_exec
-        self.vertical_cols = vertical_cols
+        self.vertical_cols = vertical_cols or []
         self.vertical_key = vertical_key
         self.vertical_key = vertical_key
         self.vertical_value = vertical_value
         self.vertical_value = vertical_value
-        self.index_cols = index_cols
+        self.index_cols = index_cols or []
         self.merge_columns = merge_columns
         self.merge_columns = merge_columns
         self.resolve_col_prefix = resolve_col_prefix
         self.resolve_col_prefix = resolve_col_prefix
         self.need_valid_cols = need_valid_cols
         self.need_valid_cols = need_valid_cols

+ 53 - 18
etl/wind_power/wave/WaveTrans.py

@@ -1,14 +1,16 @@
 import json
 import json
 import multiprocessing
 import multiprocessing
 import traceback
 import traceback
+from typing import Tuple
 
 
+from conf.constants import ParallelProcessing, Types
 from service.plt_service import get_all_wind
 from service.plt_service import get_all_wind
 from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
 from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
     update_trans_status_success, update_trans_status_error
     update_trans_status_success, update_trans_status_error
 from service.trans_service import get_wave_conf, save_df_to_db, get_or_create_wave_table, \
 from service.trans_service import get_wave_conf, save_df_to_db, get_or_create_wave_table, \
     get_wave_data, delete_exist_wave_data
     get_wave_data, delete_exist_wave_data
 from utils.file.trans_methods import *
 from utils.file.trans_methods import *
-from utils.log.trans_log import set_trance_id
+from utils.log.trans_log import set_trance_id, info, error
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 
 
 exec("from os.path import *")
 exec("from os.path import *")
@@ -16,8 +18,17 @@ exec("import re")
 
 
 
 
 class WaveTrans(object):
 class WaveTrans(object):
-
-    def __init__(self, id, wind_farm_code, read_dir):
+    """波形数据转换类"""
+
+    def __init__(self, id: int, wind_farm_code: str, read_dir: str):
+        """
+        初始化波形数据转换类
+        
+        Args:
+            id: 任务ID
+            wind_farm_code: 风电场编码
+            read_dir: 读取目录
+        """
         self.id = id
         self.id = id
         self.wind_farm_code = wind_farm_code
         self.wind_farm_code = wind_farm_code
         self.read_dir = read_dir
         self.read_dir = read_dir
@@ -28,11 +39,28 @@ class WaveTrans(object):
         self.max_date = None
         self.max_date = None
         self.data_count = 0
         self.data_count = 0
 
 
-    def get_data_exec(self, func_code, filepath, measupoint_names: set):
+    def get_data_exec(self, func_code: str, filepath: str, measupoint_names: List[str]) -> Optional[Tuple]:
+        """
+        执行数据获取函数
+        
+        Args:
+            func_code: 函数代码
+            filepath: 文件路径
+            measupoint_names: 测量点名称列表
+        
+        Returns:
+            数据元组
+        """
         exec(func_code)
         exec(func_code)
         return locals()['get_data'](filepath, measupoint_names)
         return locals()['get_data'](filepath, measupoint_names)
 
 
-    def del_exists_data(self, df):
+    def del_exists_data(self, df: pd.DataFrame):
+        """
+        删除已存在的数据
+        
+        Args:
+            df: 数据帧
+        """
         min_date, max_date = df['time_stamp'].min(), df['time_stamp'].max()
         min_date, max_date = df['time_stamp'].min(), df['time_stamp'].max()
         db_df = get_wave_data(self.wind_farm_code + '_wave', min_date, max_date)
         db_df = get_wave_data(self.wind_farm_code + '_wave', min_date, max_date)
 
 
@@ -44,13 +72,17 @@ class WaveTrans(object):
             delete_exist_wave_data(self.wind_farm_code + "_wave", ids)
             delete_exist_wave_data(self.wind_farm_code + "_wave", ids)
 
 
     def run(self):
     def run(self):
+        """运行波形数据转换"""
         update_trans_status_running(self.id)
         update_trans_status_running(self.id)
         trance_id = '-'.join([self.wind_farm_code, 'wave'])
         trance_id = '-'.join([self.wind_farm_code, 'wave'])
         set_trance_id(trance_id)
         set_trance_id(trance_id)
         all_files = read_files(self.read_dir, ['txt', 'csv'])
         all_files = read_files(self.read_dir, ['txt', 'csv'])
         update_trans_transfer_progress(self.id, 5)
         update_trans_transfer_progress(self.id, 5)
+
         # 最大取系统cpu的 1/2
         # 最大取系统cpu的 1/2
         split_count = get_available_cpu_count_with_percent(1 / 2)
         split_count = get_available_cpu_count_with_percent(1 / 2)
+        # 限制最大进程数
+        split_count = min(split_count, ParallelProcessing.MAX_PROCESSES)
 
 
         all_wind, _ = get_all_wind(self.wind_farm_code, False)
         all_wind, _ = get_all_wind(self.wind_farm_code, False)
 
 
@@ -58,11 +90,11 @@ class WaveTrans(object):
 
 
         wave_conf = get_wave_conf(self.wind_farm_code)
         wave_conf = get_wave_conf(self.wind_farm_code)
 
 
-        base_param_exec = wave_conf['base_param_exec']
+        base_param_exec = wave_conf.get('base_param_exec', '')
         map_dict = {}
         map_dict = {}
         if base_param_exec:
         if base_param_exec:
             base_param_exec = base_param_exec.replace('\r\n', '\n').replace('\t', '    ')
             base_param_exec = base_param_exec.replace('\r\n', '\n').replace('\t', '    ')
-            trans_print(base_param_exec)
+            info(base_param_exec)
             if 'import ' in base_param_exec:
             if 'import ' in base_param_exec:
                 raise Exception("方法不支持import方法")
                 raise Exception("方法不支持import方法")
 
 
@@ -72,23 +104,26 @@ class WaveTrans(object):
 
 
         wind_turbine_name_set = set()
         wind_turbine_name_set = set()
 
 
-        all_array = split_array(all_files, split_count * 10)
+        # 优化批次大小
+        batch_size = split_count * 10
+        all_array = split_array(all_files, batch_size)
         total_index = len(all_array)
         total_index = len(all_array)
+
         for index, now_array in enumerate(all_array):
         for index, now_array in enumerate(all_array):
             index_begin = datetime.datetime.now()
             index_begin = datetime.datetime.now()
             with multiprocessing.Pool(split_count) as pool:
             with multiprocessing.Pool(split_count) as pool:
                 try:
                 try:
                     file_datas = pool.starmap(self.get_data_exec,
                     file_datas = pool.starmap(self.get_data_exec,
                                               [(base_param_exec, i, list(map_dict.keys())) for i in now_array])
                                               [(base_param_exec, i, list(map_dict.keys())) for i in now_array])
-                    trans_print(f'总数:{len(now_array)},返回个数{len(file_datas)}')
+                    info(f'总数:{len(now_array)},返回个数{len(file_datas)}')
                 except Exception as e:
                 except Exception as e:
                     message = str(e)
                     message = str(e)
-                    trans_print(traceback.format_exc())
+                    error(traceback.format_exc())
                     update_trans_status_error(self.id, message[0:len(message) if len(message) < 100 else 100])
                     update_trans_status_error(self.id, message[0:len(message) if len(message) < 100 else 100])
                     raise e
                     raise e
 
 
             update_trans_transfer_progress(self.id, 20 + int(index / total_index * 60))
             update_trans_transfer_progress(self.id, 20 + int(index / total_index * 60))
-            trans_print("读取文件耗时:", datetime.datetime.now() - self.begin)
+            info("读取文件耗时:", datetime.datetime.now() - self.begin)
 
 
             result_list = list()
             result_list = list()
             for file_data in file_datas:
             for file_data in file_datas:
@@ -96,7 +131,7 @@ class WaveTrans(object):
                     wind_turbine_name, time_stamp, sampling_frequency, rotational_speed, mesure_point_name, type, mesure_data = \
                     wind_turbine_name, time_stamp, sampling_frequency, rotational_speed, mesure_point_name, type, mesure_data = \
                         file_data[0], file_data[1], file_data[2], file_data[3], file_data[4], file_data[5], file_data[6]
                         file_data[0], file_data[1], file_data[2], file_data[3], file_data[4], file_data[5], file_data[6]
 
 
-                    if mesure_point_name in map_dict.keys():
+                    if mesure_point_name in map_dict:
                         wind_turbine_name_set.add(wind_turbine_name)
                         wind_turbine_name_set.add(wind_turbine_name)
                         if self.min_date is None or self.min_date > time_stamp:
                         if self.min_date is None or self.min_date > time_stamp:
                             self.min_date = time_stamp
                             self.min_date = time_stamp
@@ -109,7 +144,7 @@ class WaveTrans(object):
                              mesure_data])
                              mesure_data])
 
 
             if result_list:
             if result_list:
-                self.data_count = self.data_count + len(result_list)
+                self.data_count += len(result_list)
                 df = pd.DataFrame(result_list,
                 df = pd.DataFrame(result_list,
                                   columns=['wind_turbine_name', 'time_stamp', 'rotational_speed', 'sampling_frequency',
                                   columns=['wind_turbine_name', 'time_stamp', 'rotational_speed', 'sampling_frequency',
                                            'mesure_point_name', 'type', 'mesure_data'])
                                            'mesure_point_name', 'type', 'mesure_data'])
@@ -118,16 +153,16 @@ class WaveTrans(object):
                 df.dropna(subset=['mesure_point_name'], inplace=True)
                 df.dropna(subset=['mesure_point_name'], inplace=True)
                 df['wind_turbine_number'] = df['wind_turbine_name'].map(all_wind).fillna(df['wind_turbine_name'])
                 df['wind_turbine_number'] = df['wind_turbine_name'].map(all_wind).fillna(df['wind_turbine_name'])
 
 
+                # 批量处理JSON序列化
                 df['mesure_data'] = df['mesure_data'].apply(lambda x: json.dumps(x))
                 df['mesure_data'] = df['mesure_data'].apply(lambda x: json.dumps(x))
 
 
                 df.sort_values(by=['time_stamp', 'mesure_point_name'], inplace=True)
                 df.sort_values(by=['time_stamp', 'mesure_point_name'], inplace=True)
                 # self.del_exists_data(df)
                 # self.del_exists_data(df)
                 save_df_to_db(self.wind_farm_code + '_wave', df, batch_count=400)
                 save_df_to_db(self.wind_farm_code + '_wave', df, batch_count=400)
-            trans_print(f"总共{total_index}组,当前{index + 1}", "本次写入耗时:", datetime.datetime.now() - index_begin,
-                        "总耗时:", datetime.datetime.now() - self.begin)
+            info(f"总共{total_index}组,当前{index + 1}", "本次写入耗时:", datetime.datetime.now() - index_begin,
+                 "总耗时:", datetime.datetime.now() - self.begin)
 
 
-        update_trans_status_success(self.id, len(wind_turbine_name_set), None,
+        update_trans_status_success(self.id, len(wind_turbine_name_set), Types.WAVE,
                                     self.min_date, self.max_date, self.data_count)
                                     self.min_date, self.max_date, self.data_count)
 
 
-        # update_trans_status_success(self.id)
-        trans_print("总耗时:", datetime.datetime.now() - self.begin)
+        info("总耗时:", datetime.datetime.now() - self.begin)

+ 3 - 3
service/common_connect.py

@@ -1,5 +1,5 @@
from utils.db.ConnectMysql import MySQLDatabase

# Shared module-level connections: "plt" (platform DB) and "trans" (transfer DB).
plt = MySQLDatabase("plt")

trans = MySQLDatabase("trans")

+ 6 - 2
service/trans_conf_service.py

@@ -4,6 +4,7 @@
 from datetime import datetime
 from datetime import datetime
 
 
 from service.common_connect import trans
 from service.common_connect import trans
+from utils.log.trans_log import info
 
 
 
 
 def update_timeout_trans_data():
 def update_timeout_trans_data():
@@ -46,6 +47,7 @@ def update_trans_status_error(id, message="", save_db=True):
 
 
         message = message if len(message) <= 200 else message[0:200]
         message = message if len(message) <= 200 else message[0:200]
         trans.execute(exec_sql, (message, id))
         trans.execute(exec_sql, (message, id))
+    info("执行失败:", message)
 
 
 
 
 def update_trans_status_success(id, wind_count=0, time_granularity=0,
 def update_trans_status_success(id, wind_count=0, time_granularity=0,
@@ -70,14 +72,16 @@ def update_trans_status_success(id, wind_count=0, time_granularity=0,
             trans.execute(exec_sql, (wind_count, time_granularity, id))
             trans.execute(exec_sql, (wind_count, time_granularity, id))
 
 
 
 
def update_trans_transfer_progress(id, transfer_progress=0, save_db=True):
    """
    Update the transfer progress (percentage) of task ``id``.

    Args:
        id: data_transfer row id.
        transfer_progress: progress value; stored as int.
        save_db: when False, only log the progress without touching the DB.
    """
    print(id, transfer_progress)
    if save_db:
        exec_sql = """
        update data_transfer set transfer_progress =%s where id = %s 
        """
        trans.execute(exec_sql, (int(transfer_progress), id))

    info('当前进度:', transfer_progress)
+
 
 
 def get_now_running_count():
 def get_now_running_count():
     query_running_sql = """
     query_running_sql = """

+ 95 - 72
service/trans_service.py

@@ -9,53 +9,65 @@ import pandas as pd
 from service.common_connect import trans
 from service.common_connect import trans
 from service.trans_conf_service import create_wave_table
 from service.trans_conf_service import create_wave_table
 from utils.file.trans_methods import split_array
 from utils.file.trans_methods import split_array
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import info, error
 
 
 
 
def get_config(table_name, field_code, trans_type=None, field_name='wind_code', status=1) -> dict:
    """
    Generic configuration lookup shared by the specific get_*_conf helpers.

    Args:
        table_name: configuration table to query.
        field_code: value matched against ``field_name``.
        trans_type: transfer type; required for trans_conf / warn_fault_conf.
        field_name: column matched against ``field_code`` (default wind_code).
        status: row status filter (1 = enabled).

    Returns:
        The first matching configuration row as a dict, or None when the
        query fails or matches nothing.

    Raises:
        ValueError: for warn_fault_conf when trans_type is neither
            'fault' nor 'warn'.
    """
    if table_name == 'warn_fault_conf':
        # Map the logical trans_type to the numeric codes stored in the DB;
        # type 3 means "both fault and warn" and is always included.
        types = list()
        if trans_type == 'fault':
            types.append(1)
        elif trans_type == 'warn':
            types.append(2)
        else:
            error(f"未找到{trans_type}告警/故障的配置")
            raise ValueError(f"未找到{trans_type}告警/故障的配置")
        types.append(3)
        query_sql = f"SELECT * FROM {table_name} where {field_name} = %s and type in %s and status = %s"
        params = (field_code, types, status)
    elif table_name == 'trans_conf':
        # Same query shape whether matching on wind_code or wind_name
        # (the two original branches were identical and are merged here).
        query_sql = f"SELECT * FROM {table_name} where {field_name} = %s and type = %s and status = %s"
        params = (field_code, trans_type, status)
    else:
        query_sql = f"SELECT * FROM {table_name} where {field_name} = %s and status = %s"
        params = (field_code, status)

    res = trans.execute(query_sql, params)
    # execute() reports failure by returning a tuple/str instead of raising.
    if isinstance(res, (tuple, str)):
        return None
    # Guard against an empty result set (previously raised IndexError).
    if not res:
        return None
    return res[0]
 
 
 
 
-def get_min_sec_conf_test(field_code, trans_type) -> dict:
-    query_sql = "SELECT * FROM trans_conf where wind_name = %s and type = %s and status = 1"
-    res = trans.execute(query_sql, (field_code, trans_type))
-    print(res)
-    if type(res) == tuple or type(res) == str:
-        return None
-    return res[0]
def get_min_sec_conf(field_code, trans_type) -> dict:
    """Fetch the minute/second transfer config matched by wind_code."""
    return get_config('trans_conf', field_code, trans_type)
 
 
 
 
-def get_fault_warn_conf(field_code, trans_type) -> dict:
-    types = list()
-    if trans_type == 'fault':
-        types.append(1)
-    elif trans_type == 'warn':
-        types.append(2)
-    else:
-        trans_print(f"未找到{trans_type}告警/故障的配置")
-        raise ValueError(f"未找到{trans_type}告警/故障的配置")
def get_min_sec_conf_test(field_code, trans_type) -> dict:
    """Fetch the minute/second transfer config matched by wind_name (test variant)."""
    return get_config('trans_conf', field_code, trans_type, field_name='wind_name')
 
 
-    types.append(3)
 
 
-    query_sql = "SELECT * FROM warn_fault_conf where wind_code = %s and type in %s and status = 1"
-    res = trans.execute(query_sql, (field_code, types))
-    print(res)
-    if type(res) == tuple or type(res) == str:
-        return None
-    return res[0]
def get_fault_warn_conf(field_code, trans_type) -> dict:
    """Fetch the fault/warn config; trans_type must be 'fault' or 'warn'."""
    return get_config('warn_fault_conf', field_code, trans_type)
 
 
 
 
 def get_wave_conf(field_code) -> dict:
 def get_wave_conf(field_code) -> dict:
-    query_sql = "SELECT * FROM wave_conf where wind_code = %s and status = 1"
-    res = trans.execute(query_sql, (field_code))
-    print(res)
-    if type(res) == tuple or type(res) == str:
-        return None
-    return res[0]
+    return get_config('wave_conf', field_code)
 
 
 
 
 def creat_min_sec_table(table_name, trans_type, wind_farm_name='', use_tidb=False):
 def creat_min_sec_table(table_name, trans_type, wind_farm_name='', use_tidb=False):
@@ -64,7 +76,7 @@ def creat_min_sec_table(table_name, trans_type, wind_farm_name='', use_tidb=Fals
     """
     """
     count = trans.execute(exists_table_sql)[0]['count']
     count = trans.execute(exists_table_sql)[0]['count']
     if count > 0:
     if count > 0:
-        trans_print(f"{table_name}已存在")
+        info(f"{table_name}已存在")
 
 
     if trans_type == 'second':
     if trans_type == 'second':
         add_key = 'KEY `year_month` (`year_month`)'
         add_key = 'KEY `year_month` (`year_month`)'
@@ -197,52 +209,63 @@ def drop_exists_data(table_name, wind_turbine_number, min_date, max_date):
     """
     """
 
 
     count = trans.execute(sql)
     count = trans.execute(sql)
-    trans_print(f"删除数据{count}条,{table_name},{wind_turbine_number},{min_date},{max_date}")
-
+    info(f"删除数据{count}条,{table_name},{wind_turbine_number},{min_date},{max_date}")
 
 
-def save_scada_file_to_db(table_name, file: str, wind_turbine_number, date_str, batch_count=100000, use_tidb=False):
-    base_name = path.basename(file)
-    df = pd.read_csv(file)
-    # if use_tidb:
-    #     min_date = df['time_stamp'].min()
-    #     max_date = df['time_stamp'].max()
-    #     # drop_exists_data(table_name, wind_turbine_number, min_date, max_date)
-    # else:
-    #     add_or_remove_partation(table_name, date_str, wind_turbine_number)
-
-    add_or_remove_partation(table_name, date_str, wind_turbine_number)
 
 
def save_data_to_db(table_name: str, data, batch_count=100000, wind_turbine_number=None, date_str=None, file_name=None):
    """
    Generic persistence helper: save a DataFrame or a CSV file to a table.

    Args:
        table_name: target table.
        data: pandas DataFrame, or a path to a CSV file.
        batch_count: rows per insert batch.
        wind_turbine_number: optional turbine id (used for partitioning and logging).
        date_str: optional date string; together with wind_turbine_number it
            triggers partition maintenance before the insert.
        file_name: optional name used in error/log messages; defaults to the
            CSV basename when ``data`` is a path.

    Raises:
        Exception: re-raised with the file name prepended to the message.
    """
    try:
        if isinstance(data, str):
            # A path was given: load the CSV from disk.
            df = pd.read_csv(data)
            file_name = file_name or path.basename(data)
        else:
            df = data

        # Partition maintenance only applies to per-turbine, per-date saves.
        if wind_turbine_number and date_str:
            add_or_remove_partation(table_name, date_str, wind_turbine_number)

        # The two original branches issued the exact same save call and
        # differed only in the log line; unified here.
        trans.execute_df_save(df, table_name, batch_count)
        if wind_turbine_number:
            info(f"保存到{table_name},{file_name},{wind_turbine_number} 成功,总条数:{df.shape[0]}")
        else:
            info(f"保存到{table_name}成功,总条数:{df.shape[0]}")
    except Exception as e:
        # Keep the full stack in the log (it was dropped in the refactor)
        # and chain the original exception for debuggability.
        error(traceback.format_exc())
        if file_name:
            message = file_name + str(e)
        else:
            message = str(e)
        raise Exception(message) from e
 
 
 
 
def save_scada_file_to_db(table_name, file: str, wind_turbine_number, date_str, batch_count=100000, use_tidb=False):
    """Save a SCADA CSV file with partition handling (use_tidb is kept for compatibility and ignored)."""
    save_data_to_db(table_name, file, batch_count, wind_turbine_number, date_str)
+
+
 def save_file_to_db(table_name: str, file: str, batch_count=100000):
 def save_file_to_db(table_name: str, file: str, batch_count=100000):
-    base_name = path.basename(file)
-    try:
-        df = pd.read_csv(file)
-        trans_print(f"保存{table_name},总条数:{df.shape[0]}")
-        trans.execute_df_save(df, table_name, batch_count)
-        trans_print(f"保存到{table_name}成功,总条数:{df.shape[0]}")
-    except Exception as e:
-        trans_print(traceback.format_exc())
-        message = base_name + str(e)
-        raise Exception(message)
+    save_data_to_db(table_name, file, batch_count)
 
 
 
 
def save_df_to_db(table_name: str, df: pd.DataFrame, batch_count=100000):
    """Save an in-memory DataFrame to ``table_name`` without partition handling."""
    save_data_to_db(table_name, df, batch_count)
 
 
 
 
 def batch_statistics(table_name):
 def batch_statistics(table_name):
@@ -251,7 +274,7 @@ def batch_statistics(table_name):
         res = trans.execute(query_sql)
         res = trans.execute(query_sql)
         return res[0]
         return res[0]
     except:
     except:
-        trans_print(traceback.format_exc())
+        error(traceback.format_exc())
         return None
         return None
 
 
 
 
@@ -319,7 +342,7 @@ def get_trans_exec_code(id, query_type):
     if type(res) == tuple or type(res) == str:
     if type(res) == tuple or type(res) == str:
         return None
         return None
     exec_code = res[0]['exec_code']
     exec_code = res[0]['exec_code']
-    trans_print("任务ID", id, '类型', type, '获取到执行代码:', exec_code)
+    info("任务ID", id, '类型', type, '获取到执行代码:', exec_code)
     return exec_code
     return exec_code
 
 
 
 

+ 4 - 2
utils/common.py

@@ -1,3 +1,5 @@
from conf.constants import FileTypes

# Legacy module-level aliases; new code should read conf.constants.FileTypes directly.
excel_types = FileTypes.EXCEL_TYPES

zip_types = FileTypes.ZIP_TYPES

+ 137 - 12
utils/conf/read_conf.py

@@ -1,22 +1,147 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 # @Time    : 2024/6/7
 # @Time    : 2024/6/7
 # @Author  : 魏志亮
 # @Author  : 魏志亮
+import os
 
 
 import yaml
 import yaml
+from typing import Any, Optional, Dict
 
 
 
 
-def yaml_conf(path, encoding='utf-8'):
-    with open(path, 'r', encoding=encoding) as f:
-        data = yaml.safe_load(f)
-    return data
def load_yaml_config(file_path: str, encoding: str = 'utf-8') -> Dict[str, Any]:
    """
    Load a YAML configuration file.

    Args:
        file_path: path of the YAML file.
        encoding: file encoding, defaults to utf-8.

    Returns:
        The parsed configuration dict; an empty dict when the file is empty
        or its root node is not a mapping.

    Raises:
        FileNotFoundError: when the file does not exist.
        yaml.YAMLError: when the file cannot be parsed.
    """
    try:
        with open(file_path, 'r', encoding=encoding) as f:
            data = yaml.safe_load(f)
            # Guard against an empty file / non-mapping root (safe_load -> None).
            return data if isinstance(data, dict) else {}
    except FileNotFoundError as e:
        # Re-raise with a clearer message, chaining the original cause.
        raise FileNotFoundError(f"配置文件不存在: {file_path}") from e
    except yaml.YAMLError as e:
        raise yaml.YAMLError(f"YAML解析错误: {e}") from e
 
 
 
 
def get_config_value(config: Dict[str, Any], key: str, default: Optional[Any] = None) -> Any:
    """
    Safely read a value from a configuration dict.

    Supports dotted keys ("database.host") for nested lookups. An exact
    top-level key is tried first, so legacy keys that literally contain a
    dot keep working (backward compatible with the old read_conf()).

    Args:
        config: configuration dict (may be None).
        key: key name, optionally dotted for nesting.
        default: returned when the key is missing or its value is None.

    Returns:
        The configuration value, or ``default``.
    """
    if config is None:
        return default

    if isinstance(config, dict) and key in config:
        # Exact match wins — handles keys that contain a literal '.'.
        value = config[key]
    else:
        # Walk the dotted path through nested dicts.
        value = config
        for part in key.split('.'):
            if isinstance(value, dict) and part in value:
                value = value[part]
            else:
                value = None
                break

    # None is treated as "missing" when a default was supplied.
    if value is None and default is not None:
        return default

    return value
 
 
+
def merge_configs(base_config: Dict[str, Any], override_config: Dict[str, Any]) -> Dict[str, Any]:
    """
    Merge two configuration dicts, with ``override_config`` taking precedence.

    Nested dicts are merged recursively; any other value type is replaced
    outright. The input dicts themselves are not modified (top levels are
    copied; untouched nested values may still be shared by reference).

    Args:
        base_config: default configuration.
        override_config: overriding configuration.

    Returns:
        A new merged dict.
    """
    merged = base_config.copy()

    for key, incoming in override_config.items():
        existing = merged.get(key)
        if isinstance(existing, dict) and isinstance(incoming, dict):
            # Both sides are mappings: merge recursively.
            merged[key] = merge_configs(existing, incoming)
        else:
            # Scalars, lists, or type mismatches: override wins.
            merged[key] = incoming

    return merged
+
+
def load_config_with_env(file_path: str, encoding: str = 'utf-8') -> Dict[str, Any]:
    """
    Load a YAML config file, then apply ``ETL_*`` environment overrides.

    An environment variable such as ``ETL_DATABASE_HOST=x`` becomes the
    nested key ``database.host``. Values are coerced: 'true'/'false' to
    bool, pure digits to int, a single-dot digit string to float,
    everything else stays a string.

    Args:
        file_path: YAML file path.
        encoding: file encoding, defaults to utf-8.

    Returns:
        The merged configuration dict.
    """
    base_config = load_yaml_config(file_path, encoding)

    env_prefix = "ETL_"
    override_config = {}

    for env_name, raw_value in os.environ.items():
        if not env_name.startswith(env_prefix):
            continue

        # ETL_DATABASE_HOST -> "database.host"
        config_key = env_name[len(env_prefix):].lower().replace('_', '.')
        parsed_value = _parse_env_value(raw_value)

        # Build the nested dict path for the dotted key.
        keys = config_key.split('.')
        node = override_config
        for part in keys[:-1]:
            node = node.setdefault(part, {})
        node[keys[-1]] = parsed_value

    if override_config:
        base_config = merge_configs(base_config, override_config)

    return base_config


def _parse_env_value(value: str) -> Any:
    """Coerce an environment-variable string to bool/int/float when possible."""
    lowered = value.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    if value.isdigit():
        return int(value)
    # BUG FIX: the previous check ('.' in value and all parts digit) accepted
    # strings like "1.2.3" and then crashed in float(); require exactly one dot.
    parts = value.split('.')
    if len(parts) == 2 and all(part.isdigit() for part in parts):
        return float(value)
    return value


# Backward-compatible aliases for the legacy API names.
yaml_conf = load_yaml_config
read_conf = get_config_value

+ 231 - 41
utils/db/ConnectMysql.py

@@ -1,56 +1,246 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/6/7
+# @Author  : 魏志亮
+
+import os
 import traceback
 import traceback
-from os import *
+from typing import Any, Dict, List, Tuple, Union
 
 
 import pandas as pd
 import pandas as pd
 import pymysql
 import pymysql
 from pymysql.cursors import DictCursor
 from pymysql.cursors import DictCursor
 from sqlalchemy import create_engine
 from sqlalchemy import create_engine
+from sqlalchemy.engine import Engine
+
+from utils.conf.read_conf import load_yaml_config
+from utils.log.trans_log import error, info, debug
+
+
class MySQLDatabase:
    """MySQL connection manager configured from the ETL_CONF YAML file."""

    # Class-level engine cache so every instance pointing at the same
    # host/port/user/database reuses one SQLAlchemy engine (and its pool).
    _engine_cache = {}

    def __init__(self, connection_name: str):
        """
        Initialize the connection manager.

        Args:
            connection_name: name of the connection section in the config file.

        Raises:
            ValueError: when the ETL_CONF environment variable is not set.
            KeyError: when the section is missing or incomplete.
        """
        config_path = os.environ.get('ETL_CONF')
        if not config_path:
            raise ValueError("环境变量 ETL_CONF 未设置")

        self.yaml_data = load_yaml_config(config_path)
        self.connection_name = connection_name

        if connection_name not in self.yaml_data:
            raise KeyError(f"配置中不存在连接名称: {connection_name}")

        self.config = self.yaml_data[connection_name]
        self.database = self.config.get('database', '')

        # Fail fast when the section lacks mandatory connection settings.
        required_keys = ['host', 'user', 'password', 'database']
        missing_keys = [key for key in required_keys if key not in self.config]
        if missing_keys:
            raise KeyError(f"连接配置缺少必要项: {missing_keys}")
+
+    def get_connection(self) -> pymysql.Connection:
+        """
+        从连接池中获取一个连接
+        
+        Returns:
+            pymysql连接对象
+        """
+        # 创建连接配置副本,避免修改原配置
+        conn_config = self.config.copy()
+        # 移除可能不需要的配置项(如果有)
+        conn_config.pop('charset', None)  # pymysql连接时charset参数可能会冲突
+
+        return pymysql.connect(
+            cursorclass=DictCursor,
+            charset='utf8mb4',
+            **conn_config
+        )
+
+    def execute_query(self, sql: str, params: Union[Tuple, List, Dict] = None) -> List[Dict[str, Any]]:
+        """
+        执行SQL查询并返回结果
+        
+        Args:
+            sql: SQL语句
+            params: SQL参数,可以是元组、列表或字典
+            
+        Returns:
+            查询结果列表,每个元素为字典形式
+            
+        Raises:
+            Exception: SQL执行错误时抛出
+        """
+        params = params or ()
+        conn = None
+        cursor = None
+
+        try:
+            conn = self.get_connection()
+            cursor = conn.cursor()
+
+            # 执行SQL
+            cursor.execute(sql, params)
+            debug("开始执行SQL:\n", cursor._executed)
 
 
-from utils.conf.read_conf import yaml_conf
-from utils.log.trans_log import trans_print
+            # 提交事务
+            conn.commit()
 
 
+            # 获取结果
+            result = cursor.fetchall()
+            return result
 
 
-class ConnectMysql:
+        except Exception as e:
+            error(f"执行SQL出错: {sql}")
+            error(f"错误信息: {e}")
+            error(traceback.format_exc())
 
 
-    def __init__(self, connet_name):
-        self.yaml_data = yaml_conf(environ.get('ETL_CONF'))
-        self.connet_name = connet_name
-        self.config = self.yaml_data[self.connet_name]
-        self.database = self.config['database']
+            if conn:
+                conn.rollback()
+            raise e
 
 
-    # 从连接池中获取一个连接
-    def get_conn(self):
-        return pymysql.connect(**self.config)
+        finally:
+            # 确保资源被释放
+            if cursor:
+                cursor.close()
+            if conn:
+                conn.close()
 
 
-    # 使用连接执行sql
-    def execute(self, sql, params=tuple()):
+    def execute_update(self, sql: str, params: Union[Tuple, List, Dict] = None) -> int:
+        """
+        执行更新操作(INSERT, UPDATE, DELETE)
+        
+        Args:
+            sql: SQL语句
+            params: SQL参数
+            
+        Returns:
+            影响的行数
+        """
+        params = params or ()
+        conn = None
+        cursor = None
 
 
-        with self.get_conn() as conn:
-            with conn.cursor(cursor=DictCursor) as cursor:
-                try:
-                    cursor.execute(sql, params)
-                    trans_print("开始执行SQL:", cursor._executed)
-                    conn.commit()
-                    result = cursor.fetchall()
-                    return result
-                except Exception as e:
-                    trans_print(f"执行sql:{sql},报错:{e}")
-                    trans_print(traceback.format_exc())
-                    conn.rollback()
-                    raise e
+        try:
+            conn = self.get_connection()
+            cursor = conn.cursor()
 
 
-    def get_engine(self):
+            cursor.execute(sql, params)
+            debug("开始执行SQL:", cursor._executed)
+
+            conn.commit()
+            return cursor.rowcount
+
+        except Exception as e:
+            error(f"执行更新SQL出错: {sql}")
+            error(f"错误信息: {e}")
+            error(traceback.format_exc())
+
+            if conn:
+                conn.rollback()
+            raise e
+
+        finally:
+            if cursor:
+                cursor.close()
+            if conn:
+                conn.close()
+
+    def get_engine(self) -> Engine:
+        """
+        获取SQLAlchemy引擎,使用缓存避免重复创建
+        
+        Returns:
+            SQLAlchemy引擎对象
+        """
+        # 构建缓存键
         config = self.config
         config = self.config
-        username = config['user']
-        password = config['password']
-        host = config['host']
-        port = config['port']
-        dbname = config['database']
-        return create_engine(f'mysql+pymysql://{username}:{password}@{host}:{port}/{dbname}')
-
-    def execute_df_save(self, df, table_name, chunk_size=10000):
-        df.to_sql(table_name, self.get_engine(), index=False, if_exists='append', chunksize=chunk_size)
-
-    def read_sql_to_df(self, sql):
-        df = pd.read_sql_query(sql, self.get_engine())
-        return df
+        cache_key = f"{config['host']}:{config['port']}:{config['user']}:{config['database']}"
+
+        # 检查缓存中是否已有引擎
+        if cache_key not in self._engine_cache:
+            username = config['user']
+            password = config['password']
+            host = config['host']
+            port = config['port']
+            dbname = config['database']
+
+            # 构建连接URL
+            connection_url = f'mysql+pymysql://{username}:{password}@{host}:{port}/{dbname}?charset=utf8mb4'
+
+            # 创建引擎并缓存
+            self._engine_cache[cache_key] = create_engine(
+                connection_url,
+                pool_size=10,  # 增加连接池大小
+                pool_recycle=3600,
+                pool_pre_ping=True,  # 连接池预ping,确保连接有效
+                echo=False  # 设置为True可打印SQL日志
+            )
+
+        return self._engine_cache[cache_key]
+
+    def save_dataframe(self, df: pd.DataFrame, table_name: str, chunk_size: int = 10000,
+                       if_exists: str = 'append') -> None:
+        """
+        将DataFrame保存到数据库表
+        
+        Args:
+            df: pandas DataFrame对象
+            table_name: 目标表名
+            chunk_size: 每批写入的行数
+            if_exists: 表存在时的处理方式:'fail', 'replace', 'append'
+        """
+        try:
+            df.to_sql(
+                table_name,
+                self.get_engine(),
+                index=False,
+                if_exists=if_exists,
+                chunksize=chunk_size,
+                method='multi'  # 使用多值插入提高性能
+            )
+            info(f"成功保存 {len(df)} 条数据到表 {table_name}")
+
+        except Exception as e:
+            error(f"保存DataFrame到表 {table_name} 失败: {e}")
+            error(traceback.format_exc())
+            raise e
+
+    def read_sql_to_dataframe(self, sql: str) -> pd.DataFrame:
+        """
+        执行SQL查询并返回DataFrame
+        
+        Args:
+            sql: SQL查询语句
+            
+        Returns:
+            查询结果的DataFrame
+        """
+        try:
+            df = pd.read_sql_query(sql, self.get_engine())
+            debug(f"查询返回 {len(df)} 行数据")
+            return df
+
+        except Exception as e:
+            error(f"执行SQL查询失败: {sql}")
+            error(f"错误信息: {e}")
+            error(traceback.format_exc())
+            raise e
+
+    # 为了保持向后兼容,保留原方法名(可选)
+    get_conn = get_connection
+    execute = execute_query
+    execute_df_save = save_dataframe
+    read_sql_to_df = read_sql_to_dataframe

+ 6 - 6
utils/db/ConnectMysql_tidb_fix.py

@@ -8,7 +8,7 @@ from pymysql.cursors import DictCursor
 from sqlalchemy import create_engine
 from sqlalchemy import create_engine
 
 
 from utils.conf.read_conf import yaml_conf
 from utils.conf.read_conf import yaml_conf
-from utils.log.trans_log import trans_print
+from utils.log.trans_log import error, debug
 
 
 
 
 class ConnectMysql:
 class ConnectMysql:
@@ -30,13 +30,13 @@ class ConnectMysql:
             with conn.cursor(cursor=DictCursor) as cursor:
             with conn.cursor(cursor=DictCursor) as cursor:
                 try:
                 try:
                     cursor.execute(sql, params)
                     cursor.execute(sql, params)
-                    trans_print("开始执行SQL:", cursor._executed)
+                    debug("开始执行SQL:", cursor._executed)
                     conn.commit()
                     conn.commit()
                     result = cursor.fetchall()
                     result = cursor.fetchall()
                     return result
                     return result
                 except Exception as e:
                 except Exception as e:
-                    trans_print(f"执行sql:{sql},报错:{e}")
-                    trans_print(traceback.format_exc())
+                    error(f"执行sql:{sql},报错:{e}")
+                    error(traceback.format_exc())
                     conn.rollback()
                     conn.rollback()
                     raise e
                     raise e
 
 
@@ -66,10 +66,10 @@ class ConnectMysql:
                     df.to_sql(table_name, engine, if_exists='append', index=False, chunksize=chunksize)
                     df.to_sql(table_name, engine, if_exists='append', index=False, chunksize=chunksize)
                 except Exception as e:
                 except Exception as e:
                     retry_count += 1
                     retry_count += 1
-                    trans_print(f" 第 {retry_count} 次重试, 错误: {str(e)}")
+                    error(f" 第 {retry_count} 次重试, 错误: {str(e)}")
                     time.sleep(5 * retry_count)  # 指数退避
                     time.sleep(5 * retry_count)  # 指数退避
                     if retry_count == max_retries:
                     if retry_count == max_retries:
-                        trans_print(f"处理失败: {str(e)}")
+                        error(f"处理失败: {str(e)}")
                         raise
                         raise
         except Exception as e:
         except Exception as e:
             engine.dispose()
             engine.dispose()

+ 2 - 3
utils/df_utils/util.py

@@ -6,7 +6,7 @@ import datetime
 import pandas as pd
 import pandas as pd
 
 
 
 
-def get_time_space(df, time_str):
+def estimate_time_interval(df, time_str):
     """
     """
     :return: 查询时间间隔
     :return: 查询时间间隔
     """
     """
@@ -15,7 +15,6 @@ def get_time_space(df, time_str):
     df1['chazhi'] = df1[time_str].shift(-1) - df1[time_str]
     df1['chazhi'] = df1[time_str].shift(-1) - df1[time_str]
     result = df1.sample(int(df1.shape[0] / 100))['chazhi'].value_counts().idxmax().seconds
     result = df1.sample(int(df1.shape[0] / 100))['chazhi'].value_counts().idxmax().seconds
     del df1
     del df1
-    print(datetime.datetime.now() - begin)
     return result
     return result
 
 
 
 
@@ -46,7 +45,7 @@ def calculate_time_difference(now: datetime.datetime, date: datetime.datetime):
 if __name__ == '__main__':
 if __name__ == '__main__':
     df = pd.read_csv(r"D:\data\清理数据\密马风电场\test_11_test\minute\WOG00469.csv")
     df = pd.read_csv(r"D:\data\清理数据\密马风电场\test_11_test\minute\WOG00469.csv")
     df['time_stamp'] = pd.to_datetime(df['time_stamp'])
     df['time_stamp'] = pd.to_datetime(df['time_stamp'])
-    space = get_time_space(df, 'time_stamp')
+    space = estimate_time_interval(df, 'time_stamp')
     min = df['time_stamp'].min()
     min = df['time_stamp'].min()
     max = df['time_stamp'].max()
     max = df['time_stamp'].max()
     result = get_time_space_count(min, max, space)
     result = get_time_space_count(min, max, space)

+ 139 - 38
utils/file/trans_methods.py

@@ -6,25 +6,35 @@ import datetime
 import os
 import os
 import shutil
 import shutil
 import warnings
 import warnings
+from typing import List, Dict, Optional
 
 
 import chardet
 import chardet
 import pandas as pd
 import pandas as pd
 
 
-from utils.common import excel_types, zip_types
-from utils.log.trans_log import trans_print
+from conf.constants import FileTypes
+from utils.log.trans_log import error, debug
 
 
 warnings.filterwarnings("ignore")
 warnings.filterwarnings("ignore")
 
 
 
 
 # 获取文件编码
 # 获取文件编码
-def detect_file_encoding(filename):
+def detect_file_encoding(filename: str) -> str:
+    """
+    检测文件编码
+    
+    Args:
+        filename: 文件路径
+    
+    Returns:
+        检测到的编码
+    """
     # 读取文件的前1000个字节(足够用于大多数编码检测)
     # 读取文件的前1000个字节(足够用于大多数编码检测)
     with open(filename, 'rb') as f:
     with open(filename, 'rb') as f:
         rawdata = f.read(1000)
         rawdata = f.read(1000)
     result = chardet.detect(rawdata)
     result = chardet.detect(rawdata)
     encoding = result['encoding']
     encoding = result['encoding']
 
 
-    trans_print("文件类型:", filename, encoding)
+    debug("文件类型:", filename, encoding)
 
 
     if encoding is None:
     if encoding is None:
         encoding = 'gb18030'
         encoding = 'gb18030'
@@ -35,19 +45,52 @@ def detect_file_encoding(filename):
     return 'gb18030'
     return 'gb18030'
 
 
 
 
-def del_blank(df=pd.DataFrame(), cols=list()):
+def del_blank(df: pd.DataFrame = pd.DataFrame(), cols: Optional[List[str]] = None) -> pd.DataFrame:
+    """
+    删除指定列的空白字符
+    
+    Args:
+        df: 数据帧
+        cols: 要处理的列列表
+    
+    Returns:
+        处理后的数据帧
+    """
+    if cols is None:
+        cols = []
     for col in cols:
     for col in cols:
-        if df[col].dtype == object:
+        if col in df.columns and df[col].dtype == object:
             df[col] = df[col].str.strip()
             df[col] = df[col].str.strip()
     return df
     return df
 
 
 
 
 # 切割数组到多个数组
 # 切割数组到多个数组
-def split_array(array, num):
+def split_array(array: List, num: int) -> List[List]:
+    """
+    将数组切割成多个子数组
+    
+    Args:
+        array: 原始数组
+        num: 每个子数组的长度
+    
+    Returns:
+        子数组列表
+    """
     return [array[i:i + num] for i in range(0, len(array), num)]
     return [array[i:i + num] for i in range(0, len(array), num)]
 
 
 
 
-def find_read_header(file_path, trans_cols, resolve_col_prefix=None):
+def find_read_header(file_path: str, trans_cols: List[str], resolve_col_prefix: Optional[str] = None) -> Optional[int]:
+    """
+    查找文件的表头行
+    
+    Args:
+        file_path: 文件路径
+        trans_cols: 要匹配的列名列表
+        resolve_col_prefix: 列名前缀解析表达式
+    
+    Returns:
+        表头行索引
+    """
     df = read_file_to_df(file_path, nrows=20)
     df = read_file_to_df(file_path, nrows=20)
     df.reset_index(inplace=True)
     df.reset_index(inplace=True)
     count = 0
     count = 0
@@ -59,7 +102,7 @@ def find_read_header(file_path, trans_cols, resolve_col_prefix=None):
 
 
     for col in trans_cols:
     for col in trans_cols:
         if col in df_cols:
         if col in df_cols:
-            count = count + 1
+            count += 1
             if count >= 2:
             if count >= 2:
                 header = 0
                 header = 0
                 break
                 break
@@ -73,7 +116,7 @@ def find_read_header(file_path, trans_cols, resolve_col_prefix=None):
             values = row.values
             values = row.values
         for col in trans_cols:
         for col in trans_cols:
             if col in values:
             if col in values:
-                count = count + 1
+                count += 1
                 if count > 2:
                 if count > 2:
                     header = index + 1
                     header = index + 1
                     return header
                     return header
@@ -82,30 +125,44 @@ def find_read_header(file_path, trans_cols, resolve_col_prefix=None):
 
 
 
 
 # 读取数据到df
 # 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), trans_cols=None, nrows=None, not_find_header='raise',
-                    resolve_col_prefix=None):
+def read_file_to_df(file_path: str, read_cols: Optional[List[str]] = None, trans_cols: Optional[List[str]] = None,
+                    nrows: Optional[int] = None, not_find_header: str = 'raise',
+                    resolve_col_prefix: Optional[str] = None) -> pd.DataFrame:
+    """
+    读取文件到数据帧
+    
+    Args:
+        file_path: 文件路径
+        read_cols: 要读取的列列表
+        trans_cols: 要匹配的列名列表
+        nrows: 读取的行数
+        not_find_header: 未找到表头时的处理方式
+        resolve_col_prefix: 列名前缀解析表达式
+    
+    Returns:
+        读取的数据帧
+    """
     begin = datetime.datetime.now()
     begin = datetime.datetime.now()
-    trans_print('开始读取文件', file_path)
+    debug('开始读取文件', file_path)
     header = 0
     header = 0
-    find_cols = list()
     if trans_cols:
     if trans_cols:
         header = find_read_header(file_path, trans_cols, resolve_col_prefix)
         header = find_read_header(file_path, trans_cols, resolve_col_prefix)
-        trans_print(os.path.basename(file_path), "读取第", header, "行")
+        debug(os.path.basename(file_path), "读取第", header, "行")
         if header is None:
         if header is None:
             if not_find_header == 'raise':
             if not_find_header == 'raise':
                 message = '未匹配到开始行,请检查并重新指定'
                 message = '未匹配到开始行,请检查并重新指定'
-                trans_print(message)
+                debug(message)
                 raise Exception(message)
                 raise Exception(message)
             elif not_find_header == 'ignore':
             elif not_find_header == 'ignore':
                 pass
                 pass
 
 
-    # read_cols.extend(find_cols)
     df = pd.DataFrame()
     df = pd.DataFrame()
     if header is not None:
     if header is not None:
         try:
         try:
-            if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
+            file_path_lower = str(file_path).lower()
+            if file_path_lower.endswith("csv") or file_path_lower.endswith("gz"):
                 encoding = detect_file_encoding(file_path)
                 encoding = detect_file_encoding(file_path)
-                end_with_gz = str(file_path).lower().endswith("gz")
+                end_with_gz = file_path_lower.endswith("gz")
                 if read_cols:
                 if read_cols:
                     if end_with_gz:
                     if end_with_gz:
                         df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip',
                         df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip',
@@ -115,7 +172,6 @@ def read_file_to_df(file_path, read_cols=list(), trans_cols=None, nrows=None, no
                         df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header,
                         df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header,
                                          on_bad_lines='warn', nrows=nrows)
                                          on_bad_lines='warn', nrows=nrows)
                 else:
                 else:
-
                     if end_with_gz:
                     if end_with_gz:
                         df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header, nrows=nrows)
                         df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header, nrows=nrows)
                     else:
                     else:
@@ -135,16 +191,25 @@ def read_file_to_df(file_path, read_cols=list(), trans_cols=None, nrows=None, no
                     now_df['sheet_name'] = sheet_name
                     now_df['sheet_name'] = sheet_name
                     df = pd.concat([df, now_df])
                     df = pd.concat([df, now_df])
                 xls.close()
                 xls.close()
-            trans_print('文件读取成功:', file_path, '数据数量:', df.shape, '耗时:', datetime.datetime.now() - begin)
+            debug('文件读取成功:', file_path, '数据数量:', df.shape, '耗时:', datetime.datetime.now() - begin)
         except Exception as e:
         except Exception as e:
-            trans_print('读取文件出错', file_path, str(e))
+            error('读取文件出错', file_path, str(e))
             message = '文件:' + os.path.basename(file_path) + ',' + str(e)
             message = '文件:' + os.path.basename(file_path) + ',' + str(e)
             raise ValueError(message)
             raise ValueError(message)
 
 
     return df
     return df
 
 
 
 
-def __build_directory_dict(directory_dict, path, filter_types=None):
+def __build_directory_dict(directory_dict: Dict[str, List[str]], path: str,
+                           filter_types: Optional[List[str]] = None) -> None:
+    """
+    构建目录文件字典
+    
+    Args:
+        directory_dict: 目录文件字典
+        path: 目录路径
+        filter_types: 文件类型过滤器
+    """
     # 遍历目录下的所有项
     # 遍历目录下的所有项
     for item in os.listdir(path):
     for item in os.listdir(path):
         item_path = os.path.join(path, item)
         item_path = os.path.join(path, item)
@@ -156,18 +221,31 @@ def __build_directory_dict(directory_dict, path, filter_types=None):
 
 
             if filter_types is None or len(filter_types) == 0:
             if filter_types is None or len(filter_types) == 0:
                 directory_dict[path].append(item_path)
                 directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
+            else:
+                # 获取文件扩展名
+                ext = os.path.splitext(item_path)[1].lstrip('.').lower()
+                if ext in filter_types and "~$" not in item_path:
                     directory_dict[path].append(item_path)
                     directory_dict[path].append(item_path)
 
 
 
 
 # 读取路径下所有的excel文件
 # 读取路径下所有的excel文件
-def read_excel_files(read_path, filter_types=None):
+def read_excel_files(read_path: str, filter_types: Optional[List[str]] = None) -> List[str]:
+    """
+    读取路径下所有的Excel文件
+    
+    Args:
+        read_path: 读取路径
+        filter_types: 文件类型过滤器
+    
+    Returns:
+        文件路径列表
+    """
     if not os.path.exists(read_path):
     if not os.path.exists(read_path):
         return []
         return []
 
 
     if filter_types is None:
     if filter_types is None:
-        filter_types = ['xls', 'xlsx', 'csv', 'gz']
+        # filter_types = ['xls', 'xlsx', 'csv', 'gz']
+        filter_types = FileTypes.EXCEL_TYPES
     if os.path.isfile(read_path):
     if os.path.isfile(read_path):
         return [read_path]
         return [read_path]
 
 
@@ -178,10 +256,20 @@ def read_excel_files(read_path, filter_types=None):
 
 
 
 
 # 读取路径下所有的文件
 # 读取路径下所有的文件
-def read_files(read_path, filter_types=None):
+def read_files(read_path: str, filter_types: Optional[List[str]] = None) -> List[str]:
+    """
+    读取路径下所有的文件
+    
+    Args:
+        read_path: 读取路径
+        filter_types: 文件类型过滤器
+    
+    Returns:
+        文件路径列表
+    """
     if filter_types is None:
     if filter_types is None:
-        filter_types = [i for i in excel_types]
-        filter_types.extend(zip_types)
+        filter_types = list(FileTypes.EXCEL_TYPES)
+        filter_types.extend(FileTypes.ZIP_TYPES)
     if os.path.isfile(read_path):
     if os.path.isfile(read_path):
         return [read_path]
         return [read_path]
     directory_dict = {}
     directory_dict = {}
@@ -190,10 +278,15 @@ def read_files(read_path, filter_types=None):
     return [path1 for paths in directory_dict.values() for path1 in paths if path1]
     return [path1 for paths in directory_dict.values() for path1 in paths if path1]
 
 
 
 
-def copy_to_new(from_path, to_path):
-    is_file = False
-    if to_path.count('.') > 0:
-        is_file = True
+def copy_to_new(from_path: str, to_path: str) -> None:
+    """
+    复制文件到新路径
+    
+    Args:
+        from_path: 源文件路径
+        to_path: 目标文件路径
+    """
+    is_file = '.' in to_path
 
 
     create_file_path(to_path, is_file_path=is_file)
     create_file_path(to_path, is_file_path=is_file)
 
 
@@ -201,11 +294,13 @@ def copy_to_new(from_path, to_path):
 
 
 
 
 # 创建路径
 # 创建路径
-def create_file_path(read_path, is_file_path=False):
+def create_file_path(read_path: str, is_file_path: bool = False) -> None:
     """
     """
     创建路径
     创建路径
-    :param read_path:创建文件夹的路径
-    :param is_file_path: 传入的path是否包含具体的文件名
+    
+    Args:
+        read_path: 创建文件夹的路径
+        is_file_path: 传入的path是否包含具体的文件名
     """
     """
     if is_file_path:
     if is_file_path:
         read_path = os.path.dirname(read_path)
         read_path = os.path.dirname(read_path)
@@ -214,9 +309,15 @@ def create_file_path(read_path, is_file_path=False):
         os.makedirs(read_path, exist_ok=True)
         os.makedirs(read_path, exist_ok=True)
 
 
 
 
-def valid_eval(eval_str):
+def valid_eval(eval_str: str) -> bool:
     """
     """
     验证 eval 是否包含非法的参数
     验证 eval 是否包含非法的参数
+    
+    Args:
+        eval_str: 要验证的表达式
+    
+    Returns:
+        是否合法
     """
     """
     safe_param = ["column", "wind_name", "df", "error_time", "str", "int"]
     safe_param = ["column", "wind_name", "df", "error_time", "str", "int"]
     eval_str_names = [node.id for node in ast.walk(ast.parse(eval_str)) if isinstance(node, ast.Name)]
     eval_str_names = [node.id for node in ast.walk(ast.parse(eval_str)) if isinstance(node, ast.Name)]

+ 0 - 202
utils/file/trans_methods.py_1

@@ -1,202 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Time    : 2024/5/16
-# @Author  : 魏志亮
-import datetime
-from os import *
-import shutil
-import warnings
-
-import chardet
-import pandas as pd
-
-from utils.log.trans_log import trans_print
-
-warnings.filterwarnings("ignore")
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    trans_print("文件类型:", filename, encoding)
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding.lower() in ['utf-8', 'ascii', 'utf8']:
-        return 'utf-8'
-
-    return 'gb18030'
-
-
-def del_blank(df=pd.DataFrame(), cols=list()):
-    for col in cols:
-        if df[col].dtype == object:
-            df[col] = df[col].str.strip()
-    return df
-
-
-# 切割数组到多个数组
-def split_array(array, num):
-    return [array[i:i + num] for i in range(0, len(array), num)]
-
-
-def find_read_header(file_path, trans_cols):
-    df = read_file_to_df(file_path, nrows=20)
-    count = 0
-    header = None
-    for col in trans_cols:
-        if col in df.columns:
-            count = count + 1
-            if count >= 2:
-                header = 0
-                break
-
-    count = 0
-
-    values = list()
-    for index, row in df.iterrows():
-        values = list(row.values)
-        if type(row.name) == tuple:
-            values.extend(list(row.name))
-        for col in trans_cols:
-            if col in values:
-                count = count + 1
-                if count > 2:
-                    header = index + 1
-                    break
-
-    read_cols = []
-    for col in values:
-        if col in trans_cols:
-            read_cols.append(col)
-
-    return header, read_cols
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), trans_cols=None, nrows=None):
-    begin = datetime.datetime.now()
-    trans_print('开始读取文件', file_path)
-    header = 0
-    find_cols = list()
-    if trans_cols:
-        header, find_cols = find_read_header(file_path, trans_cols)
-        trans_print(path.basename(file_path), "读取第", header, "行")
-        if header is None:
-            message = '未匹配到开始行,请检查并重新指定'
-            trans_print(message)
-            raise Exception(message)
-
-    read_cols.extend(find_cols)
-
-    try:
-        df = pd.DataFrame()
-        if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-            encoding = detect_file_encoding(file_path)
-            end_with_gz = str(file_path).lower().endswith("gz")
-            if read_cols:
-                if end_with_gz:
-                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header,
-                                     nrows=nrows)
-                else:
-                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header,
-                                     on_bad_lines='warn', nrows=nrows)
-            else:
-
-                if end_with_gz:
-                    df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header, nrows=nrows)
-                else:
-                    df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn', nrows=nrows)
-
-        else:
-            xls = pd.ExcelFile(file_path, engine="calamine")
-            # 获取所有的sheet名称
-            sheet_names = xls.sheet_names
-            for sheet_name in sheet_names:
-                if read_cols:
-                    now_df = pd.read_excel(xls, sheet_name=sheet_name, header=header, usecols=read_cols, nrows=nrows)
-                else:
-                    now_df = pd.read_excel(xls, sheet_name=sheet_name, header=header, nrows=nrows)
-
-                now_df['sheet_name'] = sheet_name
-                df = pd.concat([df, now_df])
-            xls.close()
-        trans_print('文件读取成功:', file_path, '数据数量:', df.shape, '耗时:', datetime.datetime.now() - begin)
-    except Exception as e:
-        trans_print('读取文件出错', file_path, str(e))
-        message = '文件:' + path.basename(file_path) + ',' + str(e)
-        raise ValueError(message)
-
-    return df
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in listdir(path):
-        item_path = path.join(path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    if path.isfile(read_path):
-        return [read_path]
-
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 读取路径下所有的文件
-def read_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-def copy_to_new(from_path, to_path):
-    is_file = False
-    if to_path.count('.') > 0:
-        is_file = True
-
-    create_file_path(to_path, is_file_path=is_file)
-
-    shutil.copy(from_path, to_path)
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    if is_file_path:
-        path = path.dirname(path)
-
-    if not path.exists(path):
-        makedirs(path, exist_ok=True)
-
-
-if __name__ == '__main__':
-    datas = read_excel_files(r"D:\data\清理数据\招远风电场\WOF053600062-WOB000009_ZYFDC000012\minute")
-    for data in datas:
-        print(data)
-
-    print("*" * 20)
-
-    datas = read_excel_files(r"D:\data\清理数据\招远风电场\WOF053600062-WOB000009_ZYFDC000012\minute\WOG00066.csv.gz")
-    for data in datas:
-        print(data)

+ 99 - 25
utils/log/trans_log.py

@@ -7,6 +7,7 @@ import logging
 import sys
 import sys
 from os import *
 from os import *
 
 
+from conf.constants import Log
 from utils.conf.read_conf import read_conf, yaml_conf
 from utils.conf.read_conf import read_conf, yaml_conf
 
 
 
 
@@ -26,34 +27,107 @@ class ContextFilter(logging.Filter):
         return True
         return True
 
 
 
 
-logger = logging.getLogger("etl_tools")
-logger.setLevel(logging.INFO)
-stout_handle = logging.StreamHandler(sys.stdout)
-stout_handle.setFormatter(
-    logging.Formatter("%(asctime)s-%(trace_id)s: %(message)s"))
-stout_handle.setLevel(logging.INFO)
-stout_handle.addFilter(ContextFilter())
-logger.addHandler(stout_handle)
+# 初始化日志配置
+def init_logger():
+    """初始化日志配置"""
+    logger = logging.getLogger("etl_tools")
+    logger.setLevel(logging.DEBUG)  # 设置为DEBUG以捕获所有级别的日志
 
 
-config_path = path.abspath(__file__).split("utils")[0] + 'conf' + sep + 'etl_config_dev.yaml'
-config_path = environ.get('ETL_CONF', config_path)
-config = yaml_conf(environ.get('ETL_CONF', config_path))
-log_path_dir = read_conf(config, 'log_path_dir', "/data/logs")
+    # 清除已有的处理器
+    if logger.handlers:
+        logger.handlers.clear()
 
 
-log_path = log_path_dir + sep + r'etl_tools_' + (environ['env'] if 'env' in environ else 'dev')
-file_path = path.join(log_path)
+    formatter = logging.Formatter("%(asctime)s-%(levelname)s-%(trace_id)s: %(message)s")
 
 
-if not path.exists(file_path):
-    makedirs(file_path, exist_ok=True)
-file_name = file_path + sep + str(datetime.date.today()) + '.log'
+    # 控制台处理器
+    stout_handle = logging.StreamHandler(sys.stdout)
+    stout_handle.setFormatter(formatter)
 
 
-file_handler = logging.FileHandler(file_name, encoding='utf-8')
-file_handler.setFormatter(
-    logging.Formatter("%(asctime)s-%(trace_id)s: %(message)s"))
-file_handler.setLevel(logging.INFO)
-file_handler.addFilter(ContextFilter())
-logger.addHandler(file_handler)
+    # 根据环境设置日志级别
+    env = environ.get('env', 'dev')
 
 
+    stout_handle.setLevel(logging.INFO)
 
 
-def trans_print(*args):
-    logger.info("  ".join([str(a) for a in args]))
+    stout_handle.addFilter(ContextFilter())
+    logger.addHandler(stout_handle)
+
+    # 文件处理器
+    try:
+        config_path = environ.get('ETL_CONF')
+        if config_path:
+            config = yaml_conf(config_path)
+            log_path_dir = read_conf(config, 'log_path_dir', Log.DEFAULT_LOG_PATH)
+        else:
+            log_path_dir = Log.DEFAULT_LOG_PATH
+
+        log_path = log_path_dir + sep + Log.LOG_FILE_PREFIX + (environ['env'] if 'env' in environ else 'dev')
+        file_path = path.join(log_path)
+
+        if not path.exists(file_path):
+            makedirs(file_path, exist_ok=True)
+        # 普通日志文件(INFO及以上)
+        file_name = file_path + sep + str(datetime.date.today()) + '.log'
+        file_handler = logging.FileHandler(file_name, encoding='utf-8')
+        file_handler.setFormatter(formatter)
+        file_handler.setLevel(logging.INFO)
+        file_handler.addFilter(ContextFilter())
+        logger.addHandler(file_handler)
+
+        # 错误日志文件(ERROR及以上)
+        error_file_name = file_path + sep + str(datetime.date.today()) + '.error.log'
+        error_file_handler = logging.FileHandler(error_file_name, encoding='utf-8')
+        error_file_handler.setFormatter(formatter)
+        error_file_handler.setLevel(logging.ERROR)
+        error_file_handler.addFilter(ContextFilter())
+        logger.addHandler(error_file_handler)
+    except Exception as e:
+        # 如果日志文件创建失败,只使用控制台日志
+        pass
+
+    return logger
+
+
+# 初始化日志记录器
+logger = init_logger()
+
+
+def trans_print(*args, level: str = 'info'):
+    """
+    打印日志
+    
+    Args:
+        *args: 日志内容
+        level: 日志级别,可选值: 'debug', 'info', 'warning', 'error'
+    """
+    message = "  ".join([str(a) for a in args])
+
+    if level == 'debug':
+        logger.debug(message)
+    elif level == 'info':
+        logger.info(message)
+    elif level == 'warning':
+        logger.warning(message)
+    elif level == 'error':
+        logger.error(message)
+    else:
+        logger.info(message)
+
+
+def debug(*args):
+    """打印调试日志"""
+    trans_print(*args, level='debug')
+
+
+def info(*args):
+    """打印信息日志"""
+    trans_print(*args, level='info')
+
+
+def warning(*args):
+    """打印警告日志"""
+    trans_print(*args, level='warning')
+
+
+def error(*args):
+    """打印错误日志"""
+    trans_print(*args, level='error')

+ 113 - 26
utils/systeminfo/sysinfo.py

@@ -1,13 +1,21 @@
-from os import *
+import os
+from typing import List
 
 
 import psutil
 import psutil
 
 
-from utils.log.trans_log import trans_print
+from conf.constants import ParallelProcessing
+from utils.log.trans_log import info, debug
 
 
 
 
-def print_memory_usage(detail=""):
+def print_memory_usage(detail: str = "") -> None:
+    """
+    打印内存使用情况
+    
+    Args:
+        detail: 详细信息
+    """
     # 获取当前进程ID
     # 获取当前进程ID
-    pid = getpid()
+    pid = os.getpid()
     # 获取进程信息
     # 获取进程信息
     py = psutil.Process(pid)
     py = psutil.Process(pid)
     # 获取内存信息
     # 获取内存信息
@@ -21,34 +29,85 @@ def print_memory_usage(detail=""):
     memory_usage_rss_mb = memory_usage_rss / (1024 ** 2)
     memory_usage_rss_mb = memory_usage_rss / (1024 ** 2)
     memory_usage_vms_mb = memory_usage_vms / (1024 ** 2)
     memory_usage_vms_mb = memory_usage_vms / (1024 ** 2)
 
 
-    trans_print(f"{detail},Memory usage (RSS): {memory_usage_rss_mb:.2f} MB")
-    trans_print(f"{detail},Memory usage (VMS): {memory_usage_vms_mb:.2f} MB")
+    debug(f"{detail},Memory usage (RSS): {memory_usage_rss_mb:.2f} MB")
+    debug(f"{detail},Memory usage (VMS): {memory_usage_vms_mb:.2f} MB")
 
 
 
 
-def get_cpu_count():
+def get_cpu_count() -> int:
+    """
+    获取CPU核心数
+    
+    Returns:
+        CPU核心数
+    """
     return psutil.cpu_count()
     return psutil.cpu_count()
 
 
 
 
-def get_available_cpu_count_with_percent(percent: float = 1):
+def get_available_cpu_count_with_percent(percent: float = 1) -> int:
+    """
+    根据百分比获取可用CPU数
+    
+    Args:
+        percent: CPU使用百分比
+    
+    Returns:
+        可用CPU数
+    """
     cpu_count = get_cpu_count()
     cpu_count = get_cpu_count()
     return int(cpu_count * percent)
     return int(cpu_count * percent)
 
 
 
 
-def get_file_size(file_path):
-    return path.getsize(file_path)
-
-
-def get_dir_size(dir_path):
-    return sum(get_file_size(path.join(dir_path, file)) for file in listdir(dir_path) if
-               path.isfile(path.join(dir_path, file)))
-
-
-def get_available_memory_with_percent(percent: float = 1):
+def get_file_size(file_path: str) -> int:
+    """
+    获取文件大小
+    
+    Args:
+        file_path: 文件路径
+    
+    Returns:
+        文件大小(字节)
+    """
+    return os.path.getsize(file_path)
+
+
+def get_dir_size(dir_path: str) -> int:
+    """
+    获取目录大小
+    
+    Args:
+        dir_path: 目录路径
+    
+    Returns:
+        目录大小(字节)
+    """
+    return sum(get_file_size(os.path.join(dir_path, file)) for file in os.listdir(dir_path) if
+               os.path.isfile(os.path.join(dir_path, file)))
+
+
+def get_available_memory_with_percent(percent: float = 1) -> int:
+    """
+    根据百分比获取可用内存
+    
+    Args:
+        percent: 内存使用百分比
+    
+    Returns:
+        可用内存(字节)
+    """
     memory_info = psutil.virtual_memory()
     memory_info = psutil.virtual_memory()
     return int(memory_info.available * percent)
     return int(memory_info.available * percent)
 
 
 
 
-def get_max_file_size(file_paths: list[str]):
+def get_max_file_size(file_paths: List[str]) -> int:
+    """
+    获取文件列表中的最大文件大小
+    
+    Args:
+        file_paths: 文件路径列表
+    
+    Returns:
+        最大文件大小(字节)
+    """
     max_size = 0
     max_size = 0
     for file_path in file_paths:
     for file_path in file_paths:
         file_size = get_file_size(file_path)
         file_size = get_file_size(file_path)
@@ -57,11 +116,25 @@ def get_max_file_size(file_paths: list[str]):
     return max_size
     return max_size
 
 
 
 
-def use_files_get_max_cpu_count(file_paths: list[str], memory_percent: float = 1 / 12, cpu_percent: float = 2 / 5):
+def use_files_get_max_cpu_count(file_paths: List[str], memory_percent: float = 1 / 12,
+                                cpu_percent: float = 2 / 5) -> int:
+    """
+    根据文件大小和内存情况计算最大进程数
+    
+    Args:
+        file_paths: 文件路径列表
+        memory_percent: 内存使用百分比
+        cpu_percent: CPU使用百分比
+    
+    Returns:
+        最大进程数
+    """
     max_file_size = get_max_file_size(file_paths)
     max_file_size = get_max_file_size(file_paths)
     free_memory = get_available_memory_with_percent(memory_percent)
     free_memory = get_available_memory_with_percent(memory_percent)
     count = int(free_memory / max_file_size)
     count = int(free_memory / max_file_size)
     max_cpu_count = get_available_cpu_count_with_percent(cpu_percent)
     max_cpu_count = get_available_cpu_count_with_percent(cpu_percent)
+    # 限制最大进程数
+    max_cpu_count = min(max_cpu_count, ParallelProcessing.MAX_PROCESSES)
     result = count if count <= max_cpu_count else max_cpu_count
     result = count if count <= max_cpu_count else max_cpu_count
     if result == 0:
     if result == 0:
         result = 1
         result = 1
@@ -69,21 +142,35 @@ def use_files_get_max_cpu_count(file_paths: list[str], memory_percent: float = 1
     if result > len(file_paths):
     if result > len(file_paths):
         result = len(file_paths)
         result = len(file_paths)
 
 
-    trans_print("总文件数:", len(file_paths), ",获取最大文件大小:", str(round(max_file_size / 2 ** 20, 2)) + "M",
-                "可用内存:", str(get_available_memory_with_percent(1) / 2 ** 20) + "M",
-                "总CPU数:", get_cpu_count(), "CPU使用比例:", round(cpu_percent, 2), "CPU可用数量:", max_cpu_count,
-                ",最终确定使用进程数:", result)
+    info("总文件数:", len(file_paths), ",获取最大文件大小:", str(round(max_file_size / 2 ** 20, 2)) + "M",
+         "可用内存:", str(get_available_memory_with_percent(1) / 2 ** 20) + "M",
+         "总CPU数:", get_cpu_count(), "CPU使用比例:", round(cpu_percent, 2), "CPU可用数量:", max_cpu_count,
+         ",最终确定使用进程数:", result)
     return result
     return result
 
 
 
 
-def max_file_size_get_max_cpu_count(max_file_size, memory_percent: float = 1 / 6, cpu_percent: float = 2 / 5):
+def max_file_size_get_max_cpu_count(max_file_size: int, memory_percent: float = 1 / 6,
+                                    cpu_percent: float = 2 / 5) -> int:
+    """
+    根据最大文件大小和内存情况计算最大进程数
+    
+    Args:
+        max_file_size: 最大文件大小
+        memory_percent: 内存使用百分比
+        cpu_percent: CPU使用百分比
+    
+    Returns:
+        最大进程数
+    """
     free_memory = get_available_memory_with_percent(memory_percent)
     free_memory = get_available_memory_with_percent(memory_percent)
     count = int(free_memory / max_file_size)
     count = int(free_memory / max_file_size)
     max_cpu_count = get_available_cpu_count_with_percent(cpu_percent)
     max_cpu_count = get_available_cpu_count_with_percent(cpu_percent)
+    # 限制最大进程数
+    max_cpu_count = min(max_cpu_count, ParallelProcessing.MAX_PROCESSES)
     result = count if count <= max_cpu_count else max_cpu_count
     result = count if count <= max_cpu_count else max_cpu_count
     if result == 0:
     if result == 0:
         result = 1
         result = 1
-    trans_print(",获取最大文件大小:", str(round(max_file_size / 2 ** 20, 2)) + "M",
+    info(",获取最大文件大小:", str(round(max_file_size / 2 ** 20, 2)) + "M",
                 "可用内存:", str(get_available_memory_with_percent(1) / 2 ** 20) + "M",
                 "可用内存:", str(get_available_memory_with_percent(1) / 2 ** 20) + "M",
                 "总CPU数:", get_cpu_count(), "CPU使用比例:", round(cpu_percent, 2), "CPU可用数量:", max_cpu_count,
                 "总CPU数:", get_cpu_count(), "CPU使用比例:", round(cpu_percent, 2), "CPU可用数量:", max_cpu_count,
                 ",最终确定使用进程数:", result)
                 ",最终确定使用进程数:", result)

+ 0 - 0
utils/tmp_util/__init__.py


+ 0 - 37
utils/tmp_util/合并文件.py

@@ -1,37 +0,0 @@
-import multiprocessing
-
-read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/收资数据/整改复核数据/2025年06月19日16时17分41秒'
-
-import os
-import pandas as pd
-
-# 获取文件夹下所有文件的路径
-file_paths = [os.path.join(read_dir, file) for file in os.listdir(read_dir) if
-              os.path.isfile(os.path.join(read_dir, file))]
-
-
-def read_and_save(wind_no, files, save_dir):
-    # 读取文件
-    df = pd.concat([pd.read_csv(file) for file in files])
-
-    # 保存文件
-    df.to_csv(os.path.join(save_dir, f'{wind_no}.csv'), index=False, encoding='utf-8')
-
-
-if __name__ == '__main__':
-
-    wind_dicts = dict()
-
-    save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/收资数据/整改复核数据/合并202506191654'
-
-    os.makedirs(save_dir, exist_ok=True)
-
-    for file in os.listdir(read_dir):
-        wind_no = file.split('(')[0]
-        if wind_no not in wind_dicts:
-            wind_dicts[wind_no] = [os.path.join(read_dir, file)]
-        else:
-            wind_dicts[wind_no].append(os.path.join(read_dir, file))
-
-    with multiprocessing.Pool(20) as pool:
-        pool.starmap(read_and_save, [(key, files, save_dir) for key, files in wind_dicts.items()])

+ 0 - 100
utils/tmp_util/整理INSERT到批量INSERT.py

@@ -1,100 +0,0 @@
-# coding=utf-8
-
-
-import re
-from collections import defaultdict
-
-import pymysql
-
-
-def read_sql_inserts(file_path):
-    """生成器函数,逐行读取INSERT语句"""
-    with open(file_path, 'r', encoding='utf-8') as f:
-        for line in f:
-            line = line.strip()
-            if line.startswith('INSERT INTO'):
-                yield line
-
-
-def process_large_sql_file(input_file, batch_size=10000):
-    table_data = defaultdict(lambda: {
-        'columns': None,
-        'value_rows': []
-    })
-
-    insert_pattern = re.compile(
-        r'INSERT\s+INTO\s+`?([a-zA-Z_][a-zA-Z0-9_]*)`?\s*\((.*?)\)\s*VALUES\s*\((.*?)\);',
-        re.IGNORECASE
-    )
-
-    # 使用生成器处理
-    for insert_stmt in read_sql_inserts(input_file):
-        match = insert_pattern.match(insert_stmt)
-        if match:
-            table_name = match.group(1)
-            columns = match.group(2)
-            values = match.group(3)
-
-            if table_data[table_name]['columns'] is None:
-                table_data[table_name]['columns'] = columns
-
-            table_data[table_name]['value_rows'].append(values)
-
-    # 生成批量INSERT语句
-    batch_inserts = {}
-    for table_name, data in table_data.items():
-        columns = data['columns']
-        value_rows = data['value_rows']
-
-        for i in range(0, len(value_rows), batch_size):
-            batch_values = value_rows[i:i + batch_size]
-            batch_insert = f"INSERT INTO `{table_name}` ({columns}) VALUES\n"
-            batch_insert += ",\n".join([f"({values})" for values in batch_values])
-            batch_insert += ";"
-
-            if table_name not in batch_inserts:
-                batch_inserts[table_name] = []
-            batch_inserts[table_name].append(batch_insert)
-
-    return batch_inserts
-
-
-def execute_batch_inserts(db_config, batch_inserts):
-    """直接执行批量INSERT到数据库"""
-    connection = pymysql.connect(**db_config)
-    try:
-        with connection.cursor() as cursor:
-            for table_name, inserts in batch_inserts.items():
-                for index, insert_sql in enumerate(inserts):
-                    cursor.execute(insert_sql)
-                    print(f"表 {table_name},共 {len(inserts)} 个, 第 {index + 1} 个批量INSERT语句执行成功")
-        connection.commit()
-    finally:
-        connection.close()
-
-
-# 数据库配置
-db_config = {
-    'host': '192.168.50.235',
-    'user': 'root',
-    'password': 'admin123456',
-    'db': 'wtlivedb_1',
-    'charset': 'utf8mb4'
-}
-
-"""
-移除INSERT 语句 其他的就是建表语句了
-cat file |grep -v 'INSERT ' > create_talbe.sql
-下面是 INSERT 转化为  BATCH INSERT 的脚本
-"""
-
-if __name__ == "__main__":
-    input_file = "wtlivedb.sql"
-
-    # 使用
-    batch_inserts = process_large_sql_file("input.sql")
-    execute_batch_inserts(db_config, batch_inserts)
-
-    # 打印统计信息
-    for table_name, inserts in batch_inserts.items():
-        print(f"表 '{table_name}': {len(inserts)} 个批量INSERT语句")

+ 0 - 87
utils/tmp_util/神木_完整度_10分.py

@@ -1,87 +0,0 @@
-# coding=utf-8
-
-import datetime
-import multiprocessing
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(__file__).split("utils")[0])
-
-import pandas as pd
-
-from utils.file.trans_methods import read_file_to_df, read_excel_files
-
-
-def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
-    """
-    获取俩个时间之间的个数
-    :return: 查询时间间隔
-    """
-    delta = end_time - start_time
-    total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
-
-    return abs(int(total_seconds / time_space)) + 1
-
-
-def save_percent(value, save_decimal=7):
-    return round(value, save_decimal) * 100
-
-
-def read_and_select(file_path, read_cols_bak):
-    try:
-        read_cols = read_cols_bak[0:len(read_cols_bak)]
-        result_df = pd.DataFrame()
-        df = read_file_to_df(file_path, read_cols=read_cols)
-        wind_name = df['名称'].values[0]
-        df['时间'] = pd.to_datetime(df['时间'])
-        count = get_time_space_count(df['时间'].min(), df['时间'].max(), 600)
-        repeat_time_count = df.shape[0] - len(df['时间'].unique())
-        print(wind_name, count, repeat_time_count)
-        result_df['风机号'] = [wind_name]
-        result_df['重复率'] = [save_percent(repeat_time_count / count)]
-        result_df['重复次数'] = [repeat_time_count]
-        result_df['总记录数'] = [count]
-
-        read_cols.remove('名称')
-        for read_col in read_cols:
-
-            if read_col != '时间':
-                df[read_col] = pd.to_numeric(df[read_col], errors='coerce')
-            else:
-                df[read_col] = pd.to_datetime(df[read_col], errors='coerce')
-
-        group_df = df.groupby(by=['名称']).count()
-        group_df.reset_index(inplace=True)
-        count_df = pd.DataFrame(group_df)
-        total_count = count_df[read_cols].values[0].sum()
-        print(wind_name, total_count, count * len(read_cols))
-        result_df['平均缺失率,单位%'] = [save_percent(1 - total_count / (count * len(read_cols)))]
-        result_df['缺失数值'] = [
-            '-'.join([f'{col_name}_{str(count - i)}' for col_name, i in zip(read_cols, count_df[read_cols].values[0])])]
-        del group_df
-
-        error_fengsu_count = df.query("(风速 < 0) | (风速 > 80)").shape[0]
-        error_yougong_gonglv = df.query("(发电机有功功率 < -200) | (发电机有功功率 > 2500)").shape[0]
-
-        result_df['平均异常率'] = [save_percent((error_fengsu_count + error_yougong_gonglv) / (2 * count))]
-    except Exception as e:
-        print(file_path)
-        raise e
-
-    return result_df
-
-
-if __name__ == '__main__':
-    read_cols_str = '名称,时间,发电机有功功率,发电机转速,发电机驱动端轴承温度,发电机非驱动端轴承温度,发电机定子U相线圈温度,发电机定子V相线圈温度,发电机定子W相线圈温度,实际扭矩,设定扭矩,仪表盘风速,舱内温度,控制柜内温度,舱外温度,风向,风速,机舱风向夹角,1#桨叶片角度,1#桨设定角度,2#桨叶片角度,2#桨设定角度,3#桨叶片角度,3#桨设定角度,1#桨电机温度,2#桨电机温度,3#桨电机温度,轮毂内温度,齿轮箱油泵吸油口油压,齿轮箱分配器位置油压,偏航液压刹车系统蓄能罐压力,主轴转速,齿轮箱油路入口温度,齿轮箱中间轴驱动端轴承温度,齿轮箱中间轴非驱动端轴承温度,齿轮箱油池温度,主轴承外圈温度,可利用率,机舱位置,总扭缆角度'
-    read_cols = [i for i in read_cols_str.split(",") if i]
-    read_dir = r'D:\data\tmp_data\10分'
-
-    files = read_excel_files(read_dir)
-
-    with multiprocessing.Pool(4) as pool:
-        dfs = pool.starmap(read_and_select, [(os.path.join(read_dir, i), read_cols) for i in files])
-
-    df = pd.concat(dfs, ignore_index=True)
-    df.sort_values(by=['风机号'], inplace=True)
-
-    df.to_csv("神木风电场-10分钟.csv", encoding='utf8', index=False)

+ 0 - 90
utils/tmp_util/神木_完整度_1分.py

@@ -1,90 +0,0 @@
-# coding=utf-8
-
-import datetime
-import multiprocessing
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(__file__).split("utils")[0])
-
-import pandas as pd
-
-from utils.file.trans_methods import read_file_to_df, read_excel_files
-
-
-def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
-    """
-    获取俩个时间之间的个数
-    :return: 查询时间间隔
-    """
-    delta = end_time - start_time
-    total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
-
-    return abs(int(total_seconds / time_space)) + 1
-
-
-def save_percent(value, save_decimal=7):
-    return round(value, save_decimal) * 100
-
-
-def read_and_select(file_path):
-    try:
-        result_df = pd.DataFrame()
-        df = read_file_to_df(file_path)
-        read_cols_bak = df.columns.tolist()
-
-        wind_name = df['名称'].values[0]
-        df['时间'] = pd.to_datetime(df['时间'])
-        count = get_time_space_count(df['时间'].min(), df['时间'].max(), 60)
-        repeat_time_count = df.shape[0] - len(df['时间'].unique())
-        print(wind_name, count, repeat_time_count)
-        result_df['风机号'] = [wind_name]
-        result_df['重复率'] = [save_percent(repeat_time_count / count)]
-        result_df['重复次数'] = [repeat_time_count]
-        result_df['总记录数'] = [count]
-
-        read_cols_bak.remove('名称')
-        read_cols = list()
-        for read_col in read_cols_bak:
-
-            if read_col == '时间':
-                df[read_col] = pd.to_datetime(df[read_col], errors='coerce')
-                read_cols.append(read_col)
-            else:
-                df[read_col] = pd.to_numeric(df[read_col], errors='coerce')
-                if not df[read_col].isnull().all():
-                    read_cols.append(read_col)
-
-        group_df = df.groupby(by=['名称']).count()
-        group_df.reset_index(inplace=True)
-        count_df = pd.DataFrame(group_df)
-        total_count = count_df[read_cols].values[0].sum()
-        print(wind_name, total_count, count * len(read_cols))
-        result_df['平均缺失率,单位%'] = [save_percent(1 - total_count / (count * len(read_cols)))]
-        result_df['缺失数值'] = [
-            '-'.join([f'{col_name}_{str(count - i)}' for col_name, i in zip(read_cols, count_df[read_cols].values[0])])]
-        del group_df
-
-        error_fengsu_count = df.query("(风速 < 0) | (风速 > 80)").shape[0]
-        error_yougong_gonglv = df.query("(发电机有功功率 < -200) | (发电机有功功率 > 2500)").shape[0]
-
-        result_df['平均异常率'] = [save_percent((error_fengsu_count + error_yougong_gonglv) / (2 * count))]
-    except Exception as e:
-        print(file_path)
-        raise e
-
-    return result_df
-
-
-if __name__ == '__main__':
-    read_dir = r'D:\data\tmp_data\1分\远景1min'
-
-    files = read_excel_files(read_dir)
-
-    with multiprocessing.Pool(4) as pool:
-        dfs = pool.map(read_and_select, files)
-
-    df = pd.concat(dfs, ignore_index=True)
-    df.sort_values(by=['风机号'], inplace=True)
-
-    df.to_csv("神木风电场-1分钟.csv", encoding='utf8', index=False)

+ 0 - 18
utils/tmp_util/获取台账所有wind表信息.py

@@ -1,18 +0,0 @@
-import sys
-from os import path, environ
-
-env = 'dev'
-if len(sys.argv) >= 2:
-    env = sys.argv[1]
-
-conf_path = path.abspath(__file__).split("energy-data-trans")[0] + f"/energy-data-trans/conf/etl_config_{env}.yaml"
-environ['ETL_CONF'] = conf_path
-environ['env'] = env
-
-from service.common_connect import plt
-
-tables = 'wind_company,wind_engine_group,wind_engine_mill,wind_exception_count,wind_field,wind_field_batch,wind_field_contract,wind_field_resource,wind_relation'
-
-for table in tables.split(','):
-    df = plt.read_sql_to_df(f"select * from {table}")
-    df.to_csv(table + '.csv', encoding='utf8', index=False)

+ 0 - 76
utils/tmp_util/表添加列.py

@@ -1,76 +0,0 @@
-import os
-import sys
-
-env = 'prod'
-if len(sys.argv) >= 2:
-    env = sys.argv[1]
-
-conf_path = os.path.abspath(__file__).split("energy-data-trans")[0] + f"/energy-data-trans/conf/etl_config_{env}.yaml"
-os.environ['ETL_CONF'] = conf_path
-os.environ['env'] = env
-
-db_last = ''
-if env != 'dev':
-    db_last = db_last + '_' + env
-
-query_sql = f"""
-SELECT
-	t.TABLE_NAME
-FROM
-	information_schema.`TABLES` t
-WHERE
-	t.TABLE_SCHEMA = 'energy_data{db_last}'
-AND t.TABLE_NAME LIKE 'WOF%%_minute'
-AND t.TABLE_NAME NOT IN (
-	SELECT
-		table_name
-	FROM
-		information_schema.`COLUMNS` a
-	WHERE
-		a.TABLE_SCHEMA = 'energy_data{db_last}'
-	AND a.TABLE_NAME LIKE 'WOF%%_minute'
-	AND a.COLUMN_NAME = 'main_bearing_temperature_2'
-)
-"""
-
-
-def get_table_count(table_name):
-    query_begin = time.time()
-    query_sql = f"""
-    select count(1) as count from {table_name}
-    """
-    print(table_name, '统计条数耗时', time.time() - query_begin, trans.execute(query_sql)[0]['count'])
-
-
-def get_update_sql(table_name):
-    update_sql = f"""
-        ALTER TABLE {table_name}
-        ADD COLUMN `main_bearing_temperature_2` double DEFAULT NULL COMMENT '主轴承轴承温度2', 
-        ADD COLUMN `grid_a_phase_current` double DEFAULT NULL COMMENT '电网A相电流',
-        ADD COLUMN `grid_b_phase_current` double DEFAULT NULL COMMENT '电网B相电流',
-        ADD COLUMN `grid_c_phase_current` double DEFAULT NULL COMMENT '电网C相电流',
-        ADD COLUMN `reactive_power` double DEFAULT NULL COMMENT '无功功率';
-        """
-    return update_sql
-
-
-if __name__ == '__main__':
-    from service.common_connect import trans
-
-    # tables = trans.execute(query_sql)
-    # print(tables)
-
-    tables = list()
-    tables.append({'TABLE_NAME': 'WOF093400005_minute'})
-
-    import time
-
-    begin_time = time.time()
-    for table in tables:
-        table_name = '`' + table['TABLE_NAME'] + '`'
-        get_table_count(table_name)
-        update_time = time.time()
-        trans.execute(get_update_sql(table_name))
-        print(table_name, '更新耗时', time.time() - update_time)
-
-    print(len(tables), '张表,总耗时:', time.time() - begin_time)

+ 0 - 49
utils/tmp_util/表添加注释.py

@@ -1,49 +0,0 @@
-import os
-import sys
-
-env = 'tidbprod'
-if len(sys.argv) >= 2:
-    env = sys.argv[1]
-
-conf_path = os.path.abspath(__file__).split("energy-data-trans")[0] + f"/energy-data-trans/conf/etl_config_{env}.yaml"
-os.environ['ETL_CONF'] = conf_path
-os.environ['env'] = env
-
-from service.common_connect import trans, plt
-
-
-def get_all_tables():
-    query_sql = f"""
-    
-    SELECT 
-        t.TABLE_NAME
-    FROM
-        information_schema.`TABLES` t
-    WHERE
-        t.TABLE_SCHEMA = 'energy_data_prod'
-"""
-
-    return trans.execute(query_sql)
-
-
-def get_all_wind_company():
-    query_sql = "SELECT t.field_code,t.field_name FROM wind_field t where t.del_state = 0"
-    datas = plt.execute(query_sql)
-    result_dict = dict()
-    for data in datas:
-        result_dict[data['field_code']] = data['field_name']
-
-    return result_dict
-
-
-if __name__ == '__main__':
-    code_name_dict = get_all_wind_company()
-    tables = get_all_tables()
-    for table in tables:
-        table_name = table['TABLE_NAME']
-
-        if table_name.startswith('WOF'):
-            field_code = table_name.split('_')[0].split('-')[0]
-            if field_code in code_name_dict.keys():
-                update_sql = f"ALTER TABLE `{table_name}` COMMENT = '{code_name_dict[field_code]}'"
-                trans.execute(update_sql)

+ 0 - 27
utils/tmp_util/颗粒度变大.py

@@ -1,27 +0,0 @@
-import os
-
-import pandas as pd
-
-
-def trans_time_granularity(read_dir: str, save_dir: str, time_str: str, time_granularity: str, group_by: list):
-    for root, dirs, files in os.walk(read_dir):
-        for file in files:
-            file_path = os.path.join(root, file)
-            df = pd.read_csv(file_path)
-            # df = df.drop(index=0)
-            df[time_str] = pd.to_datetime(df[time_str], errors='coerce')
-            df[time_str] = df[time_str].dt.ceil(time_granularity)
-            groupby_df = df.groupby(group_by).mean(numeric_only=True).reset_index()
-
-            save_file = file_path.replace(read_dir, save_dir)
-            if not os.path.exists(os.path.dirname(save_file)):
-                os.makedirs(os.path.dirname(save_file))
-
-            groupby_df.to_csv(save_file, index=False, encoding='utf-8')
-
-
-if __name__ == '__main__':
-    read_dir = r'D:\data\tmp_data\龙源\minute'
-    save_dir = r'D:\data\tmp_data\龙源\minute12'
-
-    trans_time_granularity(read_dir, save_dir, 'time_stamp', '20min', ['time_stamp'])

+ 98 - 53
utils/zip/unzip.py

@@ -1,17 +1,27 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 # @Time    : 2024/5/17
 # @Time    : 2024/5/17
 # @Author  : 魏志亮
 # @Author  : 魏志亮
+import os
 import traceback
 import traceback
 import zipfile
 import zipfile
-from os import *
+from typing import Tuple, Optional
 
 
 import rarfile
 import rarfile
 
 
-from utils.file.trans_methods import detect_file_encoding
-from utils.log.trans_log import trans_print, logger
+from utils.file.trans_methods import detect_file_encoding, create_file_path
+from utils.log.trans_log import debug, error
 
 
 
 
-def __support_gbk(zip_file: zipfile.ZipFile):
+def __support_gbk(zip_file: zipfile.ZipFile) -> zipfile.ZipFile:
+    """
+    支持GBK编码的zip文件
+    
+    Args:
+        zip_file: ZipFile对象
+    
+    Returns:
+        处理后的ZipFile对象
+    """
     name_to_info = zip_file.NameToInfo
     name_to_info = zip_file.NameToInfo
     # copy map first
     # copy map first
     for name, info in name_to_info.copy().items():
     for name, info in name_to_info.copy().items():
@@ -23,18 +33,31 @@ def __support_gbk(zip_file: zipfile.ZipFile):
     return zip_file
     return zip_file
 
 
 
 
-def unzip(zip_filepath, dest_path):
+def unzip(zip_filepath: str, dest_path: str) -> Tuple[bool, Optional[Exception]]:
+    """
+    解压zip文件
+    
+    Args:
+        zip_filepath: zip文件路径
+        dest_path: 解压目标路径
+    
+    Returns:
+        (是否成功, 错误信息)
+    """
     # 解压zip文件
     # 解压zip文件
     is_success = True
     is_success = True
-    trans_print('开始读取文件:', zip_filepath)
-    trans_print("解压到:", dest_path)
+    debug('开始读取文件:', zip_filepath)
+    debug("解压到:", dest_path)
+
+    # 确保目标路径存在
+    create_file_path(dest_path)
 
 
     try:
     try:
         if detect_file_encoding(zip_filepath).startswith("gb"):
         if detect_file_encoding(zip_filepath).startswith("gb"):
             try:
             try:
-                with __support_gbk(zipfile.ZipFile(zip_filepath, 'r'))  as zip_ref:
+                with __support_gbk(zipfile.ZipFile(zip_filepath, 'r')) as zip_ref:
                     zip_ref.extractall(dest_path)
                     zip_ref.extractall(dest_path)
-            except:
+            except Exception:
                 with zipfile.ZipFile(zip_filepath, 'r') as zip_ref:
                 with zipfile.ZipFile(zip_filepath, 'r') as zip_ref:
                     zip_ref.extractall(dest_path)
                     zip_ref.extractall(dest_path)
         else:
         else:
@@ -42,46 +65,60 @@ def unzip(zip_filepath, dest_path):
                 zip_ref.extractall(dest_path)
                 zip_ref.extractall(dest_path)
 
 
     except zipfile.BadZipFile as e:
     except zipfile.BadZipFile as e:
-        trans_print(traceback.format_exc())
+        error(traceback.format_exc())
+        is_success = False
+        error('不是zip文件:', zip_filepath)
+        return is_success, e
+    except Exception as e:
+        error(traceback.format_exc())
         is_success = False
         is_success = False
-        trans_print('不是zip文件:', zip_filepath)
+        error('解压文件出错:', zip_filepath, str(e))
         return is_success, e
         return is_success, e
 
 
     # 遍历解压后的文件
     # 遍历解压后的文件
-    dest_path = dest_path
-    trans_print('解压再次读取', dest_path)
+    debug('解压再次读取', dest_path)
     if is_success:
     if is_success:
-        for root, dirs, files in walk(dest_path):
+        for root, dirs, files in os.walk(dest_path):
             for file in files:
             for file in files:
-                file_path = path.join(root, file)
+                file_path = os.path.join(root, file)
                 # 检查文件是否是zip文件
                 # 检查文件是否是zip文件
                 if file_path.endswith('.zip'):
                 if file_path.endswith('.zip'):
                     if file_path.endswith('.csv.zip'):
                     if file_path.endswith('.csv.zip'):
-                        rename(file_path, file_path.replace(".csv.zip", ".csv.gz"))
+                        os.rename(file_path, file_path.replace(".csv.zip", ".csv.gz"))
                     else:
                     else:
                         # 如果是,递归解压
                         # 如果是,递归解压
-                        unzip(file_path, dest_path + sep + get_desc_path(str(file)))
-                        # 删除已解压的zip文件(可选)
-                        remove(file_path)
-                    # 检查文件是否是zip文件
-                if file_path.endswith('.rar'):
+                        unzip(file_path, os.path.join(dest_path, get_desc_path(str(file))))
+                        # 删除已解压的zip文件
+                        os.remove(file_path)
+                # 检查文件是否是rar文件
+                elif file_path.endswith('.rar'):
                     # 如果是,递归解压
                     # 如果是,递归解压
-                    unrar(file_path, dest_path + sep + get_desc_path(str(file)))
-                    # 删除已解压的zip文件(可选)
-                    remove(file_path)
-
-    return is_success, ''
-
-
-def unrar(rar_file_path, dest_dir):
-    # 检查目标目录是否存在,如果不存在则创建
-    # 解压zip文件
+                    unrar(file_path, os.path.join(dest_path, get_desc_path(str(file))))
+                    # 删除已解压的rar文件
+                    os.remove(file_path)
+
+    return is_success, None
+
+
+def unrar(rar_file_path: str, dest_dir: str) -> Tuple[bool, Optional[Exception]]:
+    """
+    解压rar文件
+    
+    Args:
+        rar_file_path: rar文件路径
+        dest_dir: 解压目标目录
+    
+    Returns:
+        (是否成功, 错误信息)
+    """
+    # 解压rar文件
     is_success = True
     is_success = True
-    trans_print('开始读取文件:', rar_file_path)
+    debug('开始读取文件:', rar_file_path)
     dest_path = dest_dir
     dest_path = dest_dir
-    trans_print("解压到:", dest_path)
-    if not path.exists(dest_path):
-        makedirs(dest_path)
+    debug("解压到:", dest_path)
+
+    # 确保目标路径存在
+    create_file_path(dest_path)
 
 
     try:
     try:
         # 打开RAR文件
         # 打开RAR文件
@@ -91,33 +128,41 @@ def unrar(rar_file_path, dest_dir):
                 # 解压文件到目标目录
                 # 解压文件到目标目录
                 rf.extract(member, dest_path)
                 rf.extract(member, dest_path)
     except Exception as e:
     except Exception as e:
-        trans_print(traceback.format_exc())
-        logger.exception(e)
+        error(traceback.format_exc())
         is_success = False
         is_success = False
-        trans_print('不是rar文件:', rar_file_path)
+        error('不是rar文件:', rar_file_path)
         return is_success, e
         return is_success, e
 
 
     # 遍历解压后的文件
     # 遍历解压后的文件
-    print('解压再次读取', dest_path)
+    debug('解压再次读取', dest_path)
     if is_success:
     if is_success:
-        for root, dirs, files in walk(dest_path):
+        for root, dirs, files in os.walk(dest_path):
             for file in files:
             for file in files:
-                file_path = path.join(root, file)
-                # 检查文件是否是zip文件
+                file_path = os.path.join(root, file)
+                # 检查文件是否是rar文件
                 if file_path.endswith('.rar'):
                 if file_path.endswith('.rar'):
                     # 如果是,递归解压
                     # 如果是,递归解压
                     unrar(file_path, get_desc_path(file_path))
                     unrar(file_path, get_desc_path(file_path))
-                    # 删除已解压的zip文件(可选)
-                    remove(file_path)
+                    # 删除已解压的rar文件
+                    os.remove(file_path)
 
 
-                if file_path.endswith('.zip'):
+                elif file_path.endswith('.zip'):
                     # 如果是,递归解压
                     # 如果是,递归解压
                     unzip(file_path, get_desc_path(file_path))
                     unzip(file_path, get_desc_path(file_path))
-                    # 删除已解压的zip文件(可选)
-                    remove(file_path)
-
-    return is_success, ''
-
-
-def get_desc_path(path):
-    return path[0:path.rfind(".")]
+                    # 删除已解压的zip文件
+                    os.remove(file_path)
+
+    return is_success, None
+
+
+def get_desc_path(file_path: str) -> str:
+    """
+    获取文件路径的描述路径(去除扩展名)
+    
+    Args:
+        file_path: 文件路径
+    
+    Returns:
+        去除扩展名的路径
+    """
+    return file_path[0:file_path.rfind(".")]

+ 0 - 0
wind_farm/CGN/__init__.py


+ 0 - 83
wind_farm/CGN/minute_data.py

@@ -1,83 +0,0 @@
-import datetime
-import logging
-import os
-
-import pandas as pd
-import sys
-from sqlalchemy import create_engine
-
-# 更新为第三方数据源
-engine = create_engine('mysql+pymysql://root:admin123456@192.168.50.235:30306/appoint')
-
-base_dir = r'/data/logs/104'
-save_dir = base_dir + os.sep + 'minute'
-log_dir = base_dir + os.sep + 'logs' + os.sep + 'minute'
-
-wind_farm_code_dict = {
-    '风场编号1': '山西风场',
-    '风场编号2': '桂林风场'
-}
-
-
-def create_dir(save_dir, is_file=False):
-    if is_file:
-        save_dir = os.path.dirname(save_dir)
-    os.makedirs(save_dir, exist_ok=True)
-
-
-def init_log():
-    logger = logging.getLogger("104data")
-    logger.setLevel(logging.INFO)
-    stout_handle = logging.StreamHandler(sys.stdout)
-    stout_handle.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    stout_handle.setLevel(logging.INFO)
-    logger.addHandler(stout_handle)
-    create_dir(log_dir)
-    file_name = log_dir + os.sep + datetime.datetime.now().strftime('%Y%m') + '-info.log'
-    file_handler = logging.FileHandler(file_name, encoding='utf-8')
-    file_handler.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    file_handler.setLevel(logging.INFO)
-    logger.addHandler(file_handler)
-
-    file_name = log_dir + os.sep + datetime.datetime.now().strftime('%Y%m') + '-error.log'
-    file_handler = logging.FileHandler(file_name, encoding='utf-8')
-    file_handler.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    file_handler.setLevel(logging.ERROR)
-    logger.addHandler(file_handler)
-
-    return logger
-
-
-logger = init_log()
-
-
-def info_print(*kwargs):
-    message = " ".join([str(i) for i in kwargs])
-    logger.info(message)
-
-
-def error_print(*kwargs):
-    message = " ".join([str(i) for i in kwargs])
-    logger.error(message)
-
-
-def get_data_and_save_file(df_sql, save_path):
-    info_print(df_sql)
-    df = pd.read_sql_query(df_sql, engine)
-    info_print(df.shape)
-
-
-if __name__ == '__main__':
-    info_print("开始执行")
-    begin = datetime.datetime.now()
-    yestoday = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime('%Y%m%d')
-    yestoday_sql = f"select * from information_schema.TABLES where TABLE_NAME = {yestoday}"
-
-    get_data_and_save_file(yestoday_sql,
-                           os.path.join(save_dir, wind_farm_code_dict['风场编号1'], yestoday[0:4], yestoday[0:6],
-                                        f'{yestoday}.csv.gz'))
-
-    info_print("执行结束,总耗时:", datetime.datetime.now() - begin)

+ 0 - 83
wind_farm/CGN/purge_history_data.py

@@ -1,83 +0,0 @@
-import datetime
-import logging
-import os
-import sys
-
-import pandas as pd
-from sqlalchemy import create_engine, text
-
-engine = create_engine('mysql+pymysql://root:admin123456@192.168.50.235:30306/appoint')
-
-base_dir = r'/data/logs/104'
-log_dir = base_dir + os.sep + 'logs' + os.sep + 'delete'
-
-
-def create_dir(save_dir, is_file=False):
-    if is_file:
-        save_dir = os.path.dirname(save_dir)
-    os.makedirs(save_dir, exist_ok=True)
-
-
-def init_log():
-    logger = logging.getLogger("104data")
-    logger.setLevel(logging.INFO)
-    stout_handle = logging.StreamHandler(sys.stdout)
-    stout_handle.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    stout_handle.setLevel(logging.INFO)
-    logger.addHandler(stout_handle)
-    create_dir(log_dir)
-    file_name = log_dir + os.sep + datetime.datetime.now().strftime('%Y%m') + '-info.log'
-    file_handler = logging.FileHandler(file_name, encoding='utf-8')
-    file_handler.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    file_handler.setLevel(logging.INFO)
-    logger.addHandler(file_handler)
-
-    file_name = log_dir + os.sep + datetime.datetime.now().strftime('%Y%m') + '-error.log'
-    file_handler = logging.FileHandler(file_name, encoding='utf-8')
-    file_handler.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    file_handler.setLevel(logging.ERROR)
-    logger.addHandler(file_handler)
-
-    return logger
-
-
-logger = init_log()
-
-
-def info_print(*kwargs):
-    message = " ".join([str(i) for i in kwargs])
-    logger.info(message)
-
-
-def error_print(*kwargs):
-    message = " ".join([str(i) for i in kwargs])
-    logger.error(message)
-
-
-def drop_table(lastdays):
-    # 构建查询语句
-    query = text(
-        f"SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA='appoint' AND TABLE_NAME like '{lastdays}%'")
-    table_df = pd.read_sql(query, engine)
-
-    info_print('查询到表', table_df['TABLE_NAME'].values)
-    for table_name in table_df['TABLE_NAME'].values:
-        # 构建删除表的SQL语句
-        drop_query = text(f"DROP TABLE {table_name}")
-        # 执行删除操作
-        with engine.connect() as connection:
-            connection.execute(drop_query)
-
-        info_print(f"Table {table_name} deleted")
-
-
-if __name__ == '__main__':
-    info_print("开始执行")
-    begin = datetime.datetime.now()
-    lastdays = (datetime.datetime.now() - datetime.timedelta(days=8)).strftime('%Y%m%d')
-    print(lastdays)
-    drop_table(lastdays)
-    info_print("执行结束,总耗时:", datetime.datetime.now() - begin)

+ 0 - 173
wind_farm/CGN/second_data.py

@@ -1,173 +0,0 @@
-import datetime
-import json
-import logging
-import multiprocessing
-import os
-import traceback
-
-import sys
-
-import numpy as np
-import pandas as pd
-from sqlalchemy import create_engine
-
-engine = create_engine('mysql+pymysql://root:admin123456@192.168.50.235:30306/appoint')
-
-base_dir = r'/data/logs/104'
-save_dir = base_dir + os.sep + 'second'
-log_dir = base_dir + os.sep + 'logs' + os.sep + 'second'
-
-def create_dir(save_dir, is_file=False):
-    if is_file:
-        save_dir = os.path.dirname(save_dir)
-    os.makedirs(save_dir, exist_ok=True)
-
-
-def init_log():
-    logger = logging.getLogger("104data")
-    logger.setLevel(logging.INFO)
-    stout_handle = logging.StreamHandler(sys.stdout)
-    stout_handle.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    stout_handle.setLevel(logging.INFO)
-    logger.addHandler(stout_handle)
-    create_dir(log_dir)
-    file_name = log_dir + os.sep + datetime.datetime.now().strftime('%Y%m') + '-info.log'
-    file_handler = logging.FileHandler(file_name, encoding='utf-8')
-    file_handler.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    file_handler.setLevel(logging.INFO)
-    logger.addHandler(file_handler)
-
-    file_name = log_dir + os.sep + datetime.datetime.now().strftime('%Y%m') + '-error.log'
-    file_handler = logging.FileHandler(file_name, encoding='utf-8')
-    file_handler.setFormatter(
-        logging.Formatter("%(asctime)s: %(message)s"))
-    file_handler.setLevel(logging.ERROR)
-    logger.addHandler(file_handler)
-
-    return logger
-
-
-logger = init_log()
-
-
-def get_all_mesurement_conf():
-    sql = "select * from measurement_conf "
-    return pd.read_sql(sql, engine)
-
-
-def get_all_mesurepoint_conf():
-    sql = "select * from measurepoint_conf t where t.status = 1"
-    return pd.read_sql(sql, engine)
-
-
-def df_value_to_dict(df, key='col1', value='col2'):
-    """
-    :param df: dataframe
-    :param key: 字典的key,如果重复,则返回
-    :param value: 字典的value
-    :return:
-    """
-    result_dict = dict()
-    for k, v in zip(df[key], df[value]):
-        if k in result_dict.keys():
-            if type(result_dict[k]) == list:
-                result_dict[k].append(v)
-            else:
-                result_dict[k] = [result_dict[k]]
-                result_dict[k].append(v)
-        else:
-            result_dict[k] = v
-
-    return result_dict
-
-
-def info_print(*kwargs):
-    message = " ".join([str(i) for i in kwargs])
-    logger.info(message)
-
-
-def error_print(*kwargs):
-    message = " ".join([str(i) for i in kwargs])
-    logger.error(message)
-
-
-def exists_table(table_name):
-    sql = f"SELECT * FROM information_schema.tables WHERE table_schema = 'appoint' AND table_name = '{table_name}'"
-    info_print(sql)
-    table_df = pd.read_sql_query(sql, engine)
-    if table_df.empty:
-        return False
-    return True
-
-
-def get_data_and_save_file(table_name, save_path, measurepoint_use_dict):
-    if not exists_table(table_name):
-        error_print(f"{table_name} 表不存在")
-    else:
-        df_sql = f"SELECT * FROM {table_name}"
-        info_print(df_sql)
-        df = pd.read_sql_query(df_sql, engine)
-        info_print(df.shape)
-
-        data_dict = dict()
-        for receive_time, information_object_data in zip(df['receive_time'],
-                                                         df['information_object_data']):
-
-            json_data = json.loads(information_object_data)
-            for k, v in json_data.items():
-                k = int(k)
-                wind_num = k // 103 + 1
-                mesurepoint_num = k % 103
-
-                if wind_num not in data_dict.keys():
-                    data_dict[wind_num] = dict()
-
-                if receive_time not in data_dict[wind_num].keys():
-                    data_dict[wind_num][receive_time] = dict()
-
-                if mesurepoint_num in measurepoint_use_dict.keys():
-                    data_dict[wind_num][receive_time][mesurepoint_num] = v
-
-        datas = list()
-        for wind_num, data in data_dict.items():
-            for receive_time, mesurepoint_data in data.items():
-                data = [wind_num, receive_time]
-                for point_num in measurepoint_use_dict.keys():
-                    data.append(mesurepoint_data[point_num] if point_num in mesurepoint_data.keys() else np.nan)
-                if len(data) > 2:
-                    datas.append(data)
-
-        cols = ['风机编号', '时间']
-        cols.extend(measurepoint_use_dict.values())
-        result_df = pd.DataFrame(data=datas, columns=cols)
-        result_df.sort_values(by=['风机编号', '时间'])
-        create_dir(save_path, True)
-        result_df.to_csv(save_path, encoding='utf8', index=False, compression='gzip')
-        info_print("文件", save_path, '保存成功')
-
-
-if __name__ == '__main__':
-    info_print("开始执行")
-    begin = datetime.datetime.now()
-    try:
-        measurepoint_conf_df = get_all_mesurepoint_conf()
-        measurepoint_use_dict = df_value_to_dict(measurepoint_conf_df, 'id', 'name')
-
-        yestoday = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime('%Y%m%d')
-
-        measurement_conf_df = get_all_mesurement_conf()
-        tables = list()
-        for id, measurement_wind_field in zip(measurement_conf_df['id'], measurement_conf_df['measurement_wind_field']):
-            tables.append(
-                (f'{yestoday}_{id}', os.path.join(save_dir, measurement_wind_field, yestoday[0:4], yestoday[0:6],
-                                                  yestoday + '.csv.gz')))
-
-        with multiprocessing.Pool(len(tables)) as pool:
-            pool.starmap(get_data_and_save_file, [(t[0], t[1], measurepoint_use_dict) for t in tables])
-    except Exception as e:
-        error_print(traceback.format_exc())
-        raise e
-
-    info_print("执行结束,总耗时:", datetime.datetime.now() - begin)

+ 0 - 0
wind_farm/__init__.py