Browse Source

First commit after removing batch handling (去掉批次后第一次提交)

wzl 11 months ago
parent
commit
5c1321dbb7
72 changed files with 1053 additions and 5899 deletions
  1. app_run.py  +26 -21
  2. conf/etl_config_dev.yaml  +4 -2
  3. conf/etl_config_prod.yaml  +5 -3
  4. etl/common/ArchiveFile.py  +25 -0
  5. etl/common/BaseDataTrans.py  +74 -40
  6. etl/common/ClearData.py  +7 -5
  7. etl/common/CombineAndSaveFormalFile.py  +61 -0
  8. etl/common/PathsAndTable.py  +30 -37
  9. etl/common/SaveToDb.py  +24 -15
  10. etl/common/UnzipAndRemove.py  +15 -17
  11. etl/wind_power/fault_warn/FaultWarnTrans.py  +70 -23
  12. etl/wind_power/laser/LaserTrans.py  +17 -7
  13. etl/wind_power/min_sec/MinSecTrans.py  +26 -43
  14. etl/wind_power/min_sec/ReadAndSaveTmp.py  +5 -5
  15. etl/wind_power/min_sec/StatisticsAndSaveTmpFormalFile.py  +35 -27
  16. etl/wind_power/wave/WaveTrans.py  +79 -48
  17. package.sh  +0 -3
  18. requirements.txt  +249 -10
  19. service/plt_service.py  +0 -145
  20. service/trans_conf_service.py  +145 -0
  21. service/trans_service.py  +137 -128
  22. service/wave_service.py  +0 -111
  23. test_run_local.py  +8 -12
  24. test_run_local_piliang.py  +0 -91
  25. tmp_file/ClassIdentifier_1.py_bak  +0 -755
  26. tmp_file/baiyushan_20240906.py  +0 -196
  27. tmp_file/changing_hebing_guzhang.py  +0 -48
  28. tmp_file/cp_online_data_to_other.py  +0 -94
  29. tmp_file/curge_read.py  +0 -47
  30. tmp_file/error_ms_data.py  +0 -40
  31. tmp_file/fengxiang_fengdianchang.py  +0 -57
  32. tmp_file/filter_lose_data.py  +0 -48
  33. tmp_file/gradio_web.py  +0 -205
  34. tmp_file/hebing_matlib_result.py  +0 -28
  35. tmp_file/hebing_muti_batch.py  +0 -77
  36. tmp_file/organize_xinhua_files.py  +0 -173
  37. tmp_file/organize_xinhua_files_data.py  +0 -205
  38. tmp_file/orgranize_hongyang.py  +0 -97
  39. tmp_file/power_derating.py  +0 -91
  40. tmp_file/power_derating_biaozhun.py  +0 -90
  41. tmp_file/power_derating_for_chunlin.py  +0 -213
  42. tmp_file/pv_youxiaoxing.py  +0 -262
  43. tmp_file/qinghai-nuomuhong-guifan.py  +0 -134
  44. tmp_file/qinghai-nuomuhong.py  +0 -162
  45. tmp_file/queshi_bili.py  +0 -38
  46. tmp_file/read_and_draw_png.py  +0 -42
  47. tmp_file/select_part_cols.py  +0 -27
  48. tmp_file/test_wave.py  +0 -19
  49. tmp_file/zibo_guzhang_select_time.py  +0 -55
  50. tmp_file/偏航误差验证.py  +0 -98
  51. tmp_file/光伏箱体.py  +0 -155
  52. tmp_file/列名包含数据处理.py  +0 -22
  53. tmp_file/吉山批次处理并重新存数据库.py  +0 -40
  54. tmp_file/对比文件夹列名差值.py  +0 -97
  55. tmp_file/年度汇总平均缺失率.py  +0 -96
  56. tmp_file/张崾先26故障.py  +0 -31
  57. tmp_file/张崾先筛选20241210.py  +0 -46
  58. tmp_file/张崾先统计-分钟.py  +0 -67
  59. tmp_file/张崾先统计-秒.py  +0 -92
  60. tmp_file/张崾先震动_参数获取.py  +0 -90
  61. tmp_file/张崾先风电场-故障整理.py  +0 -32
  62. tmp_file/张崾先风电场-非点检字段获取.py  +0 -108
  63. tmp_file/玉湖光伏-标准化.py  +0 -158
  64. tmp_file/玉湖光伏-标准化_1.py  +0 -209
  65. tmp_file/玉湖光伏-标准化_2.py  +0 -283
  66. tmp_file/玉湖光伏-气象标准化.py  +0 -122
  67. tmp_file/王博提取数据完整风机数据.py  +0 -90
  68. tmp_file/白玉山每月限电损失.py  +0 -35
  69. tmp_file/筛选字段.py  +0 -29
  70. utils/common.py  +3 -0
  71. utils/db/ConnectMysql.py  +2 -2
  72. utils/file/trans_methods.py  +6 -1

app_run.py  +26 -21

@@ -7,37 +7,39 @@ from os import *
 from utils.conf.read_conf import yaml_conf, read_conf
 
 
-def get_exec_data(batch_no=None, read_type=None, run_count=1):
-    if batch_no and read_type:
-        data = get_data_by_batch_no_and_type(batch_no, read_type)
-        if data is None:
-            raise ValueError(f"未找到批次号:{batch_no},类型:{read_type}")
-
+def get_exec_data(run_count=1):
+    now_run_count = get_now_running_count()
+    data = None
+    if now_run_count >= run_count:
+        trans_print(f"当前有{now_run_count}个任务在执行")
     else:
-        data = get_batch_exec_data(run_count)
-        if data is None:
-            trans_print("当前有任务在执行")
-            sys.exit(0)
-        elif len(data.keys()) == 0:
-            trans_print("当前无任务")
-            sys.exit(0)
-
+        data = get_batch_exec_data()
     return data
 
 
-def run(batch_no=None, read_type=None, save_db=True, run_count=1):
+def run(save_db=True, run_count=1, yaml_config=None, step=0, end=6):
     update_timeout_trans_data()
-    data = get_exec_data(batch_no, read_type, run_count)
+    data = get_exec_data(run_count)
+
+    if data is None:
+        trans_print("没有需要执行的任务")
+        return
 
     exec_process = None
     if data['transfer_type'] in ['second', 'minute']:
-        exec_process = MinSecTrans(data=data, save_db=save_db)
+        exec_process = MinSecTrans(data=data, save_db=save_db, yaml_config=yaml_config, step=step, end=end)
 
     if data['transfer_type'] in ['fault', 'warn']:
-        exec_process = FaultWarnTrans(data=data, save_db=save_db)
+        exec_process = FaultWarnTrans(data=data, save_db=save_db, yaml_config=yaml_config)
+
+    if data['transfer_type'] == 'wave':
+        exec_process = WaveTrans(data['id'], data['wind_farm_code'], data['read_dir'])
+
+    if data['transfer_type'] == 'laser':
+        exec_process = LaserTrans(data['id'], data['wind_farm_code'], data['read_dir'])
 
     if exec_process is None:
-        raise Exception("No exec process")
+        raise Exception("没有相应的执行器")
     exec_process.run()
 
 
@@ -54,11 +56,14 @@ if __name__ == '__main__':
     run_count = int(read_conf(yaml_config, "run_batch_count", 1))
 
     from utils.log.trans_log import trans_print
-    from service.plt_service import get_batch_exec_data, get_data_by_batch_no_and_type, update_timeout_trans_data
+    from service.trans_conf_service import update_timeout_trans_data, \
+        get_now_running_count, get_batch_exec_data
     from etl.wind_power.fault_warn.FaultWarnTrans import FaultWarnTrans
     from etl.wind_power.min_sec.MinSecTrans import MinSecTrans
+    from etl.wind_power.laser.LaserTrans import LaserTrans
+    from etl.wind_power.wave.WaveTrans import WaveTrans
 
     trans_print("所有请求参数:", sys.argv, "env:", env, "最大可执行个数:", run_count)
     trans_print("配置文件路径:", environ.get("ETL_CONF"))
 
-    run(run_count=run_count)
+    run(run_count=run_count, yaml_config=yaml_config, step=0)
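With the batch parameters gone, the entry point is driven entirely by run_batch_count and the new step/end stage window. A minimal usage sketch of the new signature (the argument values here are illustrative, not from the commit):

    # Full pipeline, stages 0-6, at most one concurrent task:
    run(run_count=1, yaml_config=yaml_config, step=0, end=6)

    # Hypothetical resume after a failure during archiving: skip stages 0-3
    # and redo archive -> combine -> DB load (stages 4-6).
    run(run_count=1, yaml_config=yaml_config, step=4, end=6)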

conf/etl_config_dev.yaml  +4 -2

@@ -18,9 +18,11 @@ etl_origin_path_contain: 收资数据
 save_path:
 
 # 日志保存路径
-log_path_dir: /data/logs
+log_path_dir: /data/logs/no_batch_trans
 
 # 临时文件存放处,有些甲方公司隔得tmp太小,只好自己配置
 tmp_base_path: /tmp
 
-run_batch_count: 1
+run_batch_count: 1
+
+archive_path: /data/download/collection_data/archive/dev

conf/etl_config_prod.yaml  +5 -3

@@ -18,9 +18,11 @@ etl_origin_path_contain: 收资数据
 save_path:
 
 # 日志保存路径
-log_path_dir: /data/logs
+log_path_dir: /data/logs/no_batch_trans
 
 # 临时文件存放处,有些甲方公司隔得tmp太小,只好自己配置
-tmp_base_path: /tmp
+tmp_base_path: /data/download/collection_data/tmp
 
-run_batch_count: 3
+run_batch_count: 2
+
+archive_path: /data/download/collection_data/archive/prod
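Both configs now define archive_path, which PathsAndTable reads with a default fallback. A minimal sketch of resolving the key (it mirrors the read_conf call in the PathsAndTable diff below; the config file location is illustrative):

    from utils.conf.read_conf import yaml_conf, read_conf

    yaml_config = yaml_conf("/etc/etl/etl_config_prod.yaml")  # illustrative location
    # Falls back to /tmp/archive when archive_path is absent from the YAML.
    archive_path = read_conf(yaml_config, "archive_path", "/tmp/archive")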

etl/common/ArchiveFile.py  +25 -0

@@ -0,0 +1,25 @@
+import os.path
+import shutil
+
+from etl.common.PathsAndTable import PathsAndTable
+from service.trans_conf_service import update_archive_success
+from utils.log.trans_log import trans_print
+
+
+class ArchiveFile(object):
+
+    def __init__(self, pathsAndTable: PathsAndTable, exec_id):
+        self.pathsAndTable = pathsAndTable
+        self.exec_id = exec_id
+
+    def run(self):
+        """
+        归档文件
+        """
+        if os.path.exists(self.pathsAndTable.get_tmp_formal_path()):
+            shutil.make_archive(self.pathsAndTable.get_archive_path(), 'zip', self.pathsAndTable.get_tmp_formal_path())
+            update_archive_success(self.exec_id, self.pathsAndTable.read_type,
+                                   f"{self.pathsAndTable.get_archive_path()}.zip")
+            trans_print(f"文件夹已归档为 {self.pathsAndTable.get_archive_path()}.zip")
+        else:
+            trans_print(f"文件夹 {self.pathsAndTable.get_tmp_formal_path()} 不存在")
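shutil.make_archive expects the base name without the .zip suffix and appends the extension itself, which is why get_archive_path() returns an extension-free path and the success log adds ".zip". A standalone sketch of that stdlib behavior (the paths are made up):

    import shutil

    # base_name carries no extension; make_archive appends ".zip" and
    # returns the full path of the archive it wrote.
    archive = shutil.make_archive("/data/archive/farm_a/second/12_task", "zip",
                                  root_dir="/tmp/farm_a/12_task/second/formal_tmp")
    print(archive)  # /data/archive/farm_a/second/12_task.zip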

etl/common/BaseDataTrans.py  +74 -40

@@ -1,112 +1,152 @@
 import datetime
 import traceback
 
+from etl.common.ArchiveFile import ArchiveFile
 from etl.common.ClearData import ClearData
 from etl.common.PathsAndTable import PathsAndTable
 from etl.common.SaveToDb import SaveToDb
 from etl.common.UnzipAndRemove import UnzipAndRemove
-from service.plt_service import get_all_wind, update_trans_status_success, update_trans_status_error, \
+from service.plt_service import get_all_wind
+from service.trans_conf_service import update_trans_status_success, update_trans_status_error, \
     update_trans_status_running
 from utils.file.trans_methods import read_excel_files
 from utils.log.trans_log import trans_print, set_trance_id
 
 
 class BaseDataTrans(object):
-    def __init__(self, data: dict = None, save_db=True, step=0, end=4):
-
-        self.batch_no = data['batch_code']
-        self.batch_name = data['batch_name']
-        self.read_type = data['transfer_type']
-        self.read_path = data['transfer_addr']
-        self.field_code = data['field_code']
-        self.field_name = data['field_name']
+    def __init__(self, data: dict = None, save_db=True, yaml_config=None, step=0, end=6):
+
+        self.id = data['id']
+        self.task_name = data['task_name']
+        self.transfer_type = data['transfer_type']
+        self.read_dir = data['read_dir']
+        self.wind_farm_code = data['wind_farm_code']
+        self.wind_farm_name = data['wind_farm_name']
+        self.yaml_config = yaml_config
         self.save_zip = False
         self.step = step
         self.end = end
-        self.wind_col_trans, self.rated_power_and_cutout_speed_map = get_all_wind(self.field_code)
+        self.wind_col_trans, self.rated_power_and_cutout_speed_map = get_all_wind(self.wind_farm_code)
         self.batch_count = 100000
         self.save_db = save_db
         self.filed_conf = self.get_filed_conf()
-
-        # trans_print("是否是秒转分钟:", self.boolean_sec_to_min)
+        self.update_files = list()
 
         try:
-            self.pathsAndTable = PathsAndTable(self.batch_no, self.batch_name, self.read_path, self.field_name,
-                                               self.read_type, save_db, self.save_zip)
+            self.pathsAndTable = PathsAndTable(self.id, self.task_name, self.read_dir, self.wind_farm_code,
+                                               self.wind_farm_name, self.transfer_type, save_db, self.save_zip,
+                                               self.yaml_config, self.wind_col_trans)
         except Exception as e:
             trans_print(traceback.format_exc())
-            update_trans_status_error(self.batch_no, self.read_type, str(e), self.save_db)
+            update_trans_status_error(self.id, self.transfer_type, str(e), self.save_db)
             raise e
 
     def get_filed_conf(self):
         raise NotImplementedError("需要实现 获取点检表 方法")
 
-    # 第一步 清理数据
+    # 清理数据
     def clean_file_and_db(self):
         clean_data = ClearData(self.pathsAndTable)
         clean_data.run()
 
-    # 第二步 解压 移动到临时文件
+    # 解压 移动到临时文件
     def unzip_or_remove_to_tmp_dir(self):
         # 解压并删除
         unzip_and_remove = UnzipAndRemove(self.pathsAndTable)
         unzip_and_remove.run()
 
-    # 第三步 读取  保存到临时文件
+    # 读取并保存到临时文件
     def read_and_save_tmp_file(self):
-        raise NotImplementedError("第三步未做实现")
+        raise NotImplementedError("读取并保存到临时文件未做实现")
+
+    # 读取并保存到临时正式文件
+    def statistics_and_save_tmp_formal_file(self):
+        raise NotImplementedError("读取并保存到临时正式文件未做实现")
+
+    # 归档文件
+    def archive_file(self):
+        archive_file = ArchiveFile(self.pathsAndTable, self.id)
+        archive_file.run()
 
-    # 第四步 统计 并 保存到正式文件
-    def statistics_and_save_to_file(self):
-        raise NotImplementedError("第四步未做实现")
+    # 合并到正式文件
+    def combine_and_save_formal_file(self):
+        raise NotImplementedError("合并到正式文件未做实现")
 
-    # 第五步 保存到数据库
+    # 保存到数据库
     def save_to_db(self):
-        save_to_db = SaveToDb(self.pathsAndTable, self.batch_count)
+        save_to_db = SaveToDb(self.pathsAndTable, self.update_files, self.batch_count)
         save_to_db.run()
 
     # 最后更新执行程度
     def update_exec_progress(self):
-        update_trans_status_success(self.batch_no, self.read_type,
+        update_trans_status_success(self.id, self.transfer_type,
                                     len(read_excel_files(self.pathsAndTable.get_save_path())),
                                     None, None, None, None, self.save_db)
 
     def run(self):
         total_begin = datetime.datetime.now()
         try:
-            trance_id = '-'.join([self.batch_no, self.field_name, self.read_type])
+            trance_id = '-'.join([str(self.id), self.wind_farm_name, self.transfer_type])
             set_trance_id(trance_id)
-            update_trans_status_running(self.batch_no, self.read_type, self.save_db)
+            update_trans_status_running(self.id, self.transfer_type, self.save_db)
 
-            if self.step <= 0 and self.end >= 0:
+            now_index = 0
+            # 0
+            if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 trans_print("开始清理数据,临时文件夹:", self.pathsAndTable.get_tmp_path())
                 self.clean_file_and_db()
                 trans_print("清理数据结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
                             datetime.datetime.now() - total_begin)
 
-            if self.step <= 1 and self.end >= 1:
+            now_index = now_index + 1
+            # 1
+            if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 trans_print("开始解压移动文件")
                 self.unzip_or_remove_to_tmp_dir()
                 trans_print("解压移动文件结束:耗时:", datetime.datetime.now() - begin, "总耗时:",
                             datetime.datetime.now() - total_begin)
 
-            if self.step <= 2 and self.end >= 2:
+            now_index = now_index + 1
+            # 2
+            if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 trans_print("开始保存数据到临时文件")
                 self.read_and_save_tmp_file()
                 trans_print("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
                             datetime.datetime.now() - total_begin)
 
-            if self.step <= 3 and self.end >= 3:
+            now_index = now_index + 1
+            # 3
+            if self.step <= now_index <= self.end:
+                begin = datetime.datetime.now()
+                trans_print("开始保存到临时正式文件")
+                self.statistics_and_save_tmp_formal_file()
+                trans_print("保存到临时正式文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                            datetime.datetime.now() - total_begin)
+
+            now_index = now_index + 1
+            # 4
+            if self.step <= now_index <= self.end:
+                begin = datetime.datetime.now()
+                trans_print("开始保存归档文件")
+                self.archive_file()
+                trans_print("保存到保存归档文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                            datetime.datetime.now() - total_begin)
+
+            now_index = now_index + 1
+            # 5
+            if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 trans_print("开始保存数据到正式文件")
-                self.statistics_and_save_to_file()
+                self.combine_and_save_formal_file()
                 trans_print("保存数据到正式文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
                             datetime.datetime.now() - total_begin)
 
-            if self.step <= 4 and self.end >= 4:
+            now_index = now_index + 1
+            # 6
+            if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 trans_print("开始保存到数据库,是否存库:", self.pathsAndTable.save_db)
                 self.save_to_db()
@@ -116,14 +156,8 @@ class BaseDataTrans(object):
             self.update_exec_progress()
         except Exception as e:
             trans_print(traceback.format_exc())
-            update_trans_status_error(self.batch_no, self.read_type, str(e), self.save_db)
+            update_trans_status_error(self.id, self.transfer_type, str(e), self.save_db)
             raise e
         finally:
             self.pathsAndTable.delete_tmp_files()
             trans_print("执行结束,总耗时:", str(datetime.datetime.now() - total_begin))
-
-
-if __name__ == '__main__':
-    test = BaseDataTrans(save_db=False, batch_no="WOF053600062-WOB000010", read_type="fault")
-
-    test.run()
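The rewritten run() numbers its seven stages 0 through 6 and executes stage i only when step <= i <= end, replacing the old pairwise comparisons. A self-contained sketch of the windowing logic, with stand-in stage names that are not from the commit:

    STAGES = ["clean", "unzip", "read_tmp", "stat_tmp_formal",
              "archive", "combine_formal", "save_db"]  # indices 0..6

    def run_stages(step=0, end=6):
        for now_index, name in enumerate(STAGES):
            # A stage runs only when its index falls inside [step, end].
            if step <= now_index <= end:
                print(f"running stage {now_index}: {name}")

    run_stages(step=2, end=4)  # read_tmp, stat_tmp_formal, archive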

etl/common/ClearData.py  +7 -5

@@ -1,7 +1,7 @@
 import datetime
 
 from etl.common.PathsAndTable import PathsAndTable
-from service.plt_service import update_trans_transfer_progress
+from service.trans_conf_service import update_trans_transfer_progress
 from utils.log.trans_log import trans_print
 
 
@@ -12,14 +12,16 @@ class ClearData(object):
 
     def clean_data(self):
         self.pathsAndTable.delete_tmp_files()
-        if self.pathsAndTable.save_db:
-            self.pathsAndTable.delete_batch_db()
-        self.pathsAndTable.delete_batch_files()
+
+        # 不基于批次,不能删除数据库了以及历史数据了
+        # if self.pathsAndTable.save_db:
+        #     self.pathsAndTable.delete_batch_db()
+        # self.pathsAndTable.delete_batch_files()
 
     def run(self):
         trans_print("开始清理数据,临时文件夹:", self.pathsAndTable.get_tmp_path())
         begin = datetime.datetime.now()
         self.clean_data()
-        update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type, 5,
+        update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type, 5,
                                        self.pathsAndTable.save_db)
         trans_print("清理数据结束,耗时:", datetime.datetime.now() - begin)

etl/common/CombineAndSaveFormalFile.py  +61 -0

@@ -0,0 +1,61 @@
+import multiprocessing
+import os
+
+import pandas as pd
+
+from etl.common.PathsAndTable import PathsAndTable
+from utils.file.trans_methods import read_excel_files, read_file_to_df, copy_to_new
+from utils.log.trans_log import trans_print
+from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
+
+
+class CombineAndSaveFormalFile(object):
+
+    def __init__(self, pathsAndTable: PathsAndTable):
+        self.pathsAndTable = pathsAndTable
+        self.update_files = multiprocessing.Manager().list()
+
+    def combine_and_save(self, file_path, key, exists_file_path):
+        exists_same = False
+        if exists_file_path:
+            exists_same = True
+            exists_df = read_file_to_df(exists_file_path)
+            now_df = read_file_to_df(file_path)
+            # 合并两个 DataFrame
+            combined_df = pd.concat([exists_df, now_df])
+            # 去重,保留 now_df 的值
+            combined_df = combined_df.drop_duplicates(subset='time_stamp', keep='last')
+            # 按 time_stamp 排序
+            combined_df = combined_df.sort_values(by='time_stamp').reset_index(drop=True)
+            combined_df.to_csv(exists_file_path, encoding='utf-8', index=False)
+            self.update_files.append(exists_file_path)
+        else:
+            save_path = str(os.path.join(self.pathsAndTable.get_save_path(), key[0], key[1]))
+            copy_to_new(file_path, save_path)
+            self.update_files.append(save_path)
+        trans_print(f"{key[0]}/{key[1]} {'包含' if exists_same else '不包含'} 相同文件,保存成功")
+
+    def combine_and_save_formal_file(self):
+        exists_files = read_excel_files(self.pathsAndTable.get_save_path())
+        exists_file_maps = dict()
+        for file_path in exists_files:
+            name = (os.path.basename(os.path.dirname(file_path)), os.path.basename(file_path))
+            exists_file_maps[name] = file_path
+
+        new_files = read_excel_files(self.pathsAndTable.get_tmp_formal_path())
+        new_file_maps = dict()
+        for file_path in new_files:
+            name = (os.path.basename(os.path.dirname(file_path)), os.path.basename(file_path))
+            new_file_maps[name] = file_path
+
+        same_keys = list(set(exists_file_maps.keys()).intersection(new_file_maps.keys()))
+        split_count = get_available_cpu_count_with_percent(2 / 3)
+        with multiprocessing.Pool(split_count) as pool:
+            pool.starmap(self.combine_and_save,
+                         [(file_path, key, exists_file_maps[key] if key in same_keys else None) for key, file_path in
+                          new_file_maps.items()])
+
+    def run(self):
+        self.combine_and_save_formal_file()
+        print(self.update_files)
+        return list(self.update_files)
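The merge keeps the newest reading per timestamp: concat appends the freshly transformed rows after the existing ones, and drop_duplicates(subset='time_stamp', keep='last') lets the new rows win on collisions. A toy demonstration of that exact sequence:

    import pandas as pd

    old = pd.DataFrame({"time_stamp": ["00:00", "00:10"], "power": [1.0, 2.0]})
    new = pd.DataFrame({"time_stamp": ["00:10", "00:20"], "power": [9.0, 3.0]})

    merged = (pd.concat([old, new])
              .drop_duplicates(subset="time_stamp", keep="last")
              .sort_values(by="time_stamp")
              .reset_index(drop=True))
    print(merged)  # 00:00 -> 1.0, 00:10 -> 9.0 (new row wins), 00:20 -> 3.0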

etl/common/PathsAndTable.py  +30 -37

@@ -1,45 +1,47 @@
 import shutil
-from os import path, environ, sep
+from os import path, sep
 
 
-from service.trans_service import drop_table, creat_min_sec_table, create_warn_fault_table
+from service.trans_service import creat_min_sec_table, create_warn_fault_table
 from utils.conf.read_conf import *
 from utils.log.trans_log import trans_print
 
 
 
 
 class PathsAndTable(object):
-    def __init__(self, batch_no=None, batch_name=None, read_path=None, field_name=None, read_type=None,
-                 save_db=True, save_zip=True):
-        self.batch_no = batch_no
-        self.batch_name = batch_name
-        self.read_path = read_path
-        self.field_name = field_name
+    def __init__(self, id=None, task_name=None, read_dir=None, wind_farm_code=None, wind_farm_name=None,
+                 read_type=None, save_db=True, save_zip=True, yaml_config=None, wind_col_trans=None):
+        self.id = id
+        self.task_name = task_name
+        self.read_dir = read_dir
+        self.wind_farm_code = wind_farm_code
+        self.wind_farm_name = wind_farm_name
         self.read_type = read_type
         self.save_db = save_db
         self.save_zip = save_zip
         self.multi_pool_count = 6
-        self.is_delete_db = False
-
-        yaml_config = yaml_conf(environ.get('ETL_CONF'))
+        self.yaml_config = yaml_config
+        self.wind_col_trans = wind_col_trans
 
 
         save_path_conf = read_conf(yaml_config, "save_path")
         self.tmp_base_path = read_conf(yaml_config, "tmp_base_path", "/tmp")
         if save_path_conf:
-            self.save_path = save_path_conf + sep + self.field_name
+            self.save_path = save_path_conf + sep + self.wind_farm_name
         else:
-            find_index = read_path.find(read_conf(yaml_config, 'etl_origin_path_contain', "etl_origin_path_contain"))
+            find_index = read_dir.find(read_conf(yaml_config, 'etl_origin_path_contain', "etl_origin_path_contain"))
             if find_index == -1:
-                raise Exception("路径未包含原始数据特定字符:" + read_path)
-            self.save_path = read_path[0:find_index] + sep + "清理数据"
+                raise Exception("路径未包含原始数据特定字符:" + read_dir)
+            self.save_path = read_dir[0:find_index] + sep + "清理数据"
 
 
         if self.save_path is None:
-            raise Exception("未配置保存路径:" + read_path)
+            raise Exception("未配置保存路径:" + read_dir)
+
+        self.archive_path = read_conf(yaml_config, "archive_path", "/tmp/archive")
 
 
     def get_save_path(self):
-        return path.join(self.save_path, self.batch_no + "_" + self.batch_name, self.read_type)
+        return path.join(self.save_path, self.read_type)
 
 
     def get_tmp_path(self):
-        return path.join(self.tmp_base_path, self.field_name, self.batch_no + "_" + self.batch_name,
-                         self.read_type)
+        return str(path.join(self.tmp_base_path, self.wind_farm_name, str(self.id) + "_" + self.task_name,
+                             self.read_type))
 
 
     def get_excel_tmp_path(self):
     def get_excel_tmp_path(self):
         return path.join(self.get_tmp_path(), 'excel_tmp' + sep)
         return path.join(self.get_tmp_path(), 'excel_tmp' + sep)
@@ -53,14 +55,14 @@ class PathsAndTable(object):
         else:
             return path.join(self.get_tmp_path(), 'merge_tmp', str(wind_turbine_number))
 
 
-    def get_table_name(self):
-        return "_".join([self.batch_no, self.read_type])
+    def get_tmp_formal_path(self):
+        return path.join(self.get_tmp_path(), 'formal_tmp')
 
 
-    def delete_batch_files(self):
-        trans_print("开始删除已存在的批次文件夹")
-        if path.exists(self.get_save_path()):
-            shutil.rmtree(self.get_save_path())
-        trans_print("删除已存在的批次文件夹")
+    def get_archive_path(self):
+        return path.join(self.archive_path, self.wind_farm_name, self.read_type, f'{self.id}_{self.task_name}')
+
+    def get_table_name(self):
+        return "_".join([self.wind_farm_code, self.read_type])
 
 
     def delete_tmp_files(self):
     def delete_tmp_files(self):
         trans_print("开始删除临时文件夹")
@@ -68,20 +70,11 @@ class PathsAndTable(object):
             shutil.rmtree(self.get_tmp_path())
         trans_print("删除临时文件夹删除成功")
 
 
-    def delete_batch_db(self):
-        if self.save_db:
-            trans_print("开始删除表")
-            if not self.is_delete_db:
-                table_name = self.get_table_name()
-                drop_table(table_name, self.save_db)
-                self.is_delete_db = True
-            trans_print("删除表结束")
-
-    def create_batch_db(self, wind_names: list = list()):
+    def create_wind_farm_db(self):
         if self.save_db:
             trans_print("开始创建表")
             if self.read_type in ['second', 'minute']:
-                creat_min_sec_table(self.get_table_name(), wind_names, self.read_type)
+                creat_min_sec_table(self.get_table_name(), self.read_type)
             elif self.read_type in ['fault', 'warn']:
                 create_warn_fault_table(self.get_table_name())
             else:

etl/common/SaveToDb.py  +24 -15

@@ -1,40 +1,49 @@
 import multiprocessing
+import os.path
 import traceback
-from os import path
 
 
 from etl.common.PathsAndTable import PathsAndTable
-from service.plt_service import update_trans_transfer_progress
-from service.trans_service import save_file_to_db
-from utils.file.trans_methods import read_excel_files, split_array
+from service.trans_conf_service import update_trans_transfer_progress
+from service.trans_service import save_partation_file_to_db, save_file_to_db
+from utils.file.trans_methods import split_array
 from utils.log.trans_log import trans_print
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 
 
 
 
 class SaveToDb(object):
 
 
-    def __init__(self, pathsAndTable: PathsAndTable, batch_count=100000):
+    def __init__(self, pathsAndTable: PathsAndTable, update_files, batch_count=100000):
         self.pathsAndTable = pathsAndTable
         self.batch_count = batch_count
+        self.update_files = update_files
 
 
     def mutiprocessing_to_save_db(self):
         # 开始保存到SQL文件
+        all_saved_files = self.update_files
 
 
-        self.pathsAndTable.delete_batch_db()
-        all_saved_files = read_excel_files(self.pathsAndTable.get_save_path())
-        wind_names = [str(path.basename(i)).replace(".csv", "") for i in all_saved_files]
+        # 映射到的文件保存到数据库
+        all_saved_files = [i for i in all_saved_files if
+                           os.path.basename(i).split(".")[0] in self.pathsAndTable.wind_col_trans.keys()]
 
 
-        self.pathsAndTable.create_batch_db(wind_names)
+        self.pathsAndTable.create_wind_farm_db()
 
 
-        split_count = get_available_cpu_count_with_percent(percent=1 / 2)
+        split_count = get_available_cpu_count_with_percent(percent=2 / 3)
         split_count = split_count if split_count <= len(all_saved_files) else len(all_saved_files)
         all_arrays = split_array(all_saved_files, split_count)
         try:
             for index, arr in enumerate(all_arrays):
                 with multiprocessing.Pool(split_count) as pool:
-                    pool.starmap(save_file_to_db,
-                                 [(self.pathsAndTable.get_table_name(), file, self.batch_count) for file in
-                                  all_saved_files])
-                update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type,
+                    if self.pathsAndTable.read_type in ['minute', 'second']:
+                        pool.starmap(save_partation_file_to_db,
+                                     [(self.pathsAndTable.get_table_name(), file,
+                                       self.pathsAndTable.wind_col_trans[os.path.basename(file).split(".")[0]],
+                                       os.path.basename(os.path.dirname(file)),
+                                       self.batch_count) for file in arr])
+                    else:
+                        pool.starmap(save_file_to_db,
+                                     [(self.pathsAndTable.get_table_name(), file, self.batch_count) for file in arr])
+
+                update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type,
                                                round(70 + 29 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
         except Exception as e:
@@ -45,5 +54,5 @@ class SaveToDb(object):
     def run(self):
         if self.pathsAndTable.save_db:
             self.mutiprocessing_to_save_db()
-            update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type, 99,
+            update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type, 99,
                                            self.pathsAndTable.save_db)
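Two details worth noting in this hunk: the starmap now submits only the current chunk (arr) rather than the whole file list on every iteration, and minute/second data goes through save_partation_file_to_db with the per-turbine column mapping. A minimal sketch of the chunked-pool-with-progress pattern, where save_one and the striped chunking are stand-ins for save_file_to_db and split_array:

    import multiprocessing

    def save_one(table_name, file, batch_count):
        print("saving", table_name, file, batch_count)  # stand-in for save_file_to_db

    if __name__ == '__main__':
        files = [f"wt{i:02d}.csv" for i in range(10)]
        split_count = 4
        chunks = [files[i::split_count] for i in range(split_count)]  # assumed chunking
        for index, arr in enumerate(chunks):
            with multiprocessing.Pool(split_count) as pool:
                # Only the current chunk is submitted, mirroring the fixed loop above.
                pool.starmap(save_one, [("farm_a_second", f, 100000) for f in arr])
            print("progress:", round(70 + 29 * (index + 1) / len(chunks), 2))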

etl/common/UnzipAndRemove.py  +15 -17

@@ -3,7 +3,7 @@ import traceback
 from os import *
 
 
 from etl.common.PathsAndTable import PathsAndTable
-from service.plt_service import update_trans_transfer_progress
+from service.trans_conf_service import update_trans_transfer_progress
 from utils.file.trans_methods import read_files, read_excel_files, copy_to_new, split_array
 from utils.log.trans_log import trans_print
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
@@ -20,40 +20,38 @@ class UnzipAndRemove(object):
         to_path = self.pathsAndTable.get_excel_tmp_path()
         if str(file).endswith("zip"):
             if str(file).endswith("csv.zip"):
-                copy_to_new(file, file.replace(self.pathsAndTable.read_path, to_path).replace("csv.zip", 'csv.gz'))
+                copy_to_new(file, file.replace(self.pathsAndTable.read_dir, to_path).replace("csv.zip", 'csv.gz'))
             else:
-                desc_path = file.replace(self.pathsAndTable.read_path, to_path)
-                is_success, e = unzip(file, get_desc_path(desc_path))
+                desc_path = file.replace(self.pathsAndTable.read_dir, to_path)
+                unzip(file, get_desc_path(desc_path))
                 self.pathsAndTable.has_zip = True
-                if not is_success:
-                    # raise e
-                    pass
         elif str(file).endswith("rar"):
-            desc_path = file.replace(self.pathsAndTable.read_path, to_path)
+            desc_path = file.replace(self.pathsAndTable.read_dir, to_path)
             is_success, e = unrar(file, get_desc_path(desc_path))
             self.pathsAndTable.has_zip = True
             if not is_success:
                 trans_print(traceback.format_exc())
                 pass
         else:
-            copy_to_new(file, file.replace(self.pathsAndTable.read_path, to_path))
+            copy_to_new(file, file.replace(self.pathsAndTable.read_dir, to_path))
 
 
     def remove_file_to_tmp_path(self):
         # 读取文件
         try:
-            if path.isfile(self.pathsAndTable.read_path):
-                all_files = [self.pathsAndTable.read_path]
+            if path.isfile(self.pathsAndTable.read_dir):
+                all_files = [self.pathsAndTable.read_dir]
             else:
-                all_files = read_files(self.pathsAndTable.read_path)
+                all_files = read_files(self.pathsAndTable.read_dir)
 
 
             # 最大取系统cpu的 三分之二
-            split_count = get_available_cpu_count_with_percent(1 / 2)
+            split_count = get_available_cpu_count_with_percent(2 / 3)
             all_arrays = split_array(all_files, split_count)
 
 
             for index, arr in enumerate(all_arrays):
-                with multiprocessing.Pool(self.pathsAndTable.multi_pool_count) as pool:
+                pool_count = split_count if split_count < len(arr) else len(arr)
+                with multiprocessing.Pool(pool_count) as pool:
                     pool.starmap(self.get_and_remove, [(i,) for i in arr])
-                update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type,
+                update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type,
                                                round(5 + 15 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
 
 
@@ -62,11 +60,11 @@ class UnzipAndRemove(object):
             trans_print('读取文件数量:', len(all_files))
         except Exception as e:
             trans_print(traceback.format_exc())
-            message = "读取文件列表错误:" + self.pathsAndTable.read_path + ",系统返回错误:" + str(e)
+            message = "读取文件列表错误:" + self.pathsAndTable.read_dir + ",系统返回错误:" + str(e)
             raise ValueError(message)
         return all_files
 
 
     def run(self):
         self.remove_file_to_tmp_path()
-        update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type, 20,
+        update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type, 20,
                                        self.pathsAndTable.save_db)
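The pool is now sized per chunk so workers are never spawned idle: pool_count = split_count if split_count < len(arr) else len(arr), i.e. min(split_count, len(arr)). A two-line illustration:

    def pool_count(split_count, arr):
        # Never start more workers than there are files in the batch.
        return split_count if split_count < len(arr) else len(arr)

    print(pool_count(8, ["a.zip", "b.zip"]))  # 2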

etl/wind_power/fault_warn/FaultWarnTrans.py  +70 -23

@@ -1,11 +1,13 @@
+import os.path
 from os import *
 
 
 import numpy as np
 import pandas as pd
 
 
 from etl.common.BaseDataTrans import BaseDataTrans
-from service.plt_service import update_trans_status_error
-from service.trans_service import get_fault_warn_conf, get_trans_exec_code
+from service.trans_conf_service import update_trans_status_error, update_trans_status_success
+from service.trans_service import get_fault_warn_conf, get_trans_exec_code, drop_table, create_warn_fault_table, \
+    save_file_to_db
 from utils.conf.read_conf import read_conf
 from utils.file.trans_methods import read_excel_files, read_file_to_df, create_file_path, valid_eval
 from utils.log.trans_log import trans_print
@@ -13,23 +15,27 @@ from utils.log.trans_log import trans_print
 
 
 class FaultWarnTrans(BaseDataTrans):
 
 
-    def __init__(self, data: dict = None, save_db=True, step=0, end=4):
-        super(FaultWarnTrans, self).__init__(data, save_db, step, end)
+    def __init__(self, data: dict = None, save_db=True, yaml_config=None, step=0, end=6):
+        super(FaultWarnTrans, self).__init__(data, save_db, yaml_config, step, end)
+        self.engine_count = 0
+        self.min_date = None
+        self.max_date = None
+        self.data_count = 0
 
 
     def get_filed_conf(self):
-        return get_fault_warn_conf(self.field_code, self.read_type)
+        return get_fault_warn_conf(self.wind_farm_code, self.transfer_type)
 
 
     # 第三步 读取 并 保存到临时文件
     def read_and_save_tmp_file(self):
         trans_print("无需保存临时文件")
 
 
-    # 第四步 统计 并 保存到正式文件
-    def statistics_and_save_to_file(self):
+    # 读取并保存到临时正式文件
+    def statistics_and_save_tmp_formal_file(self):
         conf_map = self.get_filed_conf()
         if conf_map is None or type(conf_map) == tuple or len(conf_map.keys()) == 0:
-            message = f"未找到{self.batch_no}的{self.read_type}配置"
+            message = f"未找到{self.id}的{self.transfer_type}配置"
             trans_print(message)
-            update_trans_status_error(self.batch_no, self.read_type, message, self.save_db)
+            update_trans_status_error(self.id, self.transfer_type, message, self.save_db)
         else:
 
 
             for key, v in conf_map.items():
@@ -112,21 +118,62 @@ class FaultWarnTrans(BaseDataTrans):
                 df['time_diff'] = (df['end_time'] - df['begin_time']).dt.total_seconds()
                 df.loc[df['time_diff'] < 0, 'time_diff'] = np.nan
 
 
-            # 如果有需要处理的,先进行代码处理,在保存文件
-            exec_code = get_trans_exec_code(self.batch_no, self.read_type)
-            if exec_code:
-                exec(exec_code)
-
-            # 根绝开始时间进行排序
-            df.sort_values(by=['wind_turbine_number', 'begin_time'], inplace=True)
-
             if self.save_zip:
-                save_path = path.join(self.pathsAndTable.get_save_path(), str(self.batch_name) + '.csv.gz')
+                save_path = path.join(self.pathsAndTable.get_tmp_formal_path(),
+                                      str(self.pathsAndTable.read_type) + '.csv.gz')
             else:
-                save_path = path.join(self.pathsAndTable.get_save_path(), str(self.batch_name) + '.csv')
+                save_path = path.join(self.pathsAndTable.get_tmp_formal_path(),
+                                      str(self.pathsAndTable.read_type) + '.csv')
 
 
             create_file_path(save_path, is_file_path=True)
-            if self.save_zip:
-                df.to_csv(save_path, compression='gzip', index=False, encoding='utf-8', date_format='%Y-%m-%d %H:%M:%S')
-            else:
-                df.to_csv(save_path, index=False, encoding='utf-8', date_format='%Y-%m-%d %H:%M:%S')
+
+            df.to_csv(save_path, index=False, encoding='utf-8')
+
+    # 归档文件
+    # def archive_file(self):
+    #     trans_print("无需归档文件")
+
+    # 合并到正式文件
+    def combine_and_save_formal_file(self):
+
+        df = read_file_to_df(
+            os.path.join(self.pathsAndTable.get_tmp_formal_path(), str(self.pathsAndTable.read_type) + '.csv'))
+
+        self.engine_count = len(df['wind_turbine_number'].unique())
+        self.min_date = df['begin_time'].min()
+        self.max_date = df['begin_time'].max()
+        self.data_count = df.shape[0]
+
+        df = df[df['wind_turbine_number'].isin(self.wind_col_trans.values())]
+
+        save_path = os.path.join(self.pathsAndTable.get_save_path(), str(self.pathsAndTable.read_type) + '.csv')
+
+        exists_df = pd.DataFrame()
+        if os.path.exists(save_path):
+            exists_df = read_file_to_df(save_path)
+        else:
+            create_file_path(save_path, is_file_path=True)
+
+        df = pd.concat([exists_df, df], ignore_index=True)
+        df.drop_duplicates(inplace=True, keep='last')
+        self.update_files = [save_path]
+
+        # 根据开始时间进行排序
+        df.sort_values(by=['wind_turbine_number', 'begin_time'], inplace=True)
+
+
+        if self.save_zip:
+            df.to_csv(save_path, compression='gzip', index=False, encoding='utf-8', date_format='%Y-%m-%d %H:%M:%S')
+        else:
+            df.to_csv(save_path, index=False, encoding='utf-8', date_format='%Y-%m-%d %H:%M:%S')
+
+    def save_to_db(self):
+        table_name = self.pathsAndTable.get_table_name()
+        drop_table(table_name)
+        create_warn_fault_table(table_name)
+        save_file_to_db(table_name, self.update_files[0], self.batch_count)
+
+    def update_exec_progress(self):
+        update_trans_status_success(self.id, self.transfer_type,
+                                    self.engine_count, None, self.min_date, self.max_date, self.data_count,
+                                    self.save_db)
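Because the formal fault/warn CSV now accumulates rows across runs, save_to_db rebuilds the table from scratch rather than appending: drop, recreate, bulk-load the single merged file. A sketch of that idempotent reload, using the service functions named in the diff above:

    def reload_warn_fault_table(table_name, csv_path, batch_count=100000):
        # Dropping first makes repeated loads of the merged file converge
        # to the same table contents.
        drop_table(table_name)
        create_warn_fault_table(table_name)
        save_file_to_db(table_name, csv_path, batch_count)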

etl/wind_power/laser/LaserTrans.py  +17 -7

@@ -8,6 +8,8 @@ import pandas as pd
 
 
 from service.plt_service import get_all_wind
 from service.trans_service import save_df_to_db
+from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
+    update_trans_status_success
 from utils.file.trans_methods import read_files, read_file_to_df
 from utils.log.trans_log import set_trance_id, trans_print
 
 
@@ -17,11 +19,12 @@ class LaserTrans():
     激光测距仪转化
     激光测距仪转化
     """
     """
 
 
-    def __init__(self, field_code, read_path):
-        self.field_code = field_code
+    def __init__(self, id, wind_farm_code, read_path):
+        self.id = id
+        self.wind_farm_code = wind_farm_code
         self.read_path = read_path
         self.begin = datetime.datetime.now()
-        self.wind_col_trans, _ = get_all_wind(self.field_code, need_rated_param=False)
+        self.wind_col_trans, _ = get_all_wind(self.wind_farm_code, need_rated_param=False)
 
 
     def get_file_data(self, file_path):
         file_name = os.path.basename(file_path)
@@ -49,18 +52,25 @@ class LaserTrans():
         return result_df
 
 
     def run(self):
-        trance_id = '-'.join([self.field_code, 'laser'])
+        update_trans_status_running(self.id)
+        trance_id = '-'.join([self.wind_farm_code, 'laser'])
         set_trance_id(trance_id)
         all_files = read_files(self.read_path, ['csv'])
-        trans_print(self.field_code, '获取文件总数为:', len(all_files))
+        trans_print(self.wind_farm_code, '获取文件总数为:', len(all_files))
         pool_count = 8 if len(all_files) > 8 else len(all_files)
 
 
         with multiprocessing.Pool(pool_count) as pool:
             dfs = pool.map(self.get_file_data, all_files)
+
+        update_trans_transfer_progress(self.id, 80)
         df = pd.concat(dfs, ignore_index=True)
-        save_df_to_db(self.field_code + "_laser", df)
+        update_trans_transfer_progress(self.id, 90)
         df.sort_values(by=['acquisition_time'], inplace=True)
-        trans_print(self.field_code, '执行结束,总耗时:', (datetime.datetime.now() - self.begin))
+        save_df_to_db(self.wind_farm_code + "_laser", df)
+        update_trans_status_success(self.id, 'laser', len(df['wind_turbine_number'].unique()), None,
+                                    df['acquisition_time'].min(), df['acquisition_time'].max(), df.shape[0])
+        #update_trans_status_success(self.id)
+        trans_print(self.wind_farm_code, '执行结束,总耗时:', (datetime.datetime.now() - self.begin))
 
 
 
 
 if __name__ == '__main__':

etl/wind_power/min_sec/MinSecTrans.py  +26 -43

@@ -2,38 +2,36 @@
 # @Time    : 2024/5/15
 # @Author  : 魏志亮
 import multiprocessing
-
-import pandas as pd
+import os.path
 
 
 from etl.common.BaseDataTrans import BaseDataTrans
+from etl.common.CombineAndSaveFormalFile import CombineAndSaveFormalFile
 from etl.wind_power.min_sec.ReadAndSaveTmp import ReadAndSaveTmp
-from etl.wind_power.min_sec.StatisticsAndSaveFile import StatisticsAndSaveFile
+from etl.wind_power.min_sec.StatisticsAndSaveTmpFormalFile import StatisticsAndSaveTmpFormalFile
 from etl.wind_power.min_sec.TransParam import TransParam
-from service.plt_service import update_trans_status_success, update_trans_status_error
-from service.trans_service import batch_statistics, get_min_sec_conf
+from service.trans_conf_service import update_trans_status_success, update_trans_status_error
+from service.trans_service import get_min_sec_conf
 from utils.conf.read_conf import read_conf
-from utils.df_utils.util import get_time_space
-from utils.file.trans_methods import read_excel_files, read_file_to_df
 from utils.log.trans_log import trans_print
 
 
 
 
 class MinSecTrans(BaseDataTrans):
 
 
-    def __init__(self, data: dict = None, save_db=True, step=0, end=4):
-        super(MinSecTrans, self).__init__(data, save_db, step, end)
+    def __init__(self, data: dict = None, save_db=True, yaml_config=None, step=0, end=6):
+        super(MinSecTrans, self).__init__(data, save_db, yaml_config, step, end)
         self.statistics_map = multiprocessing.Manager().dict()
         self.trans_param = self.get_trans_param()
         self.trans_param.wind_col_trans = self.wind_col_trans
 
 
     def get_filed_conf(self):
-        return get_min_sec_conf(self.field_code, self.read_type)
+        return get_min_sec_conf(self.wind_farm_code, self.transfer_type)
 
 
     def get_trans_param(self):
         conf_map = self.get_filed_conf()
         if conf_map is None or type(conf_map) == tuple or len(conf_map.keys()) == 0:
-            message = f"未找到{self.batch_no}的{self.read_type}配置"
+            message = f"未找到{self.id}的{self.transfer_type}配置"
             trans_print(message)
-            update_trans_status_error(self.batch_no, self.read_type, message, self.save_db)
+            update_trans_status_error(self.id, self.transfer_type, message, self.save_db)
         else:
             resolve_col_prefix = read_conf(conf_map, 'resolve_col_prefix')
             wind_name_exec = read_conf(conf_map, 'wind_name_exec', None)
@@ -74,7 +72,7 @@ class MinSecTrans(BaseDataTrans):
             for col in trans_cols:
                 cols_trans_all[col] = read_conf(conf_map, col, '')
 
 
-            return TransParam(read_type=self.read_type, read_path=self.read_path,
+            return TransParam(read_type=self.transfer_type, read_path=self.read_dir,
                               cols_tran=cols_trans_all,
                               wind_name_exec=wind_name_exec, is_vertical_table=is_vertical_table,
                               vertical_cols=vertical_cols, vertical_key=vertical_key,
@@ -88,37 +86,22 @@ class MinSecTrans(BaseDataTrans):
         read_and_save_tmp.run()
 
 
     # 第四步 统计 并 保存到正式文件
-    def statistics_and_save_to_file(self):
+    def statistics_and_save_tmp_formal_file(self):
         # 保存到正式文件
         # 保存到正式文件
-        statistics_and_save_file = StatisticsAndSaveFile(self.pathsAndTable, self.trans_param, self.statistics_map,
-                                                         self.rated_power_and_cutout_speed_map)
-        statistics_and_save_file.run()
+        statistics_and_save_tmp_formal_file = StatisticsAndSaveTmpFormalFile(self.pathsAndTable, self.trans_param,
+                                                                             self.statistics_map,
+                                                                             self.rated_power_and_cutout_speed_map)
+        statistics_and_save_tmp_formal_file.run()
+
+    def combine_and_save_formal_file(self):
+        combine_and_save_formal_file = CombineAndSaveFormalFile(self.pathsAndTable)
+        self.update_files = combine_and_save_formal_file.run()
 
 
     # 最后更新执行程度
     # 最后更新执行程度
     def update_exec_progress(self):
     def update_exec_progress(self):
-        if self.end >= 4:
-            all_files = read_excel_files(self.pathsAndTable.get_save_path())
-            if self.step <= 3:
-                update_trans_status_success(self.batch_no, self.trans_param.read_type,
-                                            len(all_files),
-                                            self.statistics_map['time_granularity'],
-                                            self.statistics_map['min_date'], self.statistics_map['max_date'],
-                                            self.statistics_map['total_count'], self.save_db)
-            else:
-                df = read_file_to_df(all_files[0], read_cols=['time_stamp'])
-                df['time_stamp'] = pd.to_datetime(df['time_stamp'])
-                time_granularity = get_time_space(df, 'time_stamp')
-                batch_data = batch_statistics("_".join([self.batch_no, self.trans_param.read_type]))
-                if batch_data is not None:
-                    update_trans_status_success(self.batch_no, self.trans_param.read_type,
-                                                len(read_excel_files(self.pathsAndTable.get_save_path())),
-                                                time_granularity,
-                                                batch_data['min_date'], batch_data['max_date'],
-                                                batch_data['total_count'], self.save_db)
-                else:
-                    update_trans_status_success(self.batch_no, self.trans_param.read_type,
-                                                len(read_excel_files(self.pathsAndTable.get_save_path())),
-                                                time_granularity,
-                                                None, None,
-                                                None, self.save_db)
-
+        all_files = set([os.path.basename(i) for i in self.update_files])
+        update_trans_status_success(self.id, self.trans_param.read_type,
+                                    len(all_files),
+                                    self.statistics_map['time_granularity'],
+                                    self.statistics_map['min_date'], self.statistics_map['max_date'],
+                                    self.statistics_map['total_count'], self.save_db)

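A note on the reworked update_exec_progress: statistics_map is a multiprocessing.Manager().dict() shared by the worker processes, so the success update only has to read the aggregated time_granularity / min_date / max_date / total_count keys. Below is a minimal, hedged sketch of that aggregation pattern — collect_statistics, the lock handling and the sample values are illustrative, not code from this repository:

import multiprocessing


def collect_statistics(stats, lock, min_date, max_date, count):
    # Fold one worker's partial result into the shared proxy dict; DictProxy
    # updates are not atomic, so a Manager lock guards the read-modify-write.
    with lock:
        if 'min_date' not in stats or min_date < stats['min_date']:
            stats['min_date'] = min_date
        if 'max_date' not in stats or max_date > stats['max_date']:
            stats['max_date'] = max_date
        stats['total_count'] = stats.get('total_count', 0) + count


if __name__ == '__main__':
    manager = multiprocessing.Manager()
    stats, lock = manager.dict(), manager.Lock()
    with multiprocessing.Pool(2) as pool:
        pool.starmap(collect_statistics,
                     [(stats, lock, '2024-01-01', '2024-06-30', 100),
                      (stats, lock, '2023-12-01', '2024-03-31', 50)])
    print(dict(stats))  # min_date 2023-12-01, max_date 2024-06-30, total_count 150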
+ 5 - 5
etl/wind_power/min_sec/ReadAndSaveTmp.py

@@ -8,7 +8,7 @@ import pandas as pd
 
 from etl.common.PathsAndTable import PathsAndTable
 from etl.wind_power.min_sec import TransParam
-from service.plt_service import update_trans_transfer_progress
+from service.trans_conf_service import update_trans_transfer_progress
 from utils.file.trans_methods import read_excel_files, split_array, del_blank, \
     create_file_path, read_file_to_df, valid_eval
 from utils.log.trans_log import trans_print
@@ -166,7 +166,7 @@ class ReadAndSaveTmp(object):
                     message = "整理临时文件,系统返回错误:" + str(e)
                     raise ValueError(message)
 
-                update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type,
+                update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type,
                                                round(20 + 20 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
 
@@ -186,7 +186,7 @@ class ReadAndSaveTmp(object):
                     message = "整理临时文件,系统返回错误:" + str(e)
                     raise ValueError(message)
 
-                update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type,
+                update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type,
                                                round(20 + 30 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
 
@@ -203,7 +203,7 @@ class ReadAndSaveTmp(object):
                     message = "整理临时文件,系统返回错误:" + str(e)
                     raise ValueError(message)
 
-                update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type,
+                update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type,
                                                round(20 + 30 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
 
@@ -361,6 +361,6 @@ class ReadAndSaveTmp(object):
         trans_print("开始保存数据到临时文件")
         begin = datetime.datetime.now()
         self.read_file_and_save_tmp()
-        update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type, 50,
+        update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type, 50,
                                        self.pathsAndTable.save_db)
         trans_print("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin)

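The repeated round(20 + N * (index + 1) / len(all_arrays), 2) expressions above map each chunk index into a fixed progress band, so transfer_progress climbs monotonically through the stage as chunks complete. A tiny illustrative helper — the name band_progress is mine, not the repository's:

def band_progress(band_start, band_width, index, total):
    # chunk (index + 1) of `total` lands in [band_start, band_start + band_width]
    return round(band_start + band_width * (index + 1) / total, 2)


assert band_progress(20, 30, 0, 5) == 26.0  # first of five chunks
assert band_progress(20, 30, 4, 5) == 50.0  # last chunk tops out the band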
+ 35 - 27
etl/wind_power/min_sec/StatisticsAndSaveFile.py → etl/wind_power/min_sec/StatisticsAndSaveTmpFormalFile.py

@@ -1,4 +1,3 @@
-import datetime
 import multiprocessing
 import traceback
 from os import path
@@ -9,8 +8,7 @@ import pandas as pd
 from etl.common.PathsAndTable import PathsAndTable
 from etl.wind_power.min_sec import TransParam
 from etl.wind_power.min_sec.ClassIdentifier import ClassIdentifier
-from service.plt_service import update_trans_transfer_progress
-from service.trans_service import get_trans_exec_code
+from service.trans_conf_service import update_trans_transfer_progress
 from utils.conf.read_conf import read_conf
 from utils.df_utils.util import get_time_space
 from utils.file.trans_methods import create_file_path, read_excel_files, read_file_to_df, split_array
@@ -20,7 +18,7 @@ from utils.systeminfo.sysinfo import use_files_get_max_cpu_count
 exec("import math")
 
 
-class StatisticsAndSaveFile(object):
+class StatisticsAndSaveTmpFormalFile(object):
 
     def __init__(self, paths_and_table: PathsAndTable, trans_param: TransParam, statistics_map,
                  rated_power_and_cutout_speed_map):
@@ -80,7 +78,7 @@ class StatisticsAndSaveFile(object):
 
         # 删除 有功功率 和 风速均为空的情况
         df.dropna(subset=['active_power', 'wind_velocity'], how='all', inplace=True)
-        trans_print(wind_col_name, "删除有功功率和风速均为空的情况后:", df.shape)
+        trans_print(origin_wind_name, wind_col_name, "删除有功功率和风速均为空的情况后:", df.shape)
         df.replace(np.nan, -999999999, inplace=True)
         number_cols = df.select_dtypes(include=['number']).columns.tolist()
         for col in df.columns:
@@ -89,7 +87,7 @@ class StatisticsAndSaveFile(object):
                     df[col] = pd.to_numeric(df[col], errors='coerce')
                     # 删除包含NaN的行(即那些列A转换失败的行)
                     df = df.dropna(subset=[col])
-                    trans_print(wind_col_name, "删除非数值列名:", col)
+                    trans_print(origin_wind_name, wind_col_name, "删除非数值列名:", col)
         df.replace(-999999999, np.nan, inplace=True)
 
         df.drop_duplicates(['wind_turbine_number', 'time_stamp'], keep='first', inplace=True)
@@ -107,11 +105,11 @@ class StatisticsAndSaveFile(object):
             df = df.groupby(['wind_turbine_number', 'time_stamp']).mean().reset_index()
         trans_print('有功功率前10个', df.head(10)['active_power'].values)
         power_df = df[df['active_power'] > 0]
-        trans_print(wind_col_name, "功率大于0的数量:", power_df.shape)
+        trans_print(origin_wind_name, wind_col_name, "功率大于0的数量:", power_df.shape)
         power = power_df.sample(int(power_df.shape[0] / 100))['active_power'].median()
 
         del power_df
-        trans_print(wind_col_name, '有功功率,中位数', power)
+        trans_print(origin_wind_name, wind_col_name, '有功功率,中位数', power)
         if power > 100000:
             df['active_power'] = df['active_power'] / 1000
         ## 做数据检测前,先强行处理有功功率
@@ -122,33 +120,43 @@ class StatisticsAndSaveFile(object):
             rated_power_and_cutout_speed_tuple = (None, None)
 
         # 如果有需要处理的,先进行代码处理,再进行打标签
-        exec_code = get_trans_exec_code(self.paths_and_table.batch_no, self.paths_and_table.read_type)
-        if exec_code:
-            if 'import ' in exec_code:
-                raise Exception("执行代码不支持导入包")
-            exec(exec_code)
+        # exec_code = get_trans_exec_code(self.paths_and_table.exec_id, self.paths_and_table.read_type)
+        # if exec_code:
+        #     if 'import ' in exec_code:
+        #         raise Exception("执行代码不支持导入包")
+        #     exec(exec_code)
 
-        class_identifiler = ClassIdentifier(wind_turbine_number=wind_col_name, origin_df=df,
+        class_identifiler = ClassIdentifier(wind_turbine_number=origin_wind_name, origin_df=df,
                                             rated_power=rated_power_and_cutout_speed_tuple[0],
                                             cut_out_speed=rated_power_and_cutout_speed_tuple[1])
         df = class_identifiler.run()
         df['year'] = df['time_stamp'].dt.year
         df['month'] = df['time_stamp'].dt.month
         df['day'] = df['time_stamp'].dt.day
-        df['time_stamp'] = df['time_stamp'].apply(
-            lambda x: x.strftime('%Y-%m-%d %H:%M:%S'))
+        df['time_stamp'] = df['time_stamp'].apply(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'))
 
         df['wind_turbine_name'] = str(origin_wind_name)
+        df['year_month'] = df[['year', 'month']].apply(lambda x: str(x['year']) + str(x['month']).zfill(2), axis=1)
+        cols = df.columns
 
-        if self.paths_and_table.save_zip:
-            save_path = path.join(self.paths_and_table.get_save_path(), str(wind_col_name) + '.csv.gz')
+        if self.paths_and_table.read_type == 'second':
+            type_col = 'year_month'
         else:
-            save_path = path.join(self.paths_and_table.get_save_path(), str(wind_col_name) + '.csv')
-        create_file_path(save_path, is_file_path=True)
-        if self.paths_and_table.save_zip:
-            df.to_csv(save_path, compression='gzip', index=False, encoding='utf-8')
-        else:
-            df.to_csv(save_path, index=False, encoding='utf-8')
+            type_col = 'year'
+
+        date_strs = df[type_col].unique().tolist()
+        for date_str in date_strs:
+            save_path = path.join(self.paths_and_table.get_tmp_formal_path(), str(date_str),
+                                  str(origin_wind_name) + '.csv')
+            create_file_path(save_path, is_file_path=True)
+            now_df = df[df[type_col] == date_str][cols]
+            if self.paths_and_table.save_zip:
+                save_path = save_path + '.gz'
+                now_df.to_csv(save_path, compression='gzip', index=False, encoding='utf-8')
+            else:
+                now_df.to_csv(save_path, index=False, encoding='utf-8')
+
+            del now_df
 
         self.set_statistics_data(df)
 
@@ -166,8 +174,8 @@ class StatisticsAndSaveFile(object):
             for index, arr in enumerate(all_arrays):
                 with multiprocessing.Pool(split_count) as pool:
                     pool.starmap(self.save_to_csv, [(i,) for i in arr])
-                update_trans_transfer_progress(self.paths_and_table.batch_no, self.paths_and_table.read_type,
-                                               round(50 + 20 * (index + 1) / len(all_arrays), 2),
+                update_trans_transfer_progress(self.paths_and_table.id, self.paths_and_table.read_type,
+                                               round(50 + 15 * (index + 1) / len(all_arrays), 2),
                                                self.paths_and_table.save_db)
 
         except Exception as e:
@@ -177,5 +185,5 @@ class StatisticsAndSaveFile(object):
 
     def run(self):
         self.mutiprocessing_to_save_file()
-        update_trans_transfer_progress(self.paths_and_table.batch_no, self.paths_and_table.read_type, 70,
+        update_trans_transfer_progress(self.paths_and_table.id, self.paths_and_table.read_type, 65,
                                        self.paths_and_table.save_db)

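The key structural change in this file: instead of one CSV per turbine, each turbine's frame is split by a period column (year_month for second data, year for minute data) and written as one file per period under the tmp-formal directory, which is what later enables per-partition database loads. A hedged, stand-alone miniature of that loop — the directory layout and turbine name F01 are invented; the real code takes both from PathsAndTable:

from os import path, makedirs

import pandas as pd

df = pd.DataFrame({'time_stamp': pd.to_datetime(['2024-01-05 00:00:00', '2024-02-10 00:00:00']),
                   'active_power': [100.0, 200.0]})
df['year'] = df['time_stamp'].dt.year
df['month'] = df['time_stamp'].dt.month
# the same zero-padded YYYYMM key the diff builds
df['year_month'] = df[['year', 'month']].apply(lambda x: str(x['year']) + str(x['month']).zfill(2), axis=1)

for date_str in df['year_month'].unique().tolist():
    out_dir = path.join('tmp_formal', str(date_str))
    makedirs(out_dir, exist_ok=True)
    now_df = df[df['year_month'] == date_str]
    # one gzip csv per period per turbine, e.g. tmp_formal/202401/F01.csv.gz
    now_df.to_csv(path.join(out_dir, 'F01.csv.gz'), compression='gzip',
                  index=False, encoding='utf-8')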
+ 79 - 48
etl/wind_power/wave/WaveTrans.py

@@ -1,9 +1,12 @@
+import datetime
 import json
 import multiprocessing
 
 from service.plt_service import get_all_wind
 from service.trans_service import get_wave_conf, save_df_to_db, get_or_create_wave_table, \
     get_wave_data, delete_exist_wave_data
+from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
+    update_trans_status_success
 from utils.file.trans_methods import *
 from utils.log.trans_log import set_trance_id
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
@@ -13,46 +16,53 @@ exec("from os.path import *")
 
 class WaveTrans(object):
 
-    def __init__(self, field_code, read_path, save_path: str):
-        self.field_code = field_code
-        self.read_path = read_path
-        self.save_path = save_path
+    def __init__(self, id, wind_farm_code, read_dir):
+        self.id = id
+        self.wind_farm_code = wind_farm_code
+        self.read_dir = read_dir
         self.begin = datetime.datetime.now()
 
-    def get_data_exec(self, func_code, arg):
+        self.engine_count = 0
+        self.min_date = None
+        self.max_date = None
+        self.data_count = 0
+
+    def get_data_exec(self, func_code, filepath, measupoint_names: set):
         exec(func_code)
-        return locals()['get_data'](arg)
+        return locals()['get_data'](filepath, measupoint_names)
 
     def del_exists_data(self, df):
         min_date, max_date = df['time_stamp'].min(), df['time_stamp'].max()
-        db_df = get_wave_data(self.field_code + '_wave', min_date, max_date)
+        db_df = get_wave_data(self.wind_farm_code + '_wave', min_date, max_date)
 
         exists_df = pd.merge(db_df, df,
                              on=['wind_turbine_name', 'time_stamp', 'sampling_frequency', 'mesure_point_name'],
                              how='inner')
         ids = [int(i) for i in exists_df['id'].to_list()]
         if ids:
-            delete_exist_wave_data(self.field_code + "_wave", ids)
+            delete_exist_wave_data(self.wind_farm_code + "_wave", ids)
 
     def run(self):
-        trance_id = '-'.join([self.field_code, 'wave'])
+        update_trans_status_running(self.id)
+        trance_id = '-'.join([self.wind_farm_code, 'wave'])
         set_trance_id(trance_id)
-        all_files = read_files(self.read_path, ['csv'])
-        print(len)
+        all_files = read_files(self.read_dir, ['txt'])
+        self.data_count = len(all_files)
+        update_trans_transfer_progress(self.id, 5)
         # 最大取系统cpu的 1/2
         split_count = get_available_cpu_count_with_percent(1 / 2)
 
-        all_wind, _ = get_all_wind(self.field_code, False)
+        all_wind, _ = get_all_wind(self.wind_farm_code, False)
 
-        get_or_create_wave_table(self.field_code + '_wave')
+        get_or_create_wave_table(self.wind_farm_code + '_wave')
 
-        wave_conf = get_wave_conf(self.field_code)
+        wave_conf = get_wave_conf(self.wind_farm_code)
 
         base_param_exec = wave_conf['base_param_exec']
         map_dict = {}
         if base_param_exec:
             base_param_exec = base_param_exec.replace('\r\n', '\n').replace('\t', '    ')
-            print(base_param_exec)
+            trans_print(base_param_exec)
             if 'import ' in base_param_exec:
                 raise Exception("方法不支持import方法")
 
@@ -60,36 +70,57 @@
         for point in mesure_poins:
             map_dict[wave_conf[point]] = point.replace('conf_', '')
 
-        with multiprocessing.Pool(split_count) as pool:
-            file_datas = pool.starmap(self.get_data_exec, [(base_param_exec, i) for i in all_files])
-
-        print("读取文件耗时:", datetime.datetime.now() - self.begin)
-
-        result_list = list()
-        for file_data in file_datas:
-            wind_turbine_name, time_stamp, sampling_frequency, rotational_speed, mesure_point_name, mesure_data = \
-                file_data[0], file_data[1], file_data[2], file_data[3], file_data[4], file_data[5]
-
-            if mesure_point_name in map_dict.keys():
-                result_list.append(
-                    [wind_turbine_name, time_stamp, rotational_speed, sampling_frequency, mesure_point_name,
-                     mesure_data])
-
-        df = pd.DataFrame(result_list,
-                          columns=['wind_turbine_name', 'time_stamp', 'rotational_speed', 'sampling_frequency',
-                                   'mesure_point_name', 'mesure_data'])
-        df['time_stamp'] = pd.to_datetime(df['time_stamp'], errors='coerce')
-        df['mesure_point_name'] = df['mesure_point_name'].map(map_dict)
-        df.dropna(subset=['mesure_point_name'], inplace=True)
-
-        df['wind_turbine_number'] = df['wind_turbine_name'].map(all_wind).fillna(df['wind_turbine_name'])
-
-        df['mesure_data'] = df['mesure_data'].apply(lambda x: json.dumps(x))
-
-        df.sort_values(by=['time_stamp', 'mesure_point_name'], inplace=True)
-
-        self.del_exists_data(df)
-
-        save_df_to_db(self.field_code + '_wave', df, batch_count=1000)
-
-        print("总耗时:", datetime.datetime.now() - self.begin)
+        wind_turbine_name_set = set()
+
+        all_array = split_array(all_files, split_count * 10)
+        total_index = len(all_array)
+        for index, now_array in enumerate(all_array):
+            index_begin = datetime.datetime.now()
+            with multiprocessing.Pool(split_count) as pool:
+                file_datas = pool.starmap(self.get_data_exec,
+                                          [(base_param_exec, i, list(map_dict.keys())) for i in now_array])
+
+            update_trans_transfer_progress(self.id, 20 + int(index / total_index * 60))
+            trans_print("读取文件耗时:", datetime.datetime.now() - self.begin)
+
+            result_list = list()
+            for file_data in file_datas:
+                if file_data:
+                    wind_turbine_name, time_stamp, sampling_frequency, rotational_speed, mesure_point_name, type, mesure_data = \
+                        file_data[0], file_data[1], file_data[2], file_data[3], file_data[4], file_data[5], file_data[6]
+
+                    if mesure_point_name in map_dict.keys():
+                        wind_turbine_name_set.add(wind_turbine_name)
+                        if self.min_date is None or self.min_date > time_stamp:
+                            self.min_date = time_stamp
+                        if self.max_date is None or self.max_date < time_stamp:
+                            self.max_date = time_stamp
+
+                        result_list.append(
+                            [wind_turbine_name, time_stamp, rotational_speed, sampling_frequency, mesure_point_name,
+                             type,
+                             mesure_data])
+
+            if result_list:
+                df = pd.DataFrame(result_list,
+                                  columns=['wind_turbine_name', 'time_stamp', 'rotational_speed', 'sampling_frequency',
+                                           'mesure_point_name', 'type', 'mesure_data'])
+                df['time_stamp'] = pd.to_datetime(df['time_stamp'], errors='coerce')
+                df['mesure_point_name'] = df['mesure_point_name'].map(map_dict)
+                df.dropna(subset=['mesure_point_name'], inplace=True)
+
+                df['wind_turbine_number'] = df['wind_turbine_name'].map(all_wind).fillna(df['wind_turbine_name'])
+
+                df['mesure_data'] = df['mesure_data'].apply(lambda x: json.dumps(x))
+
+                df.sort_values(by=['time_stamp', 'mesure_point_name'], inplace=True)
+                # self.del_exists_data(df)
+                save_df_to_db(self.wind_farm_code + '_wave', df, batch_count=400)
+            trans_print(f"总共{total_index}组,当前{index + 1}", "本次写入耗时:", datetime.datetime.now() - index_begin,
+                        "总耗时:", datetime.datetime.now() - self.begin)
+
+        update_trans_status_success(self.id, 'wave', len(wind_turbine_name_set), None,
+                                    self.min_date, self.max_date, self.data_count)
+
+        # update_trans_status_success(self.id)
+        trans_print("总耗时:", datetime.datetime.now() - self.begin)

+ 0 - 3
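The rewritten run() no longer parses every file in a single pool call: it walks the file list in chunks of split_count * 10, parses each chunk with a fresh pool, and flushes that chunk to the database before reading the next, which bounds peak memory on large wave archives. A hedged sketch of the pattern — parse_file and the file names are placeholders, and the local split_array stands in for the repository's utils.file.trans_methods helper on the assumption that its second argument is a chunk size:

import multiprocessing


def parse_file(filepath):
    # stand-in for get_data_exec: one parsed record per file
    return ('F01', '2024-01-01 00:00:00', '25.6Hz', 1500.0, 'gearbox', 2, [0.1, 0.2])


def split_array(items, size):
    return [items[i:i + size] for i in range(0, len(items), size)]


if __name__ == '__main__':
    all_files = [f'wave_{i}.txt' for i in range(100)]  # illustrative paths
    split_count = 4
    for index, chunk in enumerate(split_array(all_files, split_count * 10)):
        with multiprocessing.Pool(split_count) as pool:
            records = pool.map(parse_file, chunk)
        # build a DataFrame from `records` and save it to the DB here, so at
        # most one chunk of parsed files is held in memory at a time
        print(f'chunk {index + 1}: {len(records)} records')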
package.sh

@@ -1,3 +0,0 @@
-pyinstaller --clean -F -n etl_tool app_run.py
-
-#python -m nuitka --onefile --remove-output app_run.py

+ 249 - 10
requirements.txt

@@ -1,29 +1,268 @@
+aiofiles==22.1.0
+aiohttp==3.9.5
+aiosignal==1.3.1
+alabaster==0.7.16
+aliyun-python-sdk-core==2.15.1
+aliyun-python-sdk-kms==2.16.3
+aniso8601==9.0.1
+anyio==4.6.0
+APScheduler==3.10.4
+argcomplete==1.10.3
+arrow==1.3.0
+asgiref==3.8.1
+astroid==3.2.2
+asttokens==2.4.1
+async-timeout==4.0.3
+asyncio==3.4.3
+atomicwrites==1.4.1
+attrs==23.2.0
+autopep8==2.0.4
+Babel==2.15.0
+backcall==0.2.0
+backports-datetime-fromisoformat==2.0.1
+backports.tarfile==1.2.0
+bcrypt==4.1.3
+beautifulsoup4==4.8.2
+binaryornot==0.4.4
+black==24.4.2
+bleach==6.1.0
+blinker==1.8.2
+cache==1.0.3
+cachelib==0.9.0
+certifi==2024.6.2
+cffi==1.16.0
 chardet==5.2.0
-contourpy==1.3.0
+charset-normalizer==3.3.2
+click==8.1.7
+cloudpickle==3.0.0
+colorama==0.4.6
+comm==0.2.2
+compressed_rtf==1.0.6
+contourpy==1.2.1
+cookiecutter==2.6.0
+crcmod==1.7
+cryptography==41.0.2
 cycler==0.12.1
 DBUtils==3.1.0
+debugpy==1.8.2
+decorator==5.1.1
+defusedxml==0.7.1
+diff-match-patch==20230430
+dill==0.3.8
+distro==1.9.0
+Django==4.1.13
+docopt==0.6.2
+docstring-to-markdown==0.15
+docutils==0.21.2
+docx2txt==0.8
+ebcdic==1.1.1
 et-xmlfile==1.1.0
-fonttools==4.53.1
+exceptiongroup==1.2.1
+executing==2.0.1
+extract-msg==0.28.7
+fastapi==0.115.0
+fastapi-offline==1.7.3
+fastjsonschema==2.20.0
+flake8==7.1.0
+Flask==3.0.3
+Flask-APScheduler==1.13.1
+Flask-Caching==2.3.0
+Flask-Cors==4.0.1
+Flask-Excel==0.0.7
+Flask-Executor==1.0.0
+Flask-HTTPAuth==4.8.0
+Flask-Login==0.6.3
+flask-restx==1.3.0
+Flask-Script==2.0.6
+flask-siwadoc==0.2.2
+Flask-SQLAlchemy==3.1.1
+Flask-WTF==1.2.1
+fonttools==4.53.0
+frozenlist==1.4.1
+fsspec==2024.12.0
 greenlet==3.0.3
-importlib_resources==6.4.5
-kiwisolver==1.4.7
-matplotlib==3.9.2
+h11==0.14.0
+idna==3.7
+imagesize==1.4.1
+IMAPClient==2.1.0
+importlib_metadata==8.0.0
+importlib_resources==6.4.0
+inflection==0.5.1
+iniconfig==2.0.0
+intervaltree==3.1.0
+ipykernel==6.29.4
+ipython==8.12.3
+isort==5.13.2
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jedi==0.19.1
+jellyfish==1.0.4
+Jinja2==3.1.4
+jmespath==0.10.0
+joblib==1.4.2
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+jupyter_client==8.6.2
+jupyter_core==5.7.2
+jupyterlab_pygments==0.3.0
+keyring==25.2.1
+kiwisolver==1.4.5
+lml==0.1.0
+loguru==0.7.2
+lxml==5.2.2
+m3u8==5.1.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.0
+matplotlib-inline==0.1.7
+mccabe==0.7.0
+mdurl==0.1.2
+mistune==3.0.2
+more-itertools==10.3.0
+multidict==6.0.5
+mypy-extensions==1.0.0
+mysqlclient==2.2.4
+nbclient==0.10.0
+nbconvert==7.16.4
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.2.1
 numpy==2.0.0
-openpyxl==3.1.5
+numpydoc==1.7.0
+olefile==0.47
+openpyxl==3.1.4
+oss2==2.18.6
 packaging==24.1
 pandas==2.2.2
-pillow==10.4.0
+pandocfilters==1.5.1
+paramiko==3.4.0
+parso==0.8.4
+pathspec==0.12.1
+pdfminer==20191125
+pdfminer.six==20191110
+pdfminer3k==1.3.4
+peewee==3.17.5
+pexpect==4.9.0
+pickleshare==0.7.5
+pillow==10.3.0
+pipreqs==0.5.0
+platformdirs==4.2.2
+pluggy==1.5.0
+ply==3.11
+prompt_toolkit==3.0.47
 psutil==6.0.0
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyarrow==18.1.0
+pycodestyle==2.11.1
+pycparser==2.22
+pycryptodome==3.20.0
+pydantic==1.9.0
+pydocstyle==6.3.0
+pyexcel==0.7.0
+pyexcel-io==0.6.6
+pyexcel-webio==0.1.4
+pyflakes==3.2.0
+Pygments==2.18.0
+PyJWT==2.8.0
+pylint==3.2.3
+pylint-venv==3.0.3
+pyls-spyder==0.4.0
 PyMySQL==1.1.0
-pyparsing==3.1.4
+PyNaCl==1.5.0
+pyparsing==3.1.2
+PyPDF2==3.0.1
+pypdfium2==4.30.0
+pyperclip==1.9.0
+PyQt-SiliconUI==1.0.1
+PyQt5==5.15.10
+PyQt5-Qt5==5.15.2
+PyQt5-sip==12.13.0
+PyQtWebEngine==5.15.6
+PyQtWebEngine-Qt5==5.15.2
+pytest==8.3.2
 python-calamine==0.2.3
 python-dateutil==2.9.0.post0
+python-lsp-black==2.0.0
+python-lsp-jsonrpc==1.1.2
+python-lsp-server==1.11.0
+python-pptx==0.6.23
+python-slugify==8.0.4
+pytoolconfig==1.3.1
 pytz==2024.1
+pywin32==306
+pywin32-ctypes==0.2.2
+pyxxl==0.3.6
 PyYAML==6.0.1
+pyzmq==26.0.3
+QDarkStyle==3.2.3
+qstylizer==0.2.3
+QtAwesome==1.3.1
+qtconsole==5.5.2
+QtPy==2.4.1
 rarfile==4.2
-six==1.16.0
+redis==5.0.7
+referencing==0.35.1
+requests==2.32.3
+rich==13.7.1
+rope==1.13.0
+rpds-py==0.18.1
+Rtree==1.2.0
+scikit-learn==1.5.1
+scipy==1.13.1
+six==1.12.0
+sniffio==1.3.1
+snowballstemmer==2.2.0
+sortedcontainers==2.4.0
+soupsieve==2.5
+SpeechRecognition==3.8.1
+Sphinx==7.3.7
+sphinxcontrib-applehelp==1.0.8
+sphinxcontrib-devhelp==1.0.6
+sphinxcontrib-htmlhelp==2.0.5
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-qthelp==1.0.7
+sphinxcontrib-serializinghtml==1.1.10
+spyder==5.5.5
+spyder-kernels==2.5.2
 SQLAlchemy==2.0.30
+sqlparse==0.5.0
+stack-data==0.6.3
+starlette==0.38.6
+tabula-py==2.9.3
+tabulate==0.9.0
+text-unidecode==1.3
+textdistance==4.6.2
+textract==1.6.5
+texttable==1.7.0
+threadpoolctl==3.5.0
+three-merge==0.1.1
+tinycss2==1.3.0
+tomli==2.0.1
+tomlkit==0.12.5
+tornado==6.4.1
+traitlets==5.14.3
+types-python-dateutil==2.9.0.20240316
 typing_extensions==4.12.2
 typing_extensions==4.12.2
 tzdata==2024.1
+ufile==3.2.9
+ujson==5.10.0
+urllib3==2.2.2
+uvicorn==0.32.1
+watchdog==4.0.1
+wcwidth==0.2.13
+web.py==0.40.dev1
+webencodings==0.5.1
+Werkzeug==3.0.3
+whatthepatch==1.0.5
+win32-setctime==1.1.0
+WTForms==3.1.2
 xlrd==2.0.1
-zipp==3.20.1
+XlsxWriter==3.2.0
+yapf==0.40.2
+yarg==0.1.9
+yarl==1.9.4
+zipp==3.19.2

+ 0 - 145
service/plt_service.py

@@ -6,139 +6,6 @@ import datetime
 from service.common_connect import plt
 
 
-def update_timeout_trans_data():
-    sql = """
-    UPDATE data_transfer  
-    SET trans_sys_status = 2,err_info='运行超时失败',transfer_state=2
-    WHERE   
-        (  
-            (transfer_type = 'second' AND TIMESTAMPDIFF(HOUR, transfer_start_time, NOW()) > 24)  
-            OR  
-            (transfer_type = 'minute' AND TIMESTAMPDIFF(HOUR, transfer_start_time, NOW()) > 6)  
-            OR  
-            (transfer_type = 'warn' AND TIMESTAMPDIFF(HOUR, transfer_start_time, NOW()) > 6)  
-            OR  
-            (transfer_type = 'fault' AND TIMESTAMPDIFF(HOUR, transfer_start_time, NOW()) > 6)  
-        )  
-        AND trans_sys_status = 0
-    """
-    plt.execute(sql)
-
-
-def update_trans_status_running(batch_no, trans_type, schedule_exec=True):
-    if schedule_exec:
-        exec_sql = """
-        update data_transfer set transfer_state = 0,trans_sys_status = 0 ,transfer_start_time = now(),err_info='',
-        engine_count =0,time_granularity=0,transfer_finish_time=null,
-        data_min_time= null,data_max_time= null,transfer_data_count=null
-        where batch_code = %s  and transfer_type = %s
-        """
-        plt.execute(exec_sql, (batch_no, trans_type))
-
-
-def update_trans_status_error(batch_no, trans_type, message="", save_db=True):
-    if save_db:
-        exec_sql = """
-        update data_transfer set transfer_state = 2,trans_sys_status=2 ,err_info= %s,transfer_finish_time=now() 
-        where batch_code = %s  and  transfer_type = %s
-        """
-
-        message = message if len(message) <= 200 else message[0:200]
-        plt.execute(exec_sql, (message, batch_no, trans_type))
-
-
-def update_trans_status_success(batch_no, trans_type, wind_count=0, time_granularity=0,
-                                min_date=datetime.datetime.now(),
-                                max_date=datetime.datetime.now(),
-                                total_count=0, save_db=True):
-    if save_db:
-        if min_date is not None:
-            exec_sql = """
-            update data_transfer set transfer_state = 1,trans_sys_status = 1,transfer_progress=100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now(),
-            data_min_time= %s,data_max_time= %s,transfer_data_count=%s
-            where batch_code = %s  and transfer_type = %s
-            """
-            plt.execute(exec_sql, (wind_count, time_granularity, min_date, max_date, total_count, batch_no, trans_type))
-        else:
-            exec_sql = """
-            update data_transfer set transfer_state = 1,trans_sys_status = 1,transfer_progress = 100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now()
-            where batch_code = %s  and transfer_type = %s
-            """
-            plt.execute(exec_sql, (wind_count, time_granularity, batch_no, trans_type))
-
-
-def update_trans_transfer_progress(batch_no, trans_type, transfer_progress=0, save_db=True):
-    if save_db:
-        exec_sql = """
-        update data_transfer set transfer_progress =%s where batch_code = %s  and transfer_type = %s
-        """
-        plt.execute(exec_sql, (int(transfer_progress), batch_no, trans_type))
-
-
-# 获取执行的数据
-def get_batch_exec_data(run_count: int = 1) -> dict:
-    query_running_sql = "select count(1) as count from data_transfer where trans_sys_status = 0"
-    query_next_exec_sql = """
-    SELECT
-        t.*,a.field_name,b.batch_name
-    FROM
-        data_transfer t INNER JOIN wind_field a on t.field_code = a.field_code
-        inner join wind_field_batch b on t.batch_code = b.batch_code
-    WHERE
-         t.trans_sys_status in (-1,1,2) and t.transfer_state = 0
-    AND t.transfer_addr != ''
-    ORDER BY
-        t.update_time
-    LIMIT 1
-    """
-    data = plt.execute(query_running_sql)
-    now_count = int(data[0]['count'])
-    if now_count >= run_count:
-        return None
-    else:
-        data = plt.execute(query_next_exec_sql)
-        if type(data) == tuple:
-            return {}
-        return data[0]
-
-
-def get_data_by_batch_no_and_type(batch_no, transfer_type):
-    query_exec_sql = f"""
-    SELECT
-        t.*,a.field_name,b.batch_name
-    FROM
-        data_transfer t INNER JOIN wind_field a on t.field_code = a.field_code
-        inner join wind_field_batch b on t.batch_code = b.batch_code
-    WHERE
-         t.trans_sys_status in (-1,1,2) and t.transfer_state = 2 and t.batch_code = '{batch_no}' and t.transfer_type = '{transfer_type}'
-    AND t.transfer_addr != ''
-    """
-
-    data = plt.execute(query_exec_sql)
-    if type(data) == tuple:
-        return None
-    return data[0]
-
-
-## 合并多个batch_使用
-def get_hebing_data_by_batch_no_and_type(batch_no, transfer_type):
-    query_exec_sql = f"""
-    SELECT
-        t.*,a.field_name,b.batch_name
-    FROM
-        data_transfer t INNER JOIN wind_field a on t.field_code = a.field_code
-        inner join wind_field_batch b on t.batch_code = b.batch_code
-    WHERE
-         t.trans_sys_status = 1 and t.transfer_state = 1 and t.batch_code = '{batch_no}' and t.transfer_type = '{transfer_type}'
-    AND t.transfer_addr != ''
-    """
-
-    data = plt.execute(query_exec_sql)
-    if type(data) == tuple:
-        return None
-    return data[0]
-
-
 def get_all_wind(field_code, need_rated_param=True):
     query_sql = """
     SELECT t.engine_code,t.engine_name,t.rated_capacity,a.rated_cut_out_windspeed 
@@ -173,15 +40,3 @@ def get_base_wind_and_power(wind_turbine_number):
         return None
     return dict_datas
 
-
-if __name__ == '__main__':
-    # print(get_batch_exec_data(run_count=1))
-    #
-    # print("**********************")
-    # print(get_batch_exec_data(run_count=2))
-    # print("**********************")
-    print(get_data_by_batch_no_and_type("test_", "second"))
-    # print(update_trans_status_success("test_唐龙-定时任务测试", "second", 10))
-    begin = datetime.datetime.now()
-
-    print(get_all_wind('WOF034900024'))

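What survives in plt_service.py is the read-only lookup side; callers use it as all_wind, _ = get_all_wind(field_code, False) and treat the first element as a raw-name-to-engine-code mapping. A hedged illustration of that downstream normalization — the sample mapping is invented, while the map/fillna line mirrors how WaveTrans uses it:

import pandas as pd

all_wind = {'1#': 'WOG00001', '2#': 'WOG00002'}  # invented name -> engine_code sample
df = pd.DataFrame({'wind_turbine_name': ['1#', '2#', '99#']})
# unmapped names fall back to the raw name instead of becoming NaN
df['wind_turbine_number'] = df['wind_turbine_name'].map(all_wind).fillna(df['wind_turbine_name'])
print(df)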
+ 145 - 0
service/trans_conf_service.py

@@ -0,0 +1,145 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2025/1/9
+# @Author  : 魏志亮
+from datetime import datetime
+
+from service.common_connect import trans
+
+
+def update_timeout_trans_data():
+    sql = """
+    UPDATE data_transfer  
+    SET trans_sys_status = 2,err_info='运行超时失败',transfer_status=2
+    WHERE   
+        TIMESTAMPDIFF(HOUR, transfer_start_time, NOW()) > 24 
+        AND trans_sys_status = 0
+    """
+    trans.execute(sql)
+
+
+def update_trans_status_running(id, trans_type, save_db=True):
+    if save_db:
+        exec_sql = """
+        update data_transfer set transfer_status = 0,trans_sys_status = 0 ,transfer_start_time = now(),err_info='',
+        engine_count =0,time_granularity=0,transfer_finish_time=null,transfer_progress=0,
+        data_min_time= null,data_max_time= null,transfer_data_count=null
+        where id = %s  and transfer_type = %s
+        """
+        trans.execute(exec_sql, (id, trans_type))
+
+
+def update_archive_success(id, trans_type, archive_path, save_db=True):
+    if save_db:
+        exec_sql = """
+        update data_transfer set transfer_progress=70,archive_path = %s
+        where id = %s  and transfer_type = %s
+        """
+        trans.execute(exec_sql, (archive_path, id, trans_type))
+
+
+def update_trans_status_error(id, trans_type, message="", save_db=True):
+    if save_db:
+        exec_sql = """
+        update data_transfer set transfer_status = 2,trans_sys_status=2 ,err_info= %s,transfer_finish_time=now() 
+        where id = %s  and  transfer_type = %s
+        """
+
+        message = message if len(message) <= 200 else message[0:200]
+        trans.execute(exec_sql, (message, id, trans_type))
+
+
+def update_trans_status_success(id, trans_type, wind_count=0, time_granularity=0,
+                                min_date=datetime.now(),
+                                max_date=datetime.now(),
+                                total_count=0, save_db=True):
+    if save_db:
+        if min_date is not None:
+            exec_sql = """
+            update data_transfer set transfer_status = 1,trans_sys_status = 1,transfer_progress=100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now(),
+            data_min_time= %s,data_max_time= %s,transfer_data_count=%s
+            where id = %s  and transfer_type = %s
+            """
+            trans.execute(exec_sql, (wind_count, time_granularity, min_date, max_date, total_count, id, trans_type))
+        else:
+            exec_sql = """
+            update data_transfer set transfer_status = 1,trans_sys_status = 1,transfer_progress = 100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now()
+            where id = %s  and transfer_type = %s
+            """
+            trans.execute(exec_sql, (wind_count, time_granularity, id, trans_type))
+
+
+def update_trans_transfer_progress(id, trans_type, transfer_progress=0, save_db=True):
+    print(id, trans_type, transfer_progress)
+    if save_db:
+        exec_sql = """
+        update data_transfer set transfer_progress =%s where id = %s  and transfer_type = %s
+        """
+        trans.execute(exec_sql, (int(transfer_progress), id, trans_type))
+
+
+def get_now_running_count():
+    query_running_sql = """
+    select count(1) as count from data_transfer where trans_sys_status = 0
+    """
+    data = trans.execute(query_running_sql)
+    now_count = int(data[0]['count'])
+    return now_count
+
+
+# 获取执行的数据
+def get_batch_exec_data() -> dict:
+    query_next_exec_sql = """
+    SELECT
+        *
+    FROM
+        data_transfer t 
+    WHERE
+         t.trans_sys_status in (-1,1,2) and t.transfer_status = -1
+    AND t.read_dir != ''
+    ORDER BY
+        t.update_time
+    LIMIT 1
+    """
+    data = trans.execute(query_next_exec_sql)
+    if type(data) == tuple:
+        return None
+    return data[0]
+
+
+def get_data_by_id(id):
+    query_exec_sql = f"""
+    SELECT
+        t.*,a.field_name,b.batch_name
+    FROM
+        data_transfer t INNER JOIN wind_field a on t.field_code = a.field_code
+        inner join wind_field_batch b on t.id = b.id
+    WHERE
+         t.trans_sys_status in (-1,1,2) and t.transfer_status = 2 and t.id = '{id}'
+    AND t.read_dir != ''
+    """
+
+    data = trans.execute(query_exec_sql)
+    if type(data) == tuple:
+        return None
+    return data[0]
+
+def create_wave_table(table_name, save_db=True):
+    if save_db:
+        exec_sql = f"""
+        CREATE TABLE `{table_name}` (
+          `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键',
+          `wind_turbine_number` varchar(20) DEFAULT NULL COMMENT '风机编号',
+          `wind_turbine_name` varchar(20) DEFAULT NULL COMMENT '原始风机编号',
+          `time_stamp` datetime DEFAULT NULL COMMENT '时间',
+          `rotational_speed` float DEFAULT NULL COMMENT '转速',
+          `sampling_frequency` varchar(50) DEFAULT NULL COMMENT '采样频率',
+          `mesure_point_name` varchar(100) DEFAULT NULL COMMENT '测点名称',
+          `type` int(11) DEFAULT '-1' COMMENT '-1:不存在 0:角度 1:速度 2:加速度 3:位移,默认 -1',
+          `mesure_data` longtext COMMENT '测点数据',
+          PRIMARY KEY (`id`),
+          KEY `wind_turbine_number` (`wind_turbine_number`),
+          KEY `time_stamp` (`time_stamp`),
+          KEY `mesure_point_name` (`mesure_point_name`)
+        ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4
+        """
+        trans.execute(exec_sql)

+ 137 - 128
service/trans_service.py

@@ -6,6 +6,7 @@ from os import *
 
 import pandas as pd
 
+from service.trans_conf_service import create_wave_table
 from utils.file.trans_methods import split_array
 from utils.log.trans_log import trans_print
 from service.common_connect import trans
@@ -57,106 +58,141 @@ def get_wave_conf(field_code) -> dict:
     return res[0]
 
 
-def creat_min_sec_table(table_name, win_names, read_type):
-    create_sql = f"""
-    CREATE TABLE
-    IF NOT EXISTS `{table_name}` (
-        `wind_turbine_number` VARCHAR (20) DEFAULT NULL COMMENT '风机编号',
-        `wind_turbine_name` VARCHAR(20) DEFAULT NULL COMMENT '风机原始名称',
-        `time_stamp` datetime NOT NULL COMMENT '时间戳',
-        `active_power` DOUBLE DEFAULT NULL COMMENT '有功功率',
-        `rotor_speed` DOUBLE DEFAULT NULL COMMENT '风轮转速',
-        `generator_speed` DOUBLE DEFAULT NULL COMMENT '发电机转速',
-        `wind_velocity` DOUBLE DEFAULT NULL COMMENT '风速',
-        `pitch_angle_blade_1` DOUBLE DEFAULT NULL COMMENT '桨距角1',
-        `pitch_angle_blade_2` DOUBLE DEFAULT NULL COMMENT '桨距角2',
-        `pitch_angle_blade_3` DOUBLE DEFAULT NULL COMMENT '桨距角3',
-        `cabin_position` DOUBLE DEFAULT NULL COMMENT '机舱位置',
-        `true_wind_direction` DOUBLE DEFAULT NULL COMMENT '绝对风向',
-        `yaw_error1` DOUBLE DEFAULT NULL COMMENT '对风角度',
-        `set_value_of_active_power` DOUBLE DEFAULT NULL COMMENT '有功功率设定值',
-        `gearbox_oil_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱油温',
-        `generatordrive_end_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '发电机驱动端轴承温度',
-        `generatornon_drive_end_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '发电机非驱动端轴承温度',
-        `cabin_temperature` DOUBLE DEFAULT NULL COMMENT '机舱内温度',
-        `twisted_cable_angle` DOUBLE DEFAULT NULL COMMENT '扭缆角度',
-        `front_back_vibration_of_the_cabin` DOUBLE DEFAULT NULL COMMENT '机舱前后振动',
-        `side_to_side_vibration_of_the_cabin` DOUBLE DEFAULT NULL COMMENT '机舱左右振动',
-        `actual_torque` DOUBLE DEFAULT NULL COMMENT '实际力矩',
-        `given_torque` DOUBLE DEFAULT NULL COMMENT '给定力矩',
-        `clockwise_yaw_count` DOUBLE DEFAULT NULL COMMENT '顺时针偏航次数',
-        `counterclockwise_yaw_count` DOUBLE DEFAULT NULL COMMENT '逆时针偏航次数',
-        `unusable` DOUBLE DEFAULT NULL COMMENT '不可利用',
-        `power_curve_available` DOUBLE DEFAULT NULL COMMENT '功率曲线可用',
-        `required_gearbox_speed` DOUBLE DEFAULT NULL COMMENT '齿轮箱转速',
-        `inverter_speed_master_control` DOUBLE DEFAULT NULL COMMENT '变频器转速(主控)',
-        `outside_cabin_temperature` DOUBLE DEFAULT NULL COMMENT '环境温度',
-        `main_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '主轴承轴承温度',
-        `gearbox_high_speed_shaft_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱高速轴轴承温度',
-        `gearboxmedium_speed_shaftbearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱中速轴轴承温度',
-        `gearbox_low_speed_shaft_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱低速轴轴承温度',
-        `generator_winding1_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组1温度',
-        `generator_winding2_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组2温度',
-        `generator_winding3_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组3温度',
-        `wind_turbine_status` DOUBLE DEFAULT NULL COMMENT '风机状态1',
-        `wind_turbine_status2` DOUBLE DEFAULT NULL COMMENT '风机状态2',
-        `turbulence_intensity` DOUBLE DEFAULT NULL COMMENT '湍流强度',
-        `lab` int DEFAULT NULL COMMENT '-1:停机 0:好点  1:欠发功率点;2:超发功率点;3:额定风速以上的超发功率点 4: 限电',
-        `year` INT (4) DEFAULT NULL COMMENT '年',
-        `month` INT (2) DEFAULT NULL COMMENT '月',
-        `day` INT (2) DEFAULT NULL COMMENT '日',
-        `param1` DOUBLE DEFAULT NULL COMMENT '预留1',
-        `param2` DOUBLE DEFAULT NULL COMMENT '预留2',
-        `param3` DOUBLE DEFAULT NULL COMMENT '预留3',
-        `param4` DOUBLE DEFAULT NULL COMMENT '预留4',
-        `param5` DOUBLE DEFAULT NULL COMMENT '预留5',
-        `param6` VARCHAR (20) DEFAULT NULL COMMENT '预留6',
-        `param7` VARCHAR (20) DEFAULT NULL COMMENT '预留7',
-        `param8` VARCHAR (20) DEFAULT NULL COMMENT '预留8',
-        `param9` VARCHAR (20) DEFAULT NULL COMMENT '预留9',
-        `param10` VARCHAR (20) DEFAULT NULL COMMENT '预留10',
-         KEY `time_stamp` (`time_stamp`),
-         KEY `wind_turbine_number` (`wind_turbine_number`)
-    ) ENGINE = myisam DEFAULT CHARSET = utf8mb4
+def creat_min_sec_table(table_name, trans_type):
+    exists_table_sql = f"""
+    select count(1) as count from information_schema.tables where table_schema = '{trans.database}' and table_name = '{table_name}'
     """
     """
+    count = trans.execute(exists_table_sql)[0]['count']
+    if count > 0:
+        trans_print(f"{table_name}已存在")
+
+    if trans_type == 'second':
+        add_key = 'KEY `year_month` (`year_month`)'
+        key = '`year_month`'
+    else:
+        add_key = 'KEY `year` (`year`)'
+        key = '`year`'
+
+    if count == 0:
+        create_sql = f"""
+        CREATE TABLE
+        IF NOT EXISTS `{table_name}` (
+            `wind_turbine_number` VARCHAR (20) DEFAULT NULL COMMENT '风机编号',
+            `wind_turbine_name` VARCHAR(20) DEFAULT NULL COMMENT '风机原始名称',
+            `time_stamp` datetime NOT NULL COMMENT '时间戳',
+            `active_power` DOUBLE DEFAULT NULL COMMENT '有功功率',
+            `rotor_speed` DOUBLE DEFAULT NULL COMMENT '风轮转速',
+            `generator_speed` DOUBLE DEFAULT NULL COMMENT '发电机转速',
+            `wind_velocity` DOUBLE DEFAULT NULL COMMENT '风速',
+            `pitch_angle_blade_1` DOUBLE DEFAULT NULL COMMENT '桨距角1',
+            `pitch_angle_blade_2` DOUBLE DEFAULT NULL COMMENT '桨距角2',
+            `pitch_angle_blade_3` DOUBLE DEFAULT NULL COMMENT '桨距角3',
+            `cabin_position` DOUBLE DEFAULT NULL COMMENT '机舱位置',
+            `true_wind_direction` DOUBLE DEFAULT NULL COMMENT '绝对风向',
+            `yaw_error1` DOUBLE DEFAULT NULL COMMENT '对风角度',
+            `set_value_of_active_power` DOUBLE DEFAULT NULL COMMENT '有功功率设定值',
+            `gearbox_oil_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱油温',
+            `generatordrive_end_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '发电机驱动端轴承温度',
+            `generatornon_drive_end_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '发电机非驱动端轴承温度',
+            `cabin_temperature` DOUBLE DEFAULT NULL COMMENT '机舱内温度',
+            `twisted_cable_angle` DOUBLE DEFAULT NULL COMMENT '扭缆角度',
+            `front_back_vibration_of_the_cabin` DOUBLE DEFAULT NULL COMMENT '机舱前后振动',
+            `side_to_side_vibration_of_the_cabin` DOUBLE DEFAULT NULL COMMENT '机舱左右振动',
+            `actual_torque` DOUBLE DEFAULT NULL COMMENT '实际力矩',
+            `given_torque` DOUBLE DEFAULT NULL COMMENT '给定力矩',
+            `clockwise_yaw_count` DOUBLE DEFAULT NULL COMMENT '顺时针偏航次数',
+            `counterclockwise_yaw_count` DOUBLE DEFAULT NULL COMMENT '逆时针偏航次数',
+            `unusable` DOUBLE DEFAULT NULL COMMENT '不可利用',
+            `power_curve_available` DOUBLE DEFAULT NULL COMMENT '功率曲线可用',
+            `required_gearbox_speed` DOUBLE DEFAULT NULL COMMENT '齿轮箱转速',
+            `inverter_speed_master_control` DOUBLE DEFAULT NULL COMMENT '变频器转速(主控)',
+            `outside_cabin_temperature` DOUBLE DEFAULT NULL COMMENT '环境温度',
+            `main_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '主轴承轴承温度',
+            `gearbox_high_speed_shaft_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱高速轴轴承温度',
+            `gearboxmedium_speed_shaftbearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱中速轴轴承温度',
+            `gearbox_low_speed_shaft_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱低速轴轴承温度',
+            `generator_winding1_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组1温度',
+            `generator_winding2_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组2温度',
+            `generator_winding3_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组3温度',
+            `wind_turbine_status` DOUBLE DEFAULT NULL COMMENT '风机状态1',
+            `wind_turbine_status2` DOUBLE DEFAULT NULL COMMENT '风机状态2',
+            `turbulence_intensity` DOUBLE DEFAULT NULL COMMENT '湍流强度',
+            `lab` int DEFAULT NULL COMMENT '-1:停机 0:好点  1:欠发功率点;2:超发功率点;3:额定风速以上的超发功率点 4: 限电',
+            `year` INT (4) DEFAULT NULL COMMENT '年',
+            `month` INT (2) DEFAULT NULL COMMENT '月',
+            `day` INT (2) DEFAULT NULL COMMENT '日',
+            `year_month` int(6) DEFAULT NULL COMMENT '年-月',
+            `param1` DOUBLE DEFAULT NULL COMMENT '预留1',
+            `param2` DOUBLE DEFAULT NULL COMMENT '预留2',
+            `param3` DOUBLE DEFAULT NULL COMMENT '预留3',
+            `param4` DOUBLE DEFAULT NULL COMMENT '预留4',
+            `param5` DOUBLE DEFAULT NULL COMMENT '预留5',
+            `param6` VARCHAR (20) DEFAULT NULL COMMENT '预留6',
+            `param7` VARCHAR (20) DEFAULT NULL COMMENT '预留7',
+            `param8` VARCHAR (20) DEFAULT NULL COMMENT '预留8',
+            `param9` VARCHAR (20) DEFAULT NULL COMMENT '预留9',
+            `param10` VARCHAR (20) DEFAULT NULL COMMENT '预留10',
+             KEY `time_stamp` (`time_stamp`),
+             KEY `wind_turbine_number` (`wind_turbine_number`),
+             {add_key}
+        )
+        PARTITION BY LIST COLUMNS ({key}, `wind_turbine_number`) (
+        PARTITION pDefault VALUES IN ((000000, 'wind_turbine_number'))
+        ) 
+        """
+        trans.execute(create_sql)
 
-    if read_type == 'second' and win_names and len(win_names) > 1:
 
-        create_sql = create_sql + f" PARTITION BY LIST COLUMNS(`wind_turbine_number`) ("
-        partition_strs = list()
-        for wind_name in win_names:
-            partition_strs.append(f" PARTITION p{wind_name} VALUES IN('{wind_name}')")
+def add_partation(table_name: str, date_str: str, wind_turbine_number):
+    p_name = f'p{date_str}_{wind_turbine_number}'
+    add_sql = f"""
+    alter table {table_name} add partition (
+        partition {p_name} VALUES IN (({date_str}, '{wind_turbine_number}'))
+    )
+    """
+    trans.execute(add_sql)
 
-        create_sql = create_sql + f" PARTITION BY LIST COLUMNS(`wind_turbine_number`) ("
-        partition_strs = list()
-        for wind_name in win_names:
-            partition_strs.append(f" PARTITION p{wind_name} VALUES IN('{wind_name}')")
 
-        create_sql = create_sql + ",".join(partition_strs) + ")"
 
-    trans.execute(create_sql)
+def remove_partation(table_name: str, date_str: str, wind_turbine_number):
+    p_name = f'p{date_str}_{wind_turbine_number}'
+    remove_sql = f"""
+    alter table {table_name} DROP PARTITION {p_name}
+    """
+    trans.execute(remove_sql)
 
 
-def rename_table(table_name, renamed_table_name, save_db=True):
-    if save_db:
-        rename_sql = f"RENAME TABLE {table_name} TO {renamed_table_name}"
-        try:
-            trans.execute(rename_sql)
-        except:
-            trans_print(traceback.format_exc())
+def add_or_remove_partation(table_name: str, date_str: str, wind_turbine_number):
+    p_name = f'p{date_str}_{wind_turbine_number}'
+    query_partation = f"""
+    SELECT count(1) as count from information_schema.`PARTITIONS` t 
+    where t.TABLE_SCHEMA = '{trans.database}' 
+    and t.TABLE_NAME = '{table_name}' 
+    and t.PARTITION_NAME = '{p_name}'
+    """
+    count = trans.execute(query_partation)[0]['count']
+    if count == 0:
+        add_partation(table_name, date_str, wind_turbine_number)
+    else:
+        remove_partation(table_name, date_str, wind_turbine_number)
+        add_partation(table_name, date_str, wind_turbine_number)
 
 
-def drop_table(table_name, save_db=True):
-    if save_db:
-        rename_sql = f"drop TABLE `{table_name}`"
-        try:
-            trans.execute(rename_sql)
-        except:
-            trans_print(traceback.format_exc())
+def save_partation_file_to_db(table_name: str, file: str, wind_turbine_number, date_str, batch_count=100000):
+    base_name = path.basename(file)
+    # wind_turbine_number = path.basename(file).split(".")[0]
+    # date_str = path.basename(path.dirname(file))
 
+    add_or_remove_partation(table_name, date_str, wind_turbine_number)
 
 
-def clear_table(table_name, save_db=True):
-    if save_db:
-        rename_sql = f"truncate TABLE `{table_name}`"
-        try:
-            trans.execute(rename_sql)
-        except:
-            trans_print(traceback.format_exc())
+    try:
+        for i, df in enumerate(pd.read_csv(file, chunksize=batch_count)):
+            trans.execute_df_save(df, table_name)
+            count = (i + 1) * batch_count
+            trans_print(base_name, f"Chunk {count} written to MySQL.")
+    except Exception as e:
+        trans_print(traceback.format_exc())
+        message = base_name + str(e)
+        raise Exception(message)
 
 
 
 
 def save_file_to_db(table_name: str, file: str, batch_count=100000):
 def save_file_to_db(table_name: str, file: str, batch_count=100000):
@@ -218,6 +254,14 @@ def create_warn_fault_table(table_name):
     trans.execute(sql)
     trans.execute(sql)
 
 
 
 
+def drop_table(table_name):
+    drop_sql = f"DROP TABLE `{table_name}`"
+    try:
+        trans.execute(drop_sql)
+    except:
+        pass
+
+
 def get_or_create_wave_table(table_name):
 def get_or_create_wave_table(table_name):
     create_table = False
     create_table = False
     query_sql = f"select 1 from `{table_name}` limit 1"
     query_sql = f"select 1 from `{table_name}` limit 1"
@@ -227,23 +271,7 @@ def get_or_create_wave_table(table_name):
         create_table = True
         create_table = True
 
 
     if create_table:
     if create_table:
-        sql = f"""
-        CREATE TABLE `{table_name}` (
-          `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键',
-          `wind_turbine_number` varchar(20) DEFAULT NULL COMMENT '风机编号',
-          `wind_turbine_name` varchar(20) DEFAULT NULL COMMENT '原始风机编号',
-          `time_stamp` datetime DEFAULT NULL COMMENT '时间',
-          `sampling_frequency` varchar(50) DEFAULT NULL COMMENT '分析频率',
-          `mesure_point_name` varchar(100) DEFAULT NULL COMMENT '测点名称',
-          `mesure_data` mediumtext COMMENT '测点数据',
-          PRIMARY KEY (`id`),
-          KEY `wind_turbine_number` (`wind_turbine_number`),
-          KEY `time_stamp` (`time_stamp`),
-          KEY `mesure_point_name` (`mesure_point_name`)
-        ) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4
-        """
-
-        trans.execute(sql)
+        create_wave_table(table_name)
 
 
 
 
 def get_wave_data(table_name, min_data, max_data):
 def get_wave_data(table_name, min_data, max_data):
@@ -261,34 +289,15 @@ def delete_exist_wave_data(table_name, ids):
         trans.execute(delete_sql, array)
         trans.execute(delete_sql, array)
 
 
 
 
-def get_trans_exec_code(batch_no, query_type):
-    query_sql = f"SELECT * from batch_exec_code t where t.batch_no = '{batch_no}' and type='{query_type}' and t.`status` = 1 limit 1"
+def get_trans_exec_code(id, query_type):
+    query_sql = f"SELECT * from batch_exec_code t where t.id = '{id}' and type='{query_type}' and t.`status` = 1 limit 1"
     res = trans.execute(query_sql)
     res = trans.execute(query_sql)
     if type(res) == tuple or type(res) == str:
     if type(res) == tuple or type(res) == str:
         return None
         return None
     exec_code = res[0]['exec_code']
     exec_code = res[0]['exec_code']
-    trans_print("批次", batch_no, '类型', type, '获取到执行代码:', exec_code)
+    trans_print("任务ID", id, '类型', type, '获取到执行代码:', exec_code)
     return exec_code
     return exec_code
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
-    # path_prix = r"/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF063100040-WOB00013/second"
-    # files = ["WOG00030.csv", "WOG00034.csv"]
-    # for path in files:
-    #     save_file_to_db("WOF063100040-WOB00013_second", path_prix + sep + path, batch_count=100000)
-
-    # sql = """
-    # SELECT wind_turbine_number, time_stamp, wind_velocity, active_power
-    #                            FROM `WOF085500002-WOB000001_second`
-    #                            WHERE  time_stamp >= '2024-02-17 00:00:00' AND time_stamp <= '2024-05-14 00:00:00' AND lab = 0
-    # """
-    #
-    # begin = datetime.datetime.now()
-    # df = trans.read_sql_to_df(sql)
-    # end = datetime.datetime.now()
-    # print(df.shape)
-    # print(df.info())
-    # print("Time used:", (end - begin).seconds)
-    # get_fault_warn_conf("test", "fault")
-
     delete_exist_wave_data('SKF001_wave', [1, 2, 3])
     delete_exist_wave_data('SKF001_wave', [1, 2, 3])
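
Review note: this hunk replaces the per-turbine LIST partitioning with composite LIST COLUMNS partitions keyed on ({key}, wind_turbine_number). add_or_remove_partation makes a load idempotent at partition granularity: when the partition already exists it is dropped (discarding its rows) and re-created, so re-importing one (date, turbine) slice replaces the old data instead of appending duplicates. Two small flags: the helpers spell "partation" throughout (consistently, so callers still work), and get_trans_exec_code logs the builtin type instead of query_type, so the log line prints a class object rather than the query type. A usage sketch of the new path follows; the table name, file path and codes are invented for illustration, and trans is assumed to be the project's ConnectMysql wrapper exposing execute() and execute_df_save():

    # Hypothetical example, not part of the commit.
    table_name = 'WOF091200030_minute'    # assumed table name
    wind_turbine_number = 'WOG00030'      # assumed turbine code
    date_str = '202411'                   # yields partition p202411_WOG00030
    # The partition is dropped and re-added before the chunked insert, so a
    # re-run replaces this (date, turbine) slice instead of duplicating it.
    save_partation_file_to_db(table_name,
                              f'/tmp/{date_str}/{wind_turbine_number}.csv',
                              wind_turbine_number, date_str)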

+ 0 - 111
service/wave_service.py

@@ -1,111 +0,0 @@
-import datetime
-
-from service.common_connect import plt
-
-
-def update_timeout_wave_trans_data():
-    sql = """
-    UPDATE wave_data_transfer  
-    SET trans_sys_status = 2,err_info='运行超时失败',transfer_state=2
-    WHERE  TIMESTAMPDIFF(HOUR, transfer_start_time, NOW()) > 6  
-        AND trans_sys_status = 0
-    """
-    plt.execute(sql)
-
-
-def update_wave_trans_status_running(id, schedule_exec=True):
-    if schedule_exec:
-        exec_sql = """
-        update wave_data_transfer set transfer_state = 0,trans_sys_status = 0 ,transfer_start_time = now(),err_info='',
-        engine_count =0,time_granularity=0,transfer_finish_time=null,
-        data_min_time= null,data_max_time= null,transfer_data_count=null
-        where id = %s 
-        """
-        plt.execute(exec_sql, id)
-
-
-def update_wave_trans_status_error(id, message="", save_db=True):
-    if save_db:
-        exec_sql = """
-        update wave_data_transfer set transfer_state = 2,trans_sys_status=2 ,err_info= %s,transfer_finish_time=now() 
-        where id = %s  
-        """
-
-        message = message if len(message) <= 200 else message[0:200]
-        plt.execute(exec_sql, (message, id))
-
-
-def update_wave_trans_status_success(id, wind_count=0, time_granularity=0,
-                                     min_date=datetime.datetime.now(),
-                                     max_date=datetime.datetime.now(),
-                                     total_count=0, save_db=True):
-    if save_db:
-        if min_date is not None:
-            exec_sql = """
-            update wave_data_transfer set transfer_state = 1,trans_sys_status = 1,transfer_progress=100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now(),
-            data_min_time= %s,data_max_time= %s,transfer_data_count=%s
-            where id = %s  
-            """
-            plt.execute(exec_sql, (wind_count, time_granularity, min_date, max_date, total_count, id))
-        else:
-            exec_sql = """
-            update wave_data_transfer set transfer_state = 1,trans_sys_status = 1,transfer_progress = 100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now()
-            where id = %s  
-            """
-            plt.execute(exec_sql, (wind_count, time_granularity, id))
-
-
-def update_wave_trans_transfer_progress(id, transfer_progress=0, save_db=True):
-    if save_db:
-        exec_sql = """
-        update wave_data_transfer set transfer_progress = %s where id = %s
-        """
-        plt.execute(exec_sql, (int(transfer_progress), id))
-
-
-def create_wave_table(table_name, save_db=True):
-    if save_db:
-        exec_sql = f"""
-        CREATE TABLE `{table_name}` (
-          `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键',
-          `wind_turbine_number` varchar(20) DEFAULT NULL COMMENT '风机编号',
-          `wind_turbine_name` varchar(20) DEFAULT NULL COMMENT '原始风机编号',
-          `time_stamp` datetime DEFAULT NULL COMMENT '时间',
-          `rotational_speed` float DEFAULT NULL COMMENT '转速',
-          `sampling_frequency` varchar(50) DEFAULT NULL COMMENT '采样频率',
-          `mesure_point_name` varchar(100) DEFAULT NULL COMMENT '测点名称',
-          `mesure_data` mediumtext COMMENT '测点数据',
-          PRIMARY KEY (`id`),
-          KEY `wind_turbine_number` (`wind_turbine_number`),
-          KEY `time_stamp` (`time_stamp`),
-          KEY `mesure_point_name` (`mesure_point_name`)
-        ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4
-        """
-        plt.execute(exec_sql)
-
-
-# 获取执行的数据
-def get_wave_exec_data(run_count: int = 1) -> dict:
-    query_running_sql = "select count(1) as count from data_transfer where trans_sys_status = 0"
-    query_next_exec_sql = """
-    SELECT
-        t.*,a.field_name,b.batch_name
-    FROM
-        wave_data_transfer t INNER JOIN wind_field a on t.field_code = a.field_code
-        inner join wind_field_batch b on t.batch_code = b.batch_code
-    WHERE
-         t.trans_sys_status in (-1,1,2) and t.transfer_state = 0
-    AND t.transfer_addr != ''
-    ORDER BY
-        t.update_time
-    LIMIT 1
-    """
-    data = plt.execute(query_running_sql)
-    now_count = int(data[0]['count'])
-    if now_count >= run_count:
-        return None
-    else:
-        data = plt.execute(query_next_exec_sql)
-        if type(data) == tuple:
-            return {}
-        return data[0]
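
Review note: deleting wave_service.py also removes its plt-based create_wave_table, while the trans_service.py hunk above now calls create_wave_table(table_name) from get_or_create_wave_table. A trans-based replacement is presumably defined in a part of service/trans_service.py not shown in this view; a sketch of what it would look like, reusing the DDL deleted here (an assumption, not visible in the diff):

    def create_wave_table(table_name):
        # Same schema as the deleted wave_service version, executed through
        # the trans connection instead of plt.
        exec_sql = f"""
        CREATE TABLE `{table_name}` (
          `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键',
          `wind_turbine_number` varchar(20) DEFAULT NULL COMMENT '风机编号',
          `wind_turbine_name` varchar(20) DEFAULT NULL COMMENT '原始风机编号',
          `time_stamp` datetime DEFAULT NULL COMMENT '时间',
          `rotational_speed` float DEFAULT NULL COMMENT '转速',
          `sampling_frequency` varchar(50) DEFAULT NULL COMMENT '采样频率',
          `mesure_point_name` varchar(100) DEFAULT NULL COMMENT '测点名称',
          `mesure_data` mediumtext COMMENT '测点数据',
          PRIMARY KEY (`id`),
          KEY `wind_turbine_number` (`wind_turbine_number`),
          KEY `time_stamp` (`time_stamp`),
          KEY `mesure_point_name` (`mesure_point_name`)
        ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4
        """
        trans.execute(exec_sql)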

+ 8 - 12
test_run_local.py

@@ -6,8 +6,6 @@ import sys
 import traceback
 from os import *
 
-from utils.conf.read_conf import yaml_conf, read_conf
-
 
 def get_exec_data(batch_no=None, read_type=None, run_count=1):
     if batch_no and read_type:
@@ -41,7 +39,9 @@ def run(data: dict = dict(), save_db=False, step=0, end=4):
 
 
 if __name__ == '__main__':
-    env = 'dev'
+    from utils.conf.read_conf import yaml_conf, read_conf
+
+    env = 'prod'
     if len(sys.argv) >= 2:
         env = sys.argv[1]
 
@@ -55,18 +55,14 @@ if __name__ == '__main__':
     from service.plt_service import get_batch_exec_data, get_data_by_batch_no_and_type
     from etl.wind_power.fault_warn.FaultWarnTrans import FaultWarnTrans
     from etl.wind_power.min_sec.MinSecTrans import MinSecTrans
+    from etl.wind_power.wave.WaveTrans import WaveTrans
 
     begin = datetime.datetime.now()
-    data = dict()
-
-    data['batch_code'] = 'xinhuashuidian'
-    data['batch_name'] = '新华水电故障'
-    data['transfer_type'] = 'fault'
-    data['transfer_addr'] = r'D:\data\新华水电\收资数据\故障告警\汇能机组数据-故障'
-    data['field_code'] = 'xinhuashuidian'
-    data['field_name'] = '新华水电'
+
     try:
-        run(data=data, save_db=False, step=0, end=3)
+        exec_process = WaveTrans(1, 'WOF091200030',
+                                 r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/收资数据/振动/CMSFTPServer/ZYXFDC2')
+        exec_process.run()
     except Exception as e:
         trans_print(traceback.format_exc())
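
Review note: test_run_local.py now defaults env to 'prod' and drives a hard-coded WaveTrans job, so running the local test script with no arguments touches the production configuration. A possible guard (a suggestion only, not in the commit; environ comes from the script's existing `from os import *`):

    # Refuse to run the local test against prod without an explicit opt-in.
    env = sys.argv[1] if len(sys.argv) >= 2 else 'prod'
    if env == 'prod' and environ.get('ALLOW_PROD_TEST') != '1':
        raise SystemExit('test_run_local: set ALLOW_PROD_TEST=1 to run against prod')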
 
 

+ 0 - 91
test_run_local_piliang.py

@@ -1,91 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Time    : 2024/6/11
-# @Author  : 魏志亮
-import datetime
-import sys
-import traceback
-from os import *
-
-from utils.conf.read_conf import yaml_conf, read_conf
-
-
-def get_exec_data(batch_no=None, read_type=None, run_count=1):
-    if batch_no and read_type:
-        data = get_data_by_batch_no_and_type(batch_no, read_type)
-        if data is None:
-            raise ValueError(f"未找到批次号:{batch_no},类型:{read_type}")
-
-    else:
-        data = get_batch_exec_data(run_count)
-        if data is None:
-            trans_print("当前有任务在执行")
-            sys.exit(0)
-        elif len(data.keys()) == 0:
-            trans_print("当前无任务")
-            sys.exit(0)
-
-    return data
-
-
-def run(data: dict = dict(), save_db=False):
-    exec_process = None
-    if data['transfer_type'] in ['second', 'minute']:
-        exec_process = MinSecTrans(data=data, save_db=save_db)
-
-    if data['transfer_type'] in ['fault', 'warn']:
-        exec_process = FaultWarnTrans(data=data, save_db=save_db)
-
-    if exec_process is None:
-        raise Exception("No exec process")
-    exec_process.run()
-
-
-if __name__ == '__main__':
-    env = 'dev'
-    if len(sys.argv) >= 2:
-        env = sys.argv[1]
-
-    conf_path = path.abspath(f"./conf/etl_config_{env}.yaml")
-    environ['ETL_CONF'] = conf_path
-    yaml_config = yaml_conf(conf_path)
-    environ['env'] = env
-    run_count = int(read_conf(yaml_config, "run_batch_count", 1))
-
-    from utils.log.trans_log import trans_print
-    from service.plt_service import get_batch_exec_data, get_data_by_batch_no_and_type
-    from etl.wind_power.fault_warn.FaultWarnTrans import FaultWarnTrans
-    from etl.wind_power.min_sec.MinSecTrans import MinSecTrans
-    from utils.file.trans_methods import read_file_to_df
-
-    begin = datetime.datetime.now()
-    df = read_file_to_df("tmp_file/rebuild_data.csv")
-    results = list()
-    data = dict()
-    for batch_code, batch_name, transfer_type, transfer_addr, field_code, field_name \
-            in zip(df['batch_code'], df['batch_name'], df['transfer_type'], df['transfer_addr'], df['field_code'],
-                   df['field_name']):
-        batch_begin = datetime.datetime.now()
-        transfer_addr = transfer_addr.replace(r"/data/download/collection_data",
-                                              r"/data/download/datang_shangxian")
-        trans_print("开始执行批次:", batch_code, batch_name, transfer_type, field_code, field_name)
-        trans_print("批次路径:", transfer_addr)
-
-        data['batch_code'] = batch_code
-        data['batch_name'] = batch_name
-        data['transfer_type'] = transfer_type
-        data['transfer_addr'] = transfer_addr
-        data['field_code'] = field_code
-        data['field_name'] = field_name
-        try:
-            run(data=data, save_db=True)
-            results.append((batch_code, batch_name, transfer_type, field_code, field_name, 'success'))
-        except Exception as e:
-            results.append((batch_code, batch_name, transfer_type, field_code, field_name, 'error'))
-            trans_print(traceback.format_exc())
-        finally:
-            trans_print("执行结束,耗时:", datetime.datetime.now() - batch_begin, "总耗时:", datetime.datetime.now() - begin)
-
-    for data in results:
-        trans_print(data)
-
-    trans_print("执行结束,总耗时:", datetime.datetime.now() - begin)

+ 0 - 755
tmp_file/ClassIdentifier_1.py_bak

@@ -1,755 +0,0 @@
-import numpy as np
-from pandas import DataFrame
-
-from service.plt_service import get_base_wind_and_power
-from utils.file.trans_methods import read_file_to_df
-
-
-class ClassIdentifier(object):
-
-    def __init__(self, wind_turbine_number, file_path: str = None, origin_df: DataFrame = None, index='time_stamp',
-                 wind_velocity='wind_velocity',
-                 active_power='active_power'):
-        """
-        :param wind_turbine_number: The wind turbine number.
-        :param file_path: The file path of the input data.
-        :param origin_df: The pandas DataFrame containing the input data.
-        :param index: 索引字段
-        :param wind_velocity: 风速字段
-        :param active_power: 有功功率字段
-        """
-        self.wind_turbine_number = wind_turbine_number
-        self.index = index
-        self.wind_velocity = wind_velocity
-        self.active_power = active_power
-
-        self.rated_wind_speed = 'rated_wind_speed'
-        self.rated_capacity = 'rated_capacity'
-
-        if file_path is None and origin_df is None:
-            raise ValueError("Either file_path or origin_df should be provided.")
-
-        if file_path:
-            self.df = read_file_to_df(file_path)
-        else:
-            self.df = origin_df
-
-        self.df = self.df.set_index(keys=self.index)
-
-    def identifier(self):
-        # 风速 和 有功功率 df
-        wind_and_power_df = self.df[[self.wind_velocity, self.active_power]]
-        wind_and_power_df.reset_index(inplace=True)
-        wind_and_power_df_count = wind_and_power_df.shape[0]
-        PowerMax = wind_and_power_df[self.active_power].max()
-        PowerRated = np.ceil(PowerMax / 100) * 100
-        PRated = 1500  # 额定功率1500kw,可改为2000kw
-        VCutOut = 25
-        VCutIn = 3
-        VRated = 10
-        # 网格法确定风速风向分区数量,功率方向分区数量,
-        # PNum = (PRated+100)/25  #功率分区间隔25kW
-        PNum = int(np.ceil(PowerRated / 25))  # 功率分区间隔25kW
-        VNum = int(np.ceil(VCutOut / 0.25))  # 风速分区间隔0.25m/s
-
-        # 实发电量
-        EPActualTotal = 0  # 实发电量
-        for i in range(wind_and_power_df_count):
-            if wind_and_power_df.loc[i, self.active_power] >= 0:
-                EPActualTotal = EPActualTotal + wind_and_power_df.loc[i, self.active_power] / 6
-
-        print("EPActualTotal", EPActualTotal)
-        # 平均风速
-        WindSpeedAvr = 0
-        WindSum = 0
-        for i in range(wind_and_power_df_count):
-            if wind_and_power_df.loc[i, self.wind_velocity] >= 0:
-                WindSum = WindSum + wind_and_power_df.loc[i, self.wind_velocity]
-        WindSpeedAvr = WindSum / wind_and_power_df_count
-        print("windSpeedAvr", WindSpeedAvr)
-        # 用于计算损失电量的标杆功率曲线,可更换为风机设计功率曲线
-        # base_wind_and_power_df = get_base_wind_and_power(self.wind_turbine_number)
-        base_wind_and_power_df = read_file_to_df(r"D:\中能智能\matlib计算相关\好点坏点matlib计算\A型风机设计功率曲线.csv", header=None)
-        base_wind_and_power_df.columns = [self.rated_wind_speed, self.rated_capacity]
-        if base_wind_and_power_df.empty:
-            raise ValueError("风场编号:" + self.wind_turbine_number + "未查询到风速功率信息")
-        base_wind_and_power_count = base_wind_and_power_df.shape[0]
-
-        # 风机可利用率,计算方法:大于切入风速但发电功率小于0
-        TurbineRunRate = 0
-        nShouldGP = 0
-        nRealGP = 0
-        for i in range(wind_and_power_df_count):
-            if wind_and_power_df.loc[i, self.wind_velocity] >= VCutIn:
-                nShouldGP = nShouldGP + 1
-                if wind_and_power_df.loc[i, self.active_power] > 0:
-                    nRealGP = nRealGP + 1
-        if nShouldGP > 0:
-            TurbineRunRate = nRealGP / nShouldGP * 100
-
-        print("disp(TurbineRunRate)", TurbineRunRate)
-        # 理论电量-
-        EPIdealTotalAAA = 0  # 理论电量-
-        nWhichBin = 0
-        IdealPower = 0
-        for i in range(wind_and_power_df_count):
-            # 应发电量-理论
-            nWhichBin = 0
-            for m in range(base_wind_and_power_count - 1):
-                if base_wind_and_power_df.loc[m, self.rated_wind_speed] < wind_and_power_df.loc[
-                    i, self.wind_velocity] <= \
-                        base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
-                    nWhichBin = m
-                    break
-
-            # 插值计算对应设计功率
-            if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
-                continue
-
-            IdealPower = (wind_and_power_df.loc[i, self.wind_velocity] - base_wind_and_power_df.loc[nWhichBin,
-                                                                                                    self.rated_wind_speed]) / (
-                                 base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] -
-                                 base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) * (
-                                 base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
-                                 base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) \
-                         + base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
-            EPIdealTotalAAA = EPIdealTotalAAA + IdealPower / 6
-
-        print('EPIdealTotalAAA', EPIdealTotalAAA)
-        #
-        # 存储功率大于零的运行数据
-        DzMarch809 = np.zeros([wind_and_power_df_count, 2], dtype=float)
-        nCounter1 = 0
-        for i in range(wind_and_power_df_count):
-            if wind_and_power_df.loc[i, self.active_power] > 0:
-                DzMarch809[nCounter1, 0] = wind_and_power_df.loc[i, self.wind_velocity]
-                DzMarch809[nCounter1, 1] = wind_and_power_df.loc[i, self.active_power]
-
-                nCounter1 = nCounter1 + 1
-
-        print('nCounter1', nCounter1)
-
-        # 统计各网格落入的散点个数
-        XBoxNumber = np.ones([PNum, VNum], dtype=int)
-        nWhichP = -1
-        nWhichV = -1
-        for i in range(nCounter1):
-            for m in range(PNum):
-                if m * 25 < DzMarch809[i, 1] <= (m + 1) * 25:
-                    nWhichP = m
-                    break
-            for n in range(VNum):
-                if ((n + 1) * 0.25 - 0.125) < DzMarch809[i, 0] <= ((n + 1) * 0.25 + 0.125):
-                    nWhichV = n
-                    break
-
-            if nWhichP > -1 and nWhichV > -1:
-                XBoxNumber[nWhichP, nWhichV] = XBoxNumber[nWhichP, nWhichV] + 1
-
-        for m in range(PNum):
-            for n in range(VNum):
-                XBoxNumber[m, n] = XBoxNumber[m, n] - 1
-
-        print('XBoxNumber', XBoxNumber)
-        # 在功率方向将网格内散点绝对个数转换为相对百分比,备用
-        PBoxPercent = np.zeros([PNum, VNum], dtype=float)
-        PBinSum = np.zeros(PNum, dtype=int)
-
-        for i in range(PNum):
-            for m in range(VNum):
-                PBinSum[i] = PBinSum[i] + XBoxNumber[i, m]
-
-            for m in range(VNum):
-                if PBinSum[i] > 0:
-                    PBoxPercent[i, m] = XBoxNumber[i, m] / PBinSum[i] * 100
-
-        # 在风速方向将网格内散点绝对个数转换为相对百分比,备用
-        VBoxPercent = np.zeros([PNum, VNum], dtype=float)
-        VBinSum = np.zeros(VNum, dtype=int)
-
-        for i in range(VNum):
-            for m in range(PNum):
-                VBinSum[i] = VBinSum[i] + XBoxNumber[m, i]
-
-            for m in range(PNum):
-                if VBinSum[i] > 0:
-                    VBoxPercent[m, i] = XBoxNumber[m, i] / VBinSum[i] * 100
-
-        # 以水平功率带方向为准,分析每个水平功率带中,功率主带中心,即找百分比最大的网格位置。
-        PBoxMaxIndex = np.zeros(PNum, dtype=int)  # 水平功率带最大网格位置索引
-        PBoxMaxP = np.zeros(PNum, dtype=int)  # 水平功率带最大网格百分比
-
-        for m in range(PNum):
-            # 确定每一水平功率带的最大网格位置索引即百分比值
-            PBoxMaxP[m], PBoxMaxIndex[m] = PBoxPercent[m, :].max(), PBoxPercent[m, :].argmax()
-
-        # 以垂直风速方向为准,分析每个垂直风速带中,功率主带中心,即找百分比最大的网格位置。
-        VBoxMaxIndex = np.zeros(VNum, dtype=int)
-        VBoxMaxV = np.zeros(VNum, dtype=int)
-
-        for m in range(VNum):
-            [VBoxMaxV[m], VBoxMaxIndex[m]] = VBoxPercent[:, m].max(), VBoxPercent[:, m].argmax()
-
-        # 切入风速特殊处理,如果切入风速过于偏右,向左拉回
-        if PBoxMaxIndex[0] > 14:
-            PBoxMaxIndex[0] = 9
-
-        # 以水平功率带方向为基准,进行分析
-        DotDense = np.zeros(PNum, dtype=int)  # 每一水平功率带的功率主带包含的网格数
-        DotDenseLeftRight = np.zeros([PNum, 2], dtype=int)  # 存储每一水平功率带的功率主带以最大网格为中心,向向左,向右扩展的网格数
-        DotValve = 90  # 从中心向左右对称扩展网格的散点百分比和的阈值。
-        PDotDenseSum = 0
-
-        iSpreadLeft = 1  # 向左扩展网格计数,初值为1
-        iSpreadRight = 1  # 向右扩展网格技术,初值为1
-        for i in range(PNum - 6):  # 从最下层水平功率带1开始,向上到第PNum-6个水平功率带(额定功率一下水平功率带),逐一分析
-            PDotDenseSum = PBoxMaxP[i]  # 以中心最大水平功率带为基准,向左向右对称扩展网格,累加各网格散点百分比
-            iSpreadRight = 1
-            iSpreadLeft = 1
-            while PDotDenseSum < DotValve:
-
-                if (PBoxMaxIndex[i] + iSpreadRight) < VNum - 1:
-                    PDotDenseSum = PDotDenseSum + PBoxPercent[i, PBoxMaxIndex[i] + iSpreadRight]  # 向右侧扩展
-                    iSpreadRight = iSpreadRight + 1
-
-                if (PBoxMaxIndex[i] + iSpreadRight) > VNum - 1:
-                    break
-
-                if (PBoxMaxIndex[i] - iSpreadLeft) > 0:
-                    PDotDenseSum = PDotDenseSum + PBoxPercent[i, PBoxMaxIndex[i] - iSpreadLeft]  # 向左侧扩展
-                    iSpreadLeft = iSpreadLeft + 1
-
-                if (PBoxMaxIndex[i] - iSpreadLeft) <= 0:
-                    break
-
-            iSpreadRight = iSpreadRight - 1
-
-            iSpreadLeft = iSpreadLeft - 1
-            # 向左右对称扩展完毕
-
-            DotDenseLeftRight[i, 0] = iSpreadLeft
-            DotDenseLeftRight[i, 1] = iSpreadRight
-            DotDense[i] = iSpreadLeft + iSpreadRight + 1
-
-        # 各行功率主带右侧宽度的中位数最具有代表性
-        DotDenseWidthLeft = np.zeros([PNum - 6, 1], dtype=int)
-        for i in range(PNum - 6):
-            DotDenseWidthLeft[i] = DotDenseLeftRight[i, 1]
-
-        MainBandRight = np.median(DotDenseWidthLeft)
-
-        # 散点向右显著延展分布的水平功率带为限功率水平带
-        PowerLimit = np.zeros([PNum, 1], dtype=int)  # 各水平功率带是否为限功率标识,==1:是;==0:不是
-        WidthAverage = 0  # 功率主带平均宽度
-        WidthVar = 0  # 功率主带方差
-        # PowerLimitValve = 6    #限功率主带判别阈值
-        PowerLimitValve = np.ceil(MainBandRight) + 3  # 限功率主带判别阈值
-
-        nCounterLimit = 0
-        nCounter = 0
-
-        for i in range(PNum - 6):
-            if DotDenseLeftRight[i, 1] > PowerLimitValve and PBinSum[i] > 20:  # 如果向右扩展网格数大于阈值,且该水平功率带点总数>20,是
-                PowerLimit[i] = 1
-                nCounterLimit = nCounterLimit + 1
-
-            if DotDenseLeftRight[i, 1] <= PowerLimitValve:
-                WidthAverage = WidthAverage + DotDenseLeftRight[i, 1]  # 统计正常水平功率带右侧宽度
-                nCounter = nCounter + 1
-
-        WidthAverage = WidthAverage / nCounter  # 功率主带平均宽度
-
-        print("WidthAverage", WidthAverage)
-
-        # 各水平功率带的功率主带宽度的方差,反映从下到上宽度是否一致,或是否下宽上窄等异常情况
-        for i in range(PNum - 6):
-            if DotDenseLeftRight[i, 1] <= PowerLimitValve:
-                WidthVar = WidthVar + (DotDenseLeftRight[i, 1] - WidthAverage) * (
-                        DotDenseLeftRight[i, 1] - WidthAverage)
-
-        WidthVar = np.sqrt(WidthVar / nCounter)
-
-        # 各水平功率带,功率主带的风速范围,右侧扩展网格数*2*0.25
-        PowerBandWidth = WidthAverage * 2 * 0.25
-
-        # 对限负荷水平功率带的最大网格较下面相邻层显著偏右,拉回
-        for i in range(1, PNum - 6):
-            if PowerLimit[i] == 1 and abs(PBoxMaxIndex[i] - PBoxMaxIndex[i - 1]) > 5:
-                PBoxMaxIndex[i] = PBoxMaxIndex[i - 1] + 1
-
-        # 输出各层功率主带的左右边界网格索引
-        DotDenseInverse = np.zeros([PNum, 2], dtype=int)
-
-        for i in range(PNum):
-            DotDenseInverse[i, :] = DotDenseLeftRight[PNum - i - 1, :]
-
-        # print('DotDenseInverse', DotDenseInverse)
-
-        # 功率主带的右边界
-        CurveWidthR = int(np.ceil(WidthAverage) + 2)
-
-        # CurveWidthL = 6    #功率主带的左边界
-        CurveWidthL = CurveWidthR
-
-        BBoxLimit = np.zeros([PNum, VNum], dtype=int)  # 网格是否为限功率网格的标识,如果为限功率水平功率带,从功率主带右侧边缘向右的网格为限功率网格
-        for i in range(2, PNum - 6):
-            if PowerLimit[i] == 1:
-                for j in range(PBoxMaxIndex[i] + CurveWidthR, VNum):
-                    BBoxLimit[i, j] = 1
-
-        BBoxRemove = np.zeros([PNum, VNum], dtype=int)  # 数据异常需要剔除的网格标识,标识==1:功率主带右侧的欠发网格;==2:功率主带左侧的超发网格
-        for m in range(PNum - 6):
-            for n in range(PBoxMaxIndex[m] + CurveWidthR - 1, VNum):
-                BBoxRemove[m, n] = 1
-
-            for n in range(PBoxMaxIndex[m] - CurveWidthL - 1, 0, -1):
-                BBoxRemove[m, n] = 2
-
-        # 确定功率主带的左上拐点,即额定风速位置的网格索引
-        CurveTop = np.zeros(2, dtype=int)
-        CurveTopValve = 3  # 网格的百分比阈值
-        BTopFind = 0
-        for m in range(PNum - 4 - 1, 0, -1):
-            for n in range(VNum):
-                if VBoxPercent[m, n] > CurveTopValve and XBoxNumber[m, n] >= 10:  # 如左上角网格的百分比和散点个数大于阈值。
-                    CurveTop[0] = m
-                    CurveTop[1] = n
-                    BTopFind = 1
-                    break
-
-            if BTopFind == 1:
-                break
-
-        IsolateValve = 3
-        for m in range(PNum - 6):
-            for n in range(PBoxMaxIndex[m] + CurveWidthR - 1, VNum):
-                if PBoxPercent[m, n] < IsolateValve:
-                    BBoxRemove[m, n] = 1
-
-        # 功率主带顶部宽度
-        CurveWidthT = 2
-        for m in range(PNum - CurveWidthT - 1, PNum):
-            for n in range(VNum):
-                BBoxRemove[m, n] = 3  # 网格为额定功率以上的超发点
-
-        # 功率主带拐点左侧的欠发网格标识
-        for m in range(PNum - 5 - 1, PNum):
-            for n in range(CurveTop[1] - 2 - 1):
-                BBoxRemove[m, n] = 2
-
-        # 以网格的标识,决定该网格内数据的标识。Dzwind_and_power_dfSel功率非零数据的标识位。散点在哪个网格,此网格的标识即为该点的标识
-        Dzwind_and_power_dfSel = np.zeros(nCounter1, dtype=int)  # is ==1,欠发功率点;==2,超发功率点;==3,额定风速以上的超发功率点 ==4, 限电
-        nWhichP = 0
-        nWhichV = 0
-        nBadA = 0
-
-        for i in range(nCounter1):
-            for m in range(PNum):
-                if DzMarch809[i, 1] > (m - 1) * 25 and DzMarch809[i, 1] <= m * 25:
-                    nWhichP = m
-                    break
-
-            for n in range(VNum):
-                if DzMarch809[i, 0] > (n * 0.25 - 0.125) and DzMarch809[i, 0] <= (n * 0.25 + 0.125):
-                    nWhichV = n
-                    break
-
-            if nWhichP > 0 and nWhichV > 0:
-
-                if BBoxRemove[nWhichP, nWhichV] == 1:
-                    Dzwind_and_power_dfSel[i] = 1
-                    nBadA = nBadA + 1
-
-                if BBoxRemove[nWhichP, nWhichV] == 2:
-                    Dzwind_and_power_dfSel[i] = 2
-
-                if BBoxRemove[nWhichP, nWhichV] == 3:
-                    Dzwind_and_power_dfSel[i] = 0  # 3  # 额定风速以上的超发功率点认为是正常点,不再标识。
-
-                if BBoxLimit[nWhichP, nWhichV] == 1 and nWhichP>16:
-                    Dzwind_and_power_dfSel[i] = 4
-
-        print("nWhichP", nWhichP)
-        print("nWhichV", nWhichV)
-        print("nBadA", nBadA)
-
-        # 限负荷数据标识方法2:把数据切割为若干个窗口。对每一窗口,以第一个点为基准,连续nWindowLength个数据的功率在方差范围内,呈现显著水平分布的点
-        PVLimit = np.zeros([nCounter1, 2], dtype=int)  # 存储限负荷数据
-        nLimitTotal = 0
-        nWindowLength = 3
-        LimitWindow = np.zeros(nWindowLength, dtype=int)
-        UpLimit = 0  # 上限
-        LowLimit = 0  # 下限
-        PowerStd = 15  # 功率波动方差
-        bAllInUpLow = 1  # ==1:窗口内所有数据均在方差上下限之内,限负荷==0,不满足条件
-        bAllInAreas = 1  # ==1:窗口所有数据均在200~PRated-300kW范围内;==0:不满足此条件
-        nWindowNum = int(np.floor(nCounter1 / nWindowLength))
-        PowerLimitUp = PRated - 300
-        PowerLimitLow = 200
-        for i in range(nWindowNum):
-            for j in range(nWindowLength):
-                LimitWindow[j] = DzMarch809[i * nWindowLength + j, 1]
-
-            bAllInAreas = 1
-            for j in range(nWindowLength):
-                if LimitWindow[j] < PowerLimitLow or LimitWindow[j] > PowerLimitUp:
-                    bAllInAreas = 0
-
-            if bAllInAreas == 0:
-                continue
-
-            UpLimit = LimitWindow[0] + PowerStd
-            LowLimit = LimitWindow[0] - PowerStd
-            bAllInUpLow = 1
-            for j in range(1, nWindowLength):
-                if LimitWindow[j] < LowLimit or LimitWindow[j] > UpLimit:
-                    bAllInUpLow = 0
-
-            if bAllInUpLow == 1:
-                for j in range(nWindowLength):
-                    Dzwind_and_power_dfSel[i * nWindowLength + j] = 4  # 标识窗口内的数据为限负荷数据
-
-                for j in range(nWindowLength):
-                    PVLimit[nLimitTotal, :] = DzMarch809[i * nWindowLength + j, :]
-                    nLimitTotal = nLimitTotal + 1
-
-        print("nLimitTotal", nLimitTotal)
-
-        # 相邻水平功率主带的锯齿平滑
-        PVLeftDown = np.zeros(2, dtype=int)
-        PVRightUp = np.zeros(2, dtype=int)
-        nSmooth = 0
-        for i in range(PNum - 6 - 1):
-            PVLeftDown = np.zeros(2, dtype=int)
-            PVRightUp = np.zeros(2, dtype=int)
-
-            if (PBoxMaxIndex[i + 1] - PBoxMaxIndex[i]) >= 1:
-                PVLeftDown[0] = (PBoxMaxIndex[i] + CurveWidthR) * 0.25 - 0.125
-                PVLeftDown[1] = (i - 1) * 25
-
-                PVRightUp[0] = (PBoxMaxIndex[i + 1] + CurveWidthR) * 0.25 - 0.125
-                PVRightUp[1] = (i + 1 - 1) * 25
-
-                for m in range(nCounter1):
-                    if DzMarch809[m, 0] > PVLeftDown[0] and DzMarch809[m, 0] < PVRightUp[0] and PVLeftDown[1] < \
-                            DzMarch809[m, 1] < PVRightUp[1]:  # 在该锯齿中
-                        if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (
-                                PVRightUp[1] - PVLeftDown[1]) / (
-                                PVRightUp[0] - PVLeftDown[0]):  # 斜率大于对角连线,则在锯齿左上三角形中,选中
-                            Dzwind_and_power_dfSel[m] = 0
-                            nSmooth = nSmooth + 1
-
-        print("nSmooth", nSmooth)
-
-        # 存储好点
-        nCounterPV = 0
-        PVDot = np.zeros([nCounter1, 2], dtype=int)
-        for i in range(nCounter1):
-            if Dzwind_and_power_dfSel[i] == 0:
-                PVDot[nCounterPV, :] = DzMarch809[i, :]
-                nCounterPV = nCounterPV + 1
-
-        nCounterVP = nCounterPV
-        print("nCounterVP", nCounterVP)
-
-        # 存储坏点
-        nCounterBad = 0
-        PVBad = np.zeros([nCounter1, 2], dtype=int)
-        for i in range(nCounter1):
-            if Dzwind_and_power_dfSel[i] == 1 or Dzwind_and_power_dfSel[i] == 2 or Dzwind_and_power_dfSel[i] == 3:
-                PVBad[nCounterBad, :] = DzMarch809[i, :]
-                nCounterBad = nCounterBad + 1
-
-        print("nCounterBad", nCounterBad)
-
-        # 用功率主带中的好点绘制实测功率曲
-        XBinNumber = np.ones(50, dtype=int)
-        PCurve = np.zeros([50, 2], dtype=int)
-        PCurve[:, 0] = [i / 2 for i in range(1, 51)]
-        XBinSum = np.zeros([50, 2], dtype=int)
-        nWhichBin = 0
-
-        for i in range(nCounterVP):
-            nWhichBin = 0
-
-            for b in range(50):
-                if PVDot[i, 0] > (b * 0.5 - 0.25) and PVDot[i, 0] <= (b * 0.5 + 0.25):
-                    nWhichBin = b
-                    break
-
-            if nWhichBin > 0:
-                XBinSum[nWhichBin, 0] = XBinSum[nWhichBin, 0] + PVDot[i, 0]  # wind speed
-                XBinSum[nWhichBin, 1] = XBinSum[nWhichBin, 1] + PVDot[i, 1]  # Power
-                XBinNumber[nWhichBin] = XBinNumber[nWhichBin] + 1
-
-        for b in range(50):
-            XBinNumber[b] = XBinNumber[b] - 1
-
-        for b in range(50):
-            if XBinNumber[b] > 0:
-                PCurve[b, 0] = XBinSum[b, 0] / XBinNumber[b]
-                PCurve[b, 1] = XBinSum[b, 1] / XBinNumber[b]
-
-        # 对额定风速以上的功率直接赋额定功率
-        VRatedNum = int(VRated / 0.5)
-        for m in range(VRatedNum, 50):
-            if PCurve[m, 1] == 0:
-                PCurve[m, 1] = PRated
-
-        # print("PCurve", PCurve)
-
-        # 绘制标准正则功率曲线,以0.5m/s标准为间隔
-        # 15m/s以上为额定功率,15m/s以下为计算得到
-        PCurveNorm = np.zeros([50, 2], dtype=int)
-        for i in range(30, 50):
-            PCurveNorm[i, 0] = i * 0.5
-            PCurveNorm[i, 1] = PRated
-
-        # 15m/s一下正则功率曲线
-        CurveData = np.zeros([30, 2], dtype=int)
-        for i in range(30):
-            CurveData[i, :] = PCurve[i, :]
-
-        CurveNorm = np.zeros([30, 2], dtype=int)
-        VSpeed = [i / 2 for i in range(1, 31)]
-
-        WhichBin = 0
-
-        K = 0
-        a = 0
-        for m in range(30):
-            K = 0
-            a = 0
-
-            for n in range(30):
-                if abs(CurveData[n, 0] - VSpeed[m]) < 0.1:
-                    WhichBin = n
-                    break
-
-            if WhichBin > 1:
-                if CurveData[WhichBin, 0] - CurveData[WhichBin - 1, 0] > 0:
-                    K = (CurveData[WhichBin, 1] - CurveData[WhichBin - 1, 1]) / (
-                            CurveData[WhichBin, 0] - CurveData[WhichBin - 1, 0])
-                    a = CurveData[WhichBin, 1] - K * CurveData[WhichBin, 0]
-
-            CurveNorm[m, 0] = VSpeed[m]
-            CurveNorm[m, 1] = a + K * VSpeed[m]
-
-        for i in range(30):
-            PCurveNorm[i, :] = CurveNorm[i, :]
-
-        # 子模块3:损失电量计算及发电性能评价
-        CC = len(PCurve[:, 0])
-        EPIdealTotal = 0
-        # 计算停机损失
-        EPLostStopTotal = 0
-        EPLost = 0
-
-        nWhichBin = 0
-        IdealPower = 0
-        nStopTotal = 0
-        for i in range(wind_and_power_df_count):
-            if wind_and_power_df.loc[i, self.active_power] <= 0:
-                nWhichBin = 0
-                for m in range(base_wind_and_power_count - 1):
-                    if wind_and_power_df.loc[i, self.wind_velocity] > base_wind_and_power_df.loc[
-                        m, self.rated_wind_speed] and wind_and_power_df.loc[i, self.wind_velocity] <= \
-                            base_wind_and_power_df.loc[
-                                m + 1, self.rated_wind_speed]:
-                        nWhichBin = m
-                        break
-
-                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
-                    continue
-
-                IdealPower = (wind_and_power_df.loc[i, self.wind_velocity] - base_wind_and_power_df.loc[
-                    nWhichBin, self.rated_wind_speed]) / (
-                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] -
-                                     base_wind_and_power_df.loc[
-                                         nWhichBin, self.rated_wind_speed]) * (
-                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity]
-                                     - base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) \
-                             + base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
-
-                EPLost = IdealPower / 6
-                EPLostStopTotal = EPLostStopTotal + EPLost
-                nStopTotal = nStopTotal + 1
-
-        print("EPLost", EPLost)
-        print("nStopTotal", nStopTotal)
-        print("EPLostStopTotal", EPLostStopTotal)
-
-        nWhichP = 0
-        nWhichV = 0
-        nWhichBin = 0
-        IdealPower = 0
-
-        # 计算欠发损失,此欠发损失已不包括限电损失,限电点在前面已经从欠发点中去除。
-        EPLostBadTotal = 0
-        EPLost = 0
-
-        nBadTotal = 0
-
-        LostBadPercent = 0
-
-        EPOverTotal = 0
-        EPOver = 0
-        nOverTotal = 0
-
-        for i in range(nCounter1):
-            if Dzwind_and_power_dfSel[i] == 1:
-                nWhichBin = 0
-                for m in range(base_wind_and_power_count - 1):
-                    if DzMarch809[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] \
-                            and DzMarch809[i, 0] <= base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
-                        nWhichBin = m
-                        break
-
-                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
-                    continue
-
-                IdealPower = (DzMarch809[i, 0] - base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) / (
-                        base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] - base_wind_and_power_df.loc[
-                    nWhichBin, self.rated_wind_speed]) * (
-                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
-                                     base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) + \
-                             base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
-                EPLost = abs(IdealPower - DzMarch809[i, 1]) / 6
-                EPLostBadTotal = EPLostBadTotal + EPLost
-                nBadTotal = nBadTotal + 1
-
-            # 额定风速以上超发电量
-            if Dzwind_and_power_dfSel[i] == 3:
-                EPOver = (DzMarch809[i, 1] - PRated) / 6
-                EPOverTotal = EPOverTotal + EPOver
-                nOverTotal = nOverTotal + 1
-
-        print("EPLost", EPLost)
-        print("nBadTotal", nBadTotal)
-        print("EPLostBadTotal", EPLostBadTotal)
-        print("EPOverTotal", EPOverTotal)
-        print("nOverTotal", nOverTotal)
-
-        # 功率曲线未达标损失
-        EPLostPerformTotal = 0
-        nWhichBinI = 0
-        IdealPower = 0
-
-        for i in range(nCounterVP):
-
-            for m in range(base_wind_and_power_count - 1):
-                if PVDot[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] and PVDot[i, 0] <= \
-                        base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
-                    nWhichBinI = m
-                    break
-
-            if nWhichBinI > base_wind_and_power_count - 1 or nWhichBinI == 0:
-                continue
-
-            IdealPower = (PVDot[i, 0] - base_wind_and_power_df.loc[nWhichBinI, self.rated_wind_speed]) / (
-                    base_wind_and_power_df.loc[nWhichBinI + 1, self.rated_wind_speed] - base_wind_and_power_df.loc[
-                nWhichBinI, self.rated_wind_speed]) * \
-                         (base_wind_and_power_df.loc[nWhichBinI + 1, self.rated_capacity] -
-                          base_wind_and_power_df.loc[nWhichBinI, self.rated_capacity]) + \
-                         base_wind_and_power_df.loc[nWhichBinI, self.rated_capacity]
-
-            EPLostPerformTotal = EPLostPerformTotal + (IdealPower - PVDot[i, 1]) / 6
-
-        print("EPLostPerformTotal", EPLostPerformTotal)
-
-        # 限电损失
-        EPLostLimitTotal = 0
-        EPLost = 0
-        nLimitTotal = 0
-
-        PVLimit = np.zeros([nCounter1, 2])
-
-        for i in range(nCounter1):
-            if Dzwind_and_power_dfSel[i] == 4:
-                nWhichBin = 0
-                for m in range(base_wind_and_power_count - 1):
-                    if DzMarch809[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] and DzMarch809[i, 0] <= \
-                            base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
-                        nWhichBin = m
-                        break
-
-                # 插值计算对应设计功率
-                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
-                    continue
-
-                IdealPower = (DzMarch809[i, 0] - base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) / (
-                        base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] -
-                        base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) * (
-                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
-                                     base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) + \
-                             base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
-                EPLost = np.abs(IdealPower - DzMarch809[i, 1]) / 6
-                EPLostLimitTotal = EPLostLimitTotal + EPLost
-
-                PVLimit[nLimitTotal, :] = DzMarch809[i, :]
-                nLimitTotal = nLimitTotal + 1
-
-        nLimitTotal = nLimitTotal - 1
-
-        print("nLimitTotal", nLimitTotal)
-
-        # 欠发和限点损失总和
-        EPLostBadLimitTotal = EPLostBadTotal + EPLostLimitTotal
-
-        # 如果功率曲线未达标损失为正
-        if EPLostPerformTotal >= 0:
-            EPIdealTotal = EPActualTotal + EPLostStopTotal + EPLostLimitTotal + EPLostBadTotal + EPLostPerformTotal
-
-        # 如果功率曲线未达标损失为负
-        if EPLostPerformTotal < 0:
-            EPIdealTotal = EPActualTotal + EPLostStopTotal + EPLostLimitTotal + EPLostBadTotal
-
-        print("EPIdealTotal", EPIdealTotal)
-        # 可以比较求和得到的应发功率EPIdealTotal与理论计算得到的应发功率EPIdealTotalAAA的差别
-        # 需要去除的超发功率:(1)功率主带左侧的超发点;(2)额定风速以上的超发点。
-        RemoveOverEP = 0
-        nType2 = 0
-        for i in range(nCounter1):
-            if Dzwind_and_power_dfSel[i] == 2:  # 功率主带左侧的超发坏点
-                nWhichBin = 0
-                for m in range(base_wind_and_power_count - 1):
-                    if base_wind_and_power_df.loc[m, self.rated_wind_speed] < DzMarch809[i, 0] <= base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
-                        nWhichBin = m
-                        break
-
-                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
-                    continue
-
-                IdealPower = (DzMarch809[i, 0] - base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) / (
-                        base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] - base_wind_and_power_df.loc[
-                    nWhichBin, self.rated_wind_speed]) * (
-                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
-                                     base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) + \
-                             base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
-
-                RemoveOverEP = RemoveOverEP + (DzMarch809[i, 1] - IdealPower) / 6
-                nType2 = nType2 + 1
-
-        print("RemoveOverEP", RemoveOverEP)
-        print("nType2", nType2)
-        # 额定功率以上的超发点
-        nTypeOver = 0
-        for i in range(nCounter1):
-            if DzMarch809[i, 1] > PRated:
-                RemoveOverEP = RemoveOverEP + (DzMarch809[i, 1] - PRated) / 6
-                nTypeOver = nTypeOver + 1
-
-        print("RemoveOverEP", RemoveOverEP)
-        print("nTypeOver", nTypeOver)
-
-    def run(self):
-        # Implement your class identification logic here
-        self.identifier()
-
-
-if __name__ == '__main__':
-    test = ClassIdentifier('test', r"D:\中能智能\matlib计算相关\好点坏点matlib计算\A01.csv", index='时间',
-                           wind_velocity='风速',
-                           active_power='功率')
-
-    test.run()
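
Review note: the deleted ClassIdentifier_1.py_bak builds its power/wind-speed occupancy grid (XBoxNumber) with nested Python loops over every sample. For reference, a vectorized equivalent of that counting step (a sketch; the bin edges follow the 25 kW rows and 0.25 m/s columns used in the deleted code, and ties on bin boundaries may round slightly differently):

    import numpy as np

    def grid_counts(wind, power, p_num, v_num):
        # Row m covers (m*25, (m+1)*25] kW; column n is centered on (n+1)*0.25 m/s.
        p_idx = np.ceil(np.asarray(power) / 25).astype(int) - 1
        v_idx = np.round(np.asarray(wind) / 0.25).astype(int) - 1
        ok = (p_idx >= 0) & (p_idx < p_num) & (v_idx >= 0) & (v_idx < v_num)
        counts = np.zeros((p_num, v_num), dtype=int)
        np.add.at(counts, (p_idx[ok], v_idx[ok]), 1)
        return counts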

+ 0 - 196
tmp_file/baiyushan_20240906.py

@@ -1,196 +0,0 @@
-from multiprocessing import Pool
-from os import *
-
-import chardet
-import pandas as pd
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    df = pd.DataFrame()
-    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-        encoding = detect_file_encoding(file_path)
-        end_with_gz = str(file_path).lower().endswith("gz")
-        if read_cols:
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
-        else:
-
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-    else:
-        xls = pd.ExcelFile(file_path)
-        # 获取所有的sheet名称
-        sheet_names = xls.sheet_names
-        for sheet in sheet_names:
-            if read_cols:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
-            else:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
-
-    return df
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in listdir(path):
-        item_path = path.join(path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-    # 读取所有文件
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    if is_file_path:
-        path = path.dirname(path)
-
-    if not path.exists(path):
-        makedirs(path, exist_ok=True)
-
-
-def read_status(status_path):
-    all_files = read_excel_files(status_path)
-
-    with Pool(20) as pool:
-        dfs = pool.starmap(read_file_to_df, [(file, ['设备名称', '状态码', '开始时间'], 2) for file in all_files])
-
-    df = pd.concat(dfs)
-    df = df[df['状态码'].isin([3, 5])]
-    df['开始时间'] = pd.to_datetime(df['开始时间'])
-
-    df['处理后时间'] = (df['开始时间'] + pd.Timedelta(minutes=10)).apply(
-        lambda x: f"{x.year}-{str(x.month).zfill(2)}-{str(x.day).zfill(2)} {str(x.hour).zfill(2)}:{x.minute // 10}0:00")
-
-    df['处理后时间'] = pd.to_datetime(df['处理后时间'])
-    df = df[(df['处理后时间'] >= '2023-09-01 00:00:00')]
-    df[df['处理后时间'] >= '2024-09-01 00:00:00'] = '2024-09-01 00:00:00'
-    df.sort_values(by=['设备名称', '处理后时间'], inplace=True)
-
-    return df
-
-
-def read_fault_data(fault_path):
-    all_files = read_excel_files(fault_path)
-
-    with Pool(20) as pool:
-        dfs = pool.starmap(read_file_to_df, [(file, ['设备名称', '故障开始时间'], 2) for file in all_files])
-
-    df = pd.concat(dfs)
-    df = df[df['设备名称'].str.startswith("#")]
-    df['故障开始时间'] = pd.to_datetime(df['故障开始时间'])
-
-    df['处理后故障开始时间'] = (df['故障开始时间'] + pd.Timedelta(minutes=10)).apply(
-        lambda x: f"{x.year}-{str(x.month).zfill(2)}-{str(x.day).zfill(2)} {str(x.hour).zfill(2)}:{x.minute // 10}0:00")
-
-    df['处理后故障开始时间'] = pd.to_datetime(df['处理后故障开始时间'])
-    df = df[(df['处理后故障开始时间'] >= '2023-09-01 00:00:00') & (df['处理后故障开始时间'] < '2024-09-01 00:00:00')]
-    df.sort_values(by=['设备名称', '处理后故障开始时间'], inplace=True)
-
-    return df
-
-
-def read_10min_data(data_path):
-    all_files = read_excel_files(data_path)
-
-    with Pool(20) as pool:
-        dfs = pool.starmap(read_file_to_df,
-                           [(file, ['设备名称', '时间', '平均风速(m/s)', '平均网侧有功功率(kW)'], 1) for file in all_files])
-
-    df = pd.concat(dfs)
-    df['时间'] = pd.to_datetime(df['时间'])
-
-    df = df[(df['时间'] >= '2023-09-01 00:00:00') & (df['时间'] < '2024-09-01 00:00:00')]
-    df.sort_values(by=['设备名称', '时间'], inplace=True)
-    return df
-
-
-def select_data_and_save(name, fault_df, origin_df):
-    df = pd.DataFrame()
-    for i in range(fault_df.shape[0]):
-        fault = fault_df.iloc[i]
-        con1 = origin_df['时间'] >= fault['处理后故障开始时间']
-        con2 = origin_df['时间'] <= fault['结束时间']
-        df = pd.concat([df, origin_df[con1 & con2]])
-
-    name = name.replace('#', 'F')
-    df.drop_duplicates(inplace=True)
-    df.to_csv(save_path + sep + name + '.csv', index=False, encoding='utf8')
-
-
-if __name__ == '__main__':
-    base_path = r'/data/download/白玉山/需要整理的数据'
-    save_path = base_path + sep + 'sele_data_202409261135'
-    create_file_path(save_path)
-    status_df = read_status(base_path + sep + '设备状态')
-    fault_df = read_fault_data(base_path + sep + '故障')
-    data_df = read_10min_data(base_path + sep + '十分钟')
-
-    status_df.to_csv(base_path + sep + '设备状态' + '.csv', index=False, encoding='utf8')
-    fault_df.to_csv(base_path + sep + '故障' + '.csv', index=False, encoding='utf8')
-    data_df.to_csv(base_path + sep + '十分钟' + '.csv', index=False, encoding='utf8')
-
-    print(status_df.shape)
-    print(fault_df.shape)
-    print(data_df.shape)
-
-    fault_list = list()
-    for i in range(fault_df.shape[0]):
-        data = fault_df.iloc[i]
-        con1 = status_df['设备名称'] == data['设备名称']
-        con2 = status_df['处理后时间'] >= data['处理后故障开始时间']
-        fault_list.append(status_df[con1 & con2]['处理后时间'].min())
-    fault_df['结束时间'] = fault_list
-
-    status_df.to_csv(base_path + sep + '设备状态' + '.csv', index=False, encoding='utf8')
-    fault_df.to_csv(base_path + sep + '故障' + '.csv', index=False, encoding='utf8')
-    data_df.to_csv(base_path + sep + '十分钟' + '.csv', index=False, encoding='utf8')
-
-    names = set(fault_df['设备名称'])
-    fault_map = dict()
-    data_map = dict()
-    for name in names:
-        fault_map[name] = fault_df[fault_df['设备名称'] == name]
-        data_map[name] = data_df[data_df['设备名称'] == name]
-
-    with Pool(20) as pool:
-        pool.starmap(select_data_and_save, [(name, fault_map[name], data_map[name]) for name in names])
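
Review note: the deleted baiyushan script rounds timestamps up to the next 10-minute boundary by formatting strings (the '处理后时间' columns). pandas can express the same ceiling directly, which avoids the string round-trip (a sketch, equivalent to the zfill/format trick in read_status() and read_fault_data()):

    import pandas as pd

    ts = pd.Series(pd.to_datetime(['2024-09-06 12:03:59', '2024-09-06 12:10:00']))
    # Add 10 minutes, then floor to the 10-minute grid.
    rounded = (ts + pd.Timedelta(minutes=10)).dt.floor('10min')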

+ 0 - 48
tmp_file/changing_hebing_guzhang.py

@@ -1,48 +0,0 @@
-import copy
-import datetime
-
-import pandas as pd
-
-read_path = r'D:\data\长清\故障记录_20230420_20240419.csv'
-
-df = pd.read_csv(read_path, encoding='gb18030')
-
-df['风机名'] = df['风机名'].apply(lambda wind_name: 'A' + wind_name.replace('号风机', '').zfill(2))
-
-df = df[~df['状态码描述'].isin(['高偏航误差穿越', '手动偏航'])]
-
-df['激活时间'] = pd.to_datetime(df['激活时间'].apply(lambda x: x[0:x.rfind(":")]), errors='coerce')
-df['复位时间'] = pd.to_datetime(df['复位时间'].apply(lambda x: x[0:x.rfind(":")]), errors='coerce')
-
-df.dropna(subset=['激活时间', '复位时间'], inplace=True)
-
-
-def generate_next_10_min(dt):
-    minute = dt.minute
-    chazhi = 10 - int(minute % 10)
-    now = dt + datetime.timedelta(minutes=chazhi)
-    now = now.replace(second=0, microsecond=0)
-
-    return now
-
-
-df['begin_time'] = df['激活时间'].apply(generate_next_10_min)
-df['end_time'] = df['复位时间'].apply(generate_next_10_min)
-
-df['chazhi_count'] = ((df['end_time'] - df['begin_time']).dt.seconds) // 600 + 1
-
-result_df = df[df['chazhi_count'] == 1]
-
-datas = [[]]
-for index, row in df[df['chazhi_count'] > 1].iterrows():
-    for i in range(row['chazhi_count']):
-        data = copy.deepcopy(row.values)
-        data[6] = data[6] + datetime.timedelta(minutes=10 * i)
-        datas.append(data)
-
-now_df = pd.DataFrame(datas, columns=df.columns)
-result_df = pd.concat([result_df, now_df])
-
-result_df.reset_index(inplace=True, drop=True)
-result_df.sort_values(by=['风机名', '激活时间', 'begin_time'], inplace=True)
-result_df.to_csv("故障记录.csv", encoding='utf8')

+ 0 - 94
tmp_file/cp_online_data_to_other.py

@@ -1,94 +0,0 @@
-import datetime
-import multiprocessing
-import shutil
-from os import *
-
-not_move_dir = ["乌梅山风电场-江西-大唐",
-                "诺木洪风电场-甘肃-华电",
-                "平陆风电场-山西-中广核",
-                "泗洪协合风电场-安徽-深能南控",
-                "诺木洪风电场-青海-华电",
-                "长清风电场-山东-国电"
-                ]
-
-read_dir = r"/data/download/collection_data"
-# read_dir = r'Z:\collection_data'
-save_base_dir = r"/data/download/datang_shangxian"
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in listdir(path):
-        if item not in not_move_dir:
-            item_path = path.join(path, item)
-            if path.isdir(item_path):
-                __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-            elif path.isfile(item_path):
-                if path not in directory_dict:
-                    directory_dict[path] = []
-
-                if filter_types is None or len(filter_types) == 0:
-                    directory_dict[path].append(item_path)
-                elif str(item_path).split(".")[-1] in filter_types:
-                    if str(item_path).count("~$") == 0:
-                        directory_dict[path].append(item_path)
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    if path.isfile(read_path):
-        return [read_path]
-
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 读取路径下所有的文件
-def read_files(read_path):
-    if path.isfile(read_path):
-        return [read_path]
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    """
-    创建路径
-    :param path:创建文件夹的路径
-    :param is_file_path: 传入的path是否包含具体的文件名
-    """
-    if is_file_path:
-        path = path.dirname(path)
-
-    if not path.exists(path):
-        makedirs(path, exist_ok=True)
-
-
-def copy_to_new(from_path):
-    to_path = from_path.replace(read_dir, save_base_dir)
-    is_file = False
-    if to_path.count('.') > 0:
-        is_file = True
-
-    create_file_path(to_path, is_file_path=is_file)
-
-    shutil.copy(from_path, to_path)
-
-
-print("开始:", datetime.datetime.now())
-begin = datetime.datetime.now()
-read_all_files = [i for i in read_files(read_dir) if i.find("收资数据") > -1]
-print(len(read_all_files))
-print("统计耗时:", datetime.datetime.now() - begin)
-cp_begin = datetime.datetime.now()
-
-with multiprocessing.Pool(40) as pool:
-    pool.starmap(copy_to_new, [(path,) for path in read_all_files])
-
-print(len(read_all_files), "耗时:", datetime.datetime.now() - cp_begin, "总耗时:", datetime.datetime.now() - begin)
-print("结束:", datetime.datetime.now())

+ 0 - 47
tmp_file/curge_read.py

@@ -1,47 +0,0 @@
-import os
-
-import chardet
-import pandas as pd
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding.lower() in ['utf-8', 'ascii', 'utf8', 'utf-8-sig']:
-        return 'utf-8'
-
-    return 'gb18030'
-
-
-def read_file_to_df(file_path, nrows=None):
-    df = pd.DataFrame()
-    try:
-        if str(file_path).lower().endswith("csv"):
-            encoding = detect_file_encoding(file_path)
-            df = pd.read_csv(file_path, encoding=encoding, on_bad_lines='warn', nrows=nrows)
-        else:
-            xls = pd.ExcelFile(file_path)
-            sheet_names = xls.sheet_names
-            for sheet_name in sheet_names:
-                now_df = pd.read_excel(xls, sheet_name=sheet_name, nrows=nrows)
-                now_df['sheet_name'] = sheet_name
-                df = pd.concat([df, now_df])
-            xls.close()
-    except Exception as e:
-        message = '文件:' + os.path.basename(file_path) + ',' + str(e)
-        raise ValueError(message)
-
-    return df
-
-
-if __name__ == '__main__':
-    df = read_file_to_df(r"D:\data\11-12月.xls")
-    print(df)

+ 0 - 40
tmp_file/error_ms_data.py

@@ -1,40 +0,0 @@
-from datetime import datetime
-
-import pandas as pd
-
-
-def convert_date(date_str):
-    cut_index = str(date_str).rfind("_")
-    date = date_str[0:cut_index].replace("_", "-")
-    time = date_str[cut_index + 1:].replace(":", ".")
-
-    return datetime.strptime(f"{date} {time}", '%Y-%m-%d %H.%M.%S.%f')
-
-
-df = pd.read_csv(r"d:/data/b2_240828_2324_Err 1.csv", header=1)
-df.dropna(subset='TimeStamp', inplace=True)
-df.drop_duplicates(subset='TimeStamp', keep="first", inplace=True)
-
-origin_columns = list(df.columns)
-
-df['TimeStamp1'] = df['TimeStamp'].apply(convert_date)
-df.sort_values(by='TimeStamp1', inplace=True)
-
-# df['DateTime'] = pd.to_datetime(df['TimeStamp'], format="%Y-%m-%d %H:%M:%S")
-df['DateTime'] = df['TimeStamp1'].apply(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))
-
-print(df.shape)
-
-dateTime_count = df['DateTime'].value_counts()
-
-dateTime_count_1 = dateTime_count[dateTime_count == 1]
-dateTime_count_gt1 = dateTime_count[dateTime_count > 1]
-
-df1 = df[df['DateTime'].isin(dateTime_count_1.index.values)]
-df2 = df[df['DateTime'].isin(dateTime_count_gt1.index.values)]
-
-print(df1.shape)
-print(df2.shape)
-origin_columns.insert(0, 'DateTime')
-df1.to_csv("1秒数据.csv", encoding='utf-8', index=False, columns=origin_columns, date_format="%Y-%m-%d %H:%M:%S.%f")
-df2.to_csv("毫秒数据.csv", encoding='utf-8', index=False, columns=origin_columns, date_format="%Y-%m-%d %H:%M:%S.%f")

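For reference, `convert_date` above splits on the last underscore, turning the date part's underscores into dashes and the time part's colons into dots before parsing with `%Y-%m-%d %H.%M.%S.%f`. A sketch with an assumed sample stamp (the real `TimeStamp` format is not shown in the diff):

    from datetime import datetime

    def convert_date(date_str):
        cut_index = str(date_str).rfind("_")
        date = date_str[0:cut_index].replace("_", "-")
        time = date_str[cut_index + 1:].replace(":", ".")
        return datetime.strptime(f"{date} {time}", '%Y-%m-%d %H.%M.%S.%f')

    # Assumed sample value for illustration only.
    print(convert_date("2024_08_28_23:24:01.123"))  # -> 2024-08-28 23:24:01.123000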
+ 0 - 57
tmp_file/fengxiang_fengdianchang.py

@@ -1,57 +0,0 @@
-import sys
-from multiprocessing import Pool
-from os import path
-# Use a separate name so "path" stays bound to os.path for the calls below
-project_root = path.dirname(path.dirname(path.abspath(__file__)))
-print(project_root)
-sys.path.insert(0, project_root)
-print(sys.path)
-
-from utils.file.trans_methods import *
-from utils.systeminfo.sysinfo import use_files_get_max_cpu_count
-
-
-def read_and_save_file(filename):
-    try:
-        basename = path.basename(filename)
-        wind_number = basename.split("_")[0]
-        df = read_file_to_df(filename, header=1)
-        df['风机号'] = wind_number
-        df['描述'] = pd.to_datetime(df['描述'], format='%d-%m-%Y %H:%M:%S')
-        df.set_index(keys=['描述', '风机号'], inplace=True)
-        return wind_number, df
-    except Exception as e:
-        print(basename, 'error')
-        raise e
-
-
-if __name__ == '__main__':
-    read_path = r'/data/download/collection_data/1进行中/枫香风电场-贵州-大唐/收资数据/枫香风电场收资表/1.10分钟SCADA数据'
-    save_path = r'/data/download/collection_data/1进行中/枫香风电场-贵州-大唐/清理数据/枫香风电场收资表/1.10分钟SCADA数据'
-    # read_path = r'D:\trans_data\枫香\收资数据\min'
-    # save_path = r'D:\trans_data\枫香\清理数据\min'
-    create_file_path(save_path, False)
-    all_fils = read_excel_files(read_path)
-    process_count = use_files_get_max_cpu_count(all_fils)
-
-    with Pool(process_count) as pool:
-        results = pool.starmap(read_and_save_file, [(i,) for i in all_fils])
-
-    df_dict = dict()
-    for result in results:
-        wind_number, df = result
-        cols = list(df.columns)
-        cols.sort()
-        cols_str = '-'.join(cols)
-        if wind_number in df_dict.keys():
-            if cols_str in df_dict[wind_number].keys():
-                df_dict[wind_number][cols_str] = pd.concat([df_dict[wind_number][cols_str], df], axis=0)
-            else:
-                df_dict[wind_number][cols_str] = df
-        else:
-            df_dict[wind_number] = {cols_str: df}
-
-    for wind_number, cols_dict in df_dict.items():
-        df = pd.concat(cols_dict.values(), axis=1)
-        df.sort_index(inplace=True)
-        df.reset_index(inplace=True)
-        df.to_csv(path.join(save_path, f"{wind_number}.csv"), encoding="utf-8", index=False)

+ 0 - 48
tmp_file/filter_lose_data.py

@@ -1,48 +0,0 @@
-import datetime
-
-import pandas as pd
-
-df = pd.read_csv("D:\data\白玉山后评估数据资料\十分钟.csv", encoding='utf8')
-
-df['时间'] = pd.to_datetime(df['时间'])
-df['plus_10min'] = df['时间'] + pd.Timedelta(minutes=10)
-
-names = set(df['设备名称'])
-
-
-def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
-    """
-    获取俩个时间之间的个数
-    :return: 查询时间间隔
-    """
-    delta = end_time - start_time
-    total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
-
-    return abs(int(total_seconds / time_space))
-
-
-result_dict = dict()
-for name in names:
-    q_df = df[df['设备名称'] == name]
-    q_df['unshift'] = q_df['时间'].shift(-1)
-    q_df.fillna('2024-09-01 00:00:00', inplace=True)
-    result_df = q_df[~(q_df['plus_10min'] == q_df['unshift'])]
-    result_df.reset_index(inplace=True)
-    q_list = list()
-    count = 0
-    result_df.to_csv('test.csv', encoding='utf8')
-    for i in range(result_df.shape[0]):
-        data = result_df.iloc[i]
-        begin = data['时间']
-        end = data['unshift']
-        count = count + get_time_space_count(begin, end, 600) - 1
-        # if end is not None and end != np.nan:
-        #     q_list.append(f"{begin} ~ {end}")
-
-    result_dict[name] = count
-
-with open("缺失_数量.csv", 'w', encoding='utf8') as f:
-    for k, v in result_dict.items():
-        # v.insert(0, k)
-        # f.write(",".join(v) + "\n")
-        f.write(f"{k},{v}\n")

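A quick worked check of the gap arithmetic in the deleted script above: for 10-minute data, `get_time_space_count(begin, end, 600) - 1` is the number of records missing inside a gap (sample values chosen for illustration):

    from datetime import datetime

    begin = datetime(2024, 8, 1, 0, 0, 0)
    end = datetime(2024, 8, 1, 1, 0, 0)               # a one-hour hole in 10-minute data
    steps = int((end - begin).total_seconds() / 600)  # 6 ten-minute steps between the stamps
    print(steps - 1)                                  # 5 records are actually missing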
+ 0 - 205
tmp_file/gradio_web.py

@@ -1,205 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Time    : 2024/6/3
-# @Author  : 魏志亮
-import copy
-
-import gradio as gr
-import yaml
-
-from service.plt_service import get_all_wind_company
-from service.trans_service import get_min_sec_conf_test
-
-
-# from utils.db.trans_mysql import *
-
-
-def test_click(wind_name, wind_full_name, type, is_vertical_table, merge_columns, vertical_read_cols,
-               vertical_index_cols, vertical_col_key, vertical_col_value, resolve_col_prefix, wind_name_exec,
-               wind_turbine_number, time_stamp, active_power, rotor_speed, generator_speed, wind_velocity,
-               pitch_angle_blade_1, pitch_angle_blade_2, pitch_angle_blade_3, cabin_position, true_wind_direction,
-               yaw_error1, set_value_of_active_power, gearbox_oil_temperature, generatordrive_end_bearing_temperature,
-               generatornon_drive_end_bearing_temperature, wind_turbine_status, wind_turbine_status2, cabin_temperature,
-               twisted_cable_angle, front_back_vibration_of_the_cabin, side_to_side_vibration_of_the_cabin,
-               actual_torque, given_torque, clockwise_yaw_count, counterclockwise_yaw_count, unusable,
-               power_curve_available, required_gearbox_speed, inverter_speed_master_control, outside_cabin_temperature,
-               main_bearing_temperature, gearbox_high_speed_shaft_bearing_temperature,
-               gearboxmedium_speed_shaftbearing_temperature, gearbox_low_speed_shaft_bearing_temperature,
-               generator_winding1_temperature, generator_winding2_temperature, generator_winding3_temperature,
-               turbulence_intensity, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10
-               ):
-    params = copy.deepcopy(vars())
-
-    error_message = ""
-    if wind_name is None or wind_name.strip() == '':
-        error_message += "风机名称必选"
-        gr.Warning(error_message)
-        return error_message
-
-    if wind_full_name is None or wind_full_name.strip() == '':
-        error_message += "风机全称必选"
-        gr.Warning(error_message)
-        return error_message
-
-    # save_to_trans_conf(params)
-    return yaml.dump(vars(), allow_unicode=True, sort_keys=False)
-
-
-def fill_data(wind_name, type):
-    select_cols = ['wind_full_name', 'is_vertical_table', 'merge_columns', 'vertical_read_cols',
-                   'vertical_index_cols', 'vertical_col_key', 'vertical_col_value', 'resolve_col_prefix',
-                   'wind_name_exec',
-                   'wind_turbine_number', 'time_stamp', 'active_power', 'rotor_speed', 'generator_speed',
-                   'wind_velocity', 'pitch_angle_blade_1', 'pitch_angle_blade_2', 'pitch_angle_blade_3',
-                   'cabin_position', 'true_wind_direction', 'yaw_error1', 'set_value_of_active_power',
-                   'gearbox_oil_temperature', 'generatordrive_end_bearing_temperature',
-                   'generatornon_drive_end_bearing_temperature', 'wind_turbine_status', 'wind_turbine_status2',
-                   'cabin_temperature', 'twisted_cable_angle', 'front_back_vibration_of_the_cabin',
-                   'side_to_side_vibration_of_the_cabin', 'actual_torque', 'given_torque', 'clockwise_yaw_count',
-                   'counterclockwise_yaw_count', 'unusable', 'power_curve_available', 'required_gearbox_speed',
-                   'inverter_speed_master_control', 'outside_cabin_temperature', 'main_bearing_temperature',
-                   'gearbox_high_speed_shaft_bearing_temperature', 'gearboxmedium_speed_shaftbearing_temperature',
-                   'gearbox_low_speed_shaft_bearing_temperature', 'generator_winding1_temperature',
-                   'generator_winding2_temperature', 'generator_winding3_temperature', 'turbulence_intensity', 'param1',
-                   'param2', 'param3', 'param4', 'param5', 'param6', 'param7', 'param8', 'param9', 'param10']
-    print(wind_name, type)
-    df = get_min_sec_conf_test(wind_name, type)
-    print(df)
-    if len(df.keys()) == 0:
-        return [''] * len(select_cols)
-    # Return the values in select_cols order; equivalent to the original
-    # element-by-element return of result[0] .. result[57]
-    return tuple(df[col] for col in select_cols)
-
-
-with gr.Blocks(css=".container.svelte-1sk0pyu.svelte-1sk0pyu {width: 300px}", title='中能智能') as demo:
-    wind_name = gr.Dropdown(label="电场名称", choices=get_all_wind_company())
-
-    types = {
-        '分钟映射': 'minute', '秒映射': 'second'
-    }
-
-    for name in types.keys():
-        with gr.Tab(label=name):
-            type = gr.Text(label="映射类型", value=types[name], visible=False)
-            wind_full_name = gr.Textbox(label="完整的电场名称")
-            merge_columns = gr.Checkbox(label="是否需合并(多个excel列合并成一个才需要选择)", value=False)
-            is_vertical_table = gr.Checkbox(label="是否是竖表", value=False)
-            vertical_read_cols = gr.Textbox(label="竖表--读取的字段", placeholder="逗号分隔")
-            vertical_index_cols = gr.Textbox(label="竖表--分组的字段", placeholder="逗号分隔,一般都是时间,机组")
-            vertical_col_key = gr.Textbox(label="竖表--数据点字段")
-            vertical_col_value = gr.Textbox(label="竖表--数据点数值")
-            resolve_col_prefix = gr.Textbox(label="处理列名",
-                                            placeholder="比如重庆海装 25_#桨距角,只需要 桨距角 可以用 column[column.find('#')+1:]")
-
-            wind_name_exec = gr.Textbox(label="风机编号代码处理",
-                                        placeholder="比如 昌平001号风机,可以配置 wind_name.replace('昌平','').replace('号风机','')")
-
-            wind_turbine_number = gr.Textbox(label="风机编号(wind_turbine_number)")
-            time_stamp = gr.Textbox(label="时间戳(time_stamp)")
-            active_power = gr.Textbox(label="有功功率(active_power)")
-            rotor_speed = gr.Textbox(label="风轮转速(rotor_speed)")
-            generator_speed = gr.Textbox(label="发电机转速(generator_speed)")
-            wind_velocity = gr.Textbox(label="风速(wind_velocity)")
-            pitch_angle_blade_1 = gr.Textbox(label="桨距角1(pitch_angle_blade_1)")
-            pitch_angle_blade_2 = gr.Textbox(label="桨距角2(pitch_angle_blade_2)")
-            pitch_angle_blade_3 = gr.Textbox(label="桨距角3(pitch_angle_blade_3)")
-            cabin_position = gr.Textbox(label="机舱位置(cabin_position)")
-            true_wind_direction = gr.Textbox(label="绝对风向(true_wind_direction)")
-            yaw_error1 = gr.Textbox(label="对风角度(yaw_error1)")
-            set_value_of_active_power = gr.Textbox(label="有功功率设定值(set_value_of_active_power)")
-            gearbox_oil_temperature = gr.Textbox(label="齿轮箱油温(gearbox_oil_temperature)")
-            generatordrive_end_bearing_temperature = gr.Textbox(
-                label="发电机驱动端轴承温度(generatordrive_end_bearing_temperature)")
-            generatornon_drive_end_bearing_temperature = gr.Textbox(
-                label="发电机非驱动端轴承温度(generatornon_drive_end_bearing_temperature)")
-            wind_turbine_status = gr.Textbox(label="风机状态1(wind_turbine_status)")
-            wind_turbine_status2 = gr.Textbox(label="风机状态2(wind_turbine_status2)")
-            cabin_temperature = gr.Textbox(label="机舱内温度(cabin_temperature)")
-            twisted_cable_angle = gr.Textbox(label="扭缆角度(twisted_cable_angle)")
-            front_back_vibration_of_the_cabin = gr.Textbox(label="机舱前后振动(front_back_vibration_of_the_cabin)")
-            side_to_side_vibration_of_the_cabin = gr.Textbox(label="机舱左右振动(side_to_side_vibration_of_the_cabin)")
-            actual_torque = gr.Textbox(label="实际力矩(actual_torque)")
-            given_torque = gr.Textbox(label="给定力矩(given_torque)")
-            clockwise_yaw_count = gr.Textbox(label="顺时针偏航次数(clockwise_yaw_count)")
-            counterclockwise_yaw_count = gr.Textbox(label="逆时针偏航次数(counterclockwise_yaw_count)")
-            unusable = gr.Textbox(label="不可利用(unusable)")
-            power_curve_available = gr.Textbox(label="功率曲线可用(power_curve_available)")
-            required_gearbox_speed = gr.Textbox(label="齿轮箱转速(required_gearbox_speed)")
-            inverter_speed_master_control = gr.Textbox(label="变频器转速(主控)(inverter_speed_master_control)")
-            outside_cabin_temperature = gr.Textbox(label="环境温度(outside_cabin_temperature)")
-            main_bearing_temperature = gr.Textbox(label="主轴承轴承温度(main_bearing_temperature)")
-            gearbox_high_speed_shaft_bearing_temperature = gr.Textbox(
-                label="齿轮箱高速轴轴承温度(gearbox_high_speed_shaft_bearing_temperature)")
-            gearboxmedium_speed_shaftbearing_temperature = gr.Textbox(
-                label="齿轮箱中速轴轴承温度(gearboxmedium_speed_shaftbearing_temperature)")
-            gearbox_low_speed_shaft_bearing_temperature = gr.Textbox(
-                label="齿轮箱低速轴轴承温度(gearbox_low_speed_shaft_bearing_temperature)")
-            generator_winding1_temperature = gr.Textbox(label="发电机绕组1温度(generator_winding1_temperature)")
-            generator_winding2_temperature = gr.Textbox(label="发电机绕组2温度(generator_winding2_temperature)")
-            generator_winding3_temperature = gr.Textbox(label="发电机绕组3温度(generator_winding3_temperature)")
-            turbulence_intensity = gr.Textbox(label="湍流强度(turbulence_intensity)")
-            param1 = gr.Textbox(label="齿轮箱油压(param1)")
-            param2 = gr.Textbox(label="预留字段2(param2)")
-            param3 = gr.Textbox(label="预留字段3(param3)")
-            param4 = gr.Textbox(label="预留字段4(param4)")
-            param5 = gr.Textbox(label="预留字段5(param5)")
-            param6 = gr.Textbox(label="预留字段6(param6)")
-            param7 = gr.Textbox(label="预留字段7(param7)")
-            param8 = gr.Textbox(label="预留字段8(param8)")
-            param9 = gr.Textbox(label="预留字段9(param9)")
-            param10 = gr.Textbox(label="预留字段10(param10)")
-
-            button = gr.Button(value="提交")
-            result = gr.Textbox(label="结果")
-
-            button.click(fn=test_click,
-                         inputs=[wind_name, wind_full_name, type, is_vertical_table, merge_columns, vertical_read_cols,
-                                 vertical_index_cols, vertical_col_key, vertical_col_value, resolve_col_prefix,
-                                 wind_name_exec, wind_turbine_number, time_stamp, active_power, rotor_speed,
-                                 generator_speed, wind_velocity, pitch_angle_blade_1, pitch_angle_blade_2,
-                                 pitch_angle_blade_3, cabin_position, true_wind_direction, yaw_error1,
-                                 set_value_of_active_power, gearbox_oil_temperature,
-                                 generatordrive_end_bearing_temperature, generatornon_drive_end_bearing_temperature,
-                                 wind_turbine_status, wind_turbine_status2, cabin_temperature, twisted_cable_angle,
-                                 front_back_vibration_of_the_cabin, side_to_side_vibration_of_the_cabin, actual_torque,
-                                 given_torque, clockwise_yaw_count, counterclockwise_yaw_count, unusable,
-                                 power_curve_available, required_gearbox_speed, inverter_speed_master_control,
-                                 outside_cabin_temperature, main_bearing_temperature,
-                                 gearbox_high_speed_shaft_bearing_temperature,
-                                 gearboxmedium_speed_shaftbearing_temperature,
-                                 gearbox_low_speed_shaft_bearing_temperature, generator_winding1_temperature,
-                                 generator_winding2_temperature, generator_winding3_temperature, turbulence_intensity,
-                                 param1, param2, param3, param4, param5, param6, param7, param8, param9, param10
-                                 ], outputs=[result])
-            wind_name.change(fill_data, inputs=[wind_name, type],
-                             outputs=[wind_full_name, is_vertical_table, merge_columns, vertical_read_cols,
-                                      vertical_index_cols, vertical_col_key, vertical_col_value, resolve_col_prefix,
-                                      wind_name_exec, wind_turbine_number, time_stamp, active_power, rotor_speed,
-                                      generator_speed, wind_velocity, pitch_angle_blade_1, pitch_angle_blade_2,
-                                      pitch_angle_blade_3, cabin_position, true_wind_direction, yaw_error1,
-                                      set_value_of_active_power, gearbox_oil_temperature,
-                                      generatordrive_end_bearing_temperature,
-                                      generatornon_drive_end_bearing_temperature,
-                                      wind_turbine_status, wind_turbine_status2, cabin_temperature, twisted_cable_angle,
-                                      front_back_vibration_of_the_cabin, side_to_side_vibration_of_the_cabin,
-                                      actual_torque,
-                                      given_torque, clockwise_yaw_count, counterclockwise_yaw_count, unusable,
-                                      power_curve_available, required_gearbox_speed, inverter_speed_master_control,
-                                      outside_cabin_temperature, main_bearing_temperature,
-                                      gearbox_high_speed_shaft_bearing_temperature,
-                                      gearboxmedium_speed_shaftbearing_temperature,
-                                      gearbox_low_speed_shaft_bearing_temperature, generator_winding1_temperature,
-                                      generator_winding2_temperature, generator_winding3_temperature,
-                                      turbulence_intensity,
-                                      param1, param2, param3, param4, param5, param6, param7, param8, param9, param10])
-
-if __name__ == "__main__":
-    demo.launch(server_name='0.0.0.0', server_port=7860, auth=('znzn', "znzn123"))

+ 0 - 28
tmp_file/hebing_matlib_result.py

@@ -1,28 +0,0 @@
-from os import *
-
-import pandas as pd
-
-read_path = r"D:\data\电量损失及散点图"
-df = pd.DataFrame()
-
-cols = ['风机', '应发电量', '实发电量', '停机损失电量', '坏点+限电损失电量', '性能损失电量', '坏点损失电量', '限电损失电量', '超发电量', '应发电量百分比', '实发电量百分比',
-        '停机损失电量百分比', '坏点+限电损失电量百分比', '性能损失电量百分比', '坏点损失电量百分比', '限电损失电量百分比', '超发电量百分比', '平均风速', '可利用率']
-
-for root, _dirs, files in walk(read_path):
-    if files:
-        base_name = path.basename(root)
-        wind_df = pd.DataFrame()
-        print(root)
-        df1 = pd.read_excel(path.join(root, "EPPer.xls"), usecols=['应发电量百分比', '实发电量百分比',
-                                                                     '停机损失电量百分比', '坏点+限电损失电量百分比', '性能损失电量百分比',
-                                                                     '坏点损失电量百分比',
-                                                                     '限电损失电量百分比', '超发电量百分比', '平均风速', '可利用率'])
-        df2 = pd.read_excel(path.join(root, "EPKW.xls"),
-                            usecols=['应发电量', '实发电量', '停机损失电量', '坏点+限电损失电量', '性能损失电量', '坏点损失电量', '限电损失电量', '超发电量'])
-        wind_df = pd.concat([df1, df2], axis=1)
-        wind_df['风机'] = base_name
-        wind_df.reset_index(inplace=True)
-        print(wind_df.columns)
-        df = pd.concat([df, wind_df], ignore_index=True)
-
-df.to_csv("合并结果.csv", index=False, encoding='utf8', columns=cols)

+ 0 - 77
tmp_file/hebing_muti_batch.py

@@ -1,77 +0,0 @@
-import multiprocessing
-import sys
-from os import *
-
-import pandas as pd
-
-sys.path.insert(0, path.abspath(__file__).split("tmp_file")[0])
-
-
-def hebing_and_save(new_batch_save_path, name, paths):
-    df = pd.DataFrame()
-    # The loop variable must not be named "path": path.join below needs the os.path module
-    for file_path in paths:
-        now_df = read_file_to_df(file_path)
-        df = pd.concat([df, now_df])
-
-    df.sort_values(by=['time_stamp'], inplace=True)
-
-    create_file_path(new_batch_save_path)
-    df.to_csv(path.join(new_batch_save_path, name), index=False, encoding='utf8')
-
-
-if __name__ == '__main__':
-
-    env = 'prod'
-    if len(sys.argv) >= 2:
-        env = sys.argv[1]
-
-    from utils.conf.read_conf import yaml_conf
-
-    conf_path = path.abspath(__file__).split("tmp_file")[0] + f"/conf/etl_config_{env}.yaml"
-    environ['ETL_CONF'] = conf_path
-    yaml_config = yaml_conf(conf_path)
-    environ['env'] = env
-
-    from utils.file.trans_methods import read_file_to_df, create_file_path
-
-    from etl.wind_power.fault_warn.FaultWarnTrans import FaultWarnTrans
-    from etl.wind_power.min_sec.MinSecTrans import MinSecTrans
-    from service.plt_service import get_hebing_data_by_batch_no_and_type
-
-    save_batch = 'WOF085500008-2-3'
-    save_batch_name = '合并'
-    trans_type = 'second'
-    read_batchs = ['WOF085500008-WOB000003', 'WOF085500008-WOB000002']
-    read_paths = list()
-
-    new_batch_save_path = ''
-
-    for read_data in read_batchs:
-        data = get_hebing_data_by_batch_no_and_type(read_data, trans_type)
-        save_db = True
-
-        exec_process = None
-        if data['transfer_type'] in ['second', 'minute']:
-            exec_process = MinSecTrans(data=data, save_db=save_db)
-
-        if data['transfer_type'] in ['fault', 'warn']:
-            exec_process = FaultWarnTrans(data=data, save_db=save_db)
-
-        if exec_process is None:
-            raise Exception("No exec process")
-
-        read_paths.append(exec_process.pathsAndTable.get_save_path())
-        new_batch_save_path = path.join(exec_process.pathsAndTable.save_path, save_batch + "_" + save_batch_name,
-                                           trans_type)
-
-    file_dict = dict()
-
-    for read_path in read_paths:
-        for file in listdir(read_path):
-            if file in file_dict:
-                file_dict[file].append(path.join(read_path, file))
-            else:
-                file_dict[file] = [path.join(read_path, file)]
-
-    with multiprocessing.Pool(len(file_dict.keys())) as pool:
-        pool.starmap(hebing_and_save, [(new_batch_save_path, name, paths) for name, paths in file_dict.items()])

+ 0 - 173
tmp_file/organize_xinhua_files.py

@@ -1,173 +0,0 @@
-import datetime
-import multiprocessing
-import warnings
-from os import *
-
-import numpy as np
-import pandas as pd
-
-warnings.filterwarnings("ignore")
-
-
-def __build_directory_dict(directory_dict, dir_path, filter_types=None):
-    # Walk every entry under dir_path (naming this parameter "path" would shadow os.path from the star import)
-    for item in listdir(dir_path):
-        item_path = path.join(dir_path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if dir_path not in directory_dict:
-                directory_dict[dir_path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[dir_path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[dir_path].append(item_path)
-
-
-# Read all Excel files under the given path
-def read_excel_files(read_path):
-    if path.isfile(read_path):
-        return [read_path]
-
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# Create a directory if it does not exist
-def create_file_path(target_path, is_file_path=False):
-    """
-    Create the directory for target_path (naming this parameter "path" would shadow os.path).
-    :param target_path: the directory to create
-    :param is_file_path: whether target_path ends with a file name
-    """
-    if is_file_path:
-        target_path = path.dirname(target_path)
-
-    if not path.exists(target_path):
-        makedirs(target_path, exist_ok=True)
-
-
-def boolean_is_check_data(df_cols):
-    fault_list = ['快速停机', '故障名称', '故障代码', '故障停机', '人工停机', '风机紧急停机', '工作模式', '风机自身故障停机', '限功率运行状态']
-
-    df_cols = [str(i).split('_')[-1] for i in df_cols]
-    for fault in fault_list:
-        if fault in df_cols:
-            return True
-
-    return False
-
-
-def read_file_to_df(file_path):
-    df = pd.read_excel(file_path)
-    wind_name = [i for i in df.columns if i.find('_') > -1][0].split('_')[0]
-    df.columns = [i.split('_')[-1] for i in df.columns]
-    df['wind_name'] = wind_name
-
-    return boolean_is_check_data(df.columns), wind_name, df
-
-
-def save_to_file(dfs, wind_name, save_path='', param='', is_check=False, all_cols=list(),
-                 result_data_list=multiprocessing.Manager().list()):
-    try:
-        if is_check:
-            df = pd.concat(dfs)
-        else:
-            df = dfs[0]
-            for index, now_df in enumerate(dfs):
-                if index > 0:
-                    df = pd.merge(df, now_df, on=['采样时间', 'wind_name'], how='outer')
-    except Exception as e:
-        print(wind_name, e)
-        raise e
-
-    df.reset_index(inplace=True)
-    df.drop_duplicates(inplace=True, subset=['采样时间', 'wind_name'])
-    if 'index' in df.columns:
-        del df['index']
-    create_file_path(path.join(save_path, param))  # the CSV below is written into the param subdirectory
-    df.sort_values(by='采样时间', inplace=True)
-
-    loss_cols = list([i for i in df.columns if i != 'wind_name'])
-    loss_cols.sort()
-
-    loss_cols.insert(0, wind_name)
-    loss_cols.insert(0, path.basename(save_path) + '-' + param)
-
-    result_data_list.append(loss_cols)
-
-    for col in set(all_cols):
-        if col not in df.columns:
-            df[col] = np.nan
-
-    df.to_csv(path.join(save_path, param, wind_name + '.csv'), encoding='utf8', index=False)
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    # dir1 = r'D:\data\新华水电\测试'
-    # save_path = r'D:\data\新华水电\整理数据'
-    result_datas = [
-        (r'/data/download/collection_data/1进行中/新华水电/风机SCADA数据/8月风机数据',
-         r'/data/download/collection_data/1进行中/新华水电/整理数据/8月'),
-        (r'/data/download/collection_data/1进行中/新华水电/风机SCADA数据/9月风机数据',
-         r'/data/download/collection_data/1进行中/新华水电/整理数据/9月')
-    ]
-
-    result_data_list = multiprocessing.Manager().list()
-
-    for dir1, save_path in result_datas:
-        files = read_excel_files(dir1)
-        with multiprocessing.Pool(30) as pool:
-            datas = pool.starmap(read_file_to_df, [(file,) for file in files])
-        data_wind_name = dict()
-        check_wind_name = dict()
-
-        data_all_cols = list()
-        check_all_cols = list()
-        for data in datas:
-            check_data, wind_name, df = data[0], data[1], data[2]
-
-            if '工作模式' not in df.columns:
-                # df.reset_index(inplace=True)
-                # df.set_index(keys=['采样时间'], inplace=True)
-                if check_data:
-                    check_all_cols.extend(list(df.columns))
-                    if wind_name in check_wind_name.keys():
-                        check_wind_name[wind_name].append(df)
-                    else:
-                        check_wind_name[wind_name] = [df]
-                else:
-                    data_all_cols.extend(list(df.columns))
-                    if wind_name in data_wind_name.keys():
-                        data_wind_name[wind_name].append(df)
-                    else:
-                        data_wind_name[wind_name] = [df]
-
-        # with multiprocessing.Pool(30) as pool:
-        #     pool.starmap(combine_df,
-        #                  [(dfs, wind_name, save_path, "事件数据", True, check_all_cols, result_data_list) for wind_name, dfs
-        #                   in
-        #                   check_wind_name.items()])
-
-        with multiprocessing.Pool(30) as pool:
-            pool.starmap(save_to_file,
-                         [(dfs, wind_name, save_path, "数据", False, data_all_cols, result_data_list) for wind_name, dfs
-                          in
-                          data_wind_name.items()])
-
-        print(datetime.datetime.now() - begin)
-
-    normal_list = list(result_data_list)
-    normal_list.sort(key=lambda x: (x[0], int(x[1][2:])))
-
-    with open('loss_col.csv', 'w', encoding='utf8') as f:
-        for datas in normal_list:
-            f.write(",".join(datas))
-            f.write('\n')
-
-    print(datetime.datetime.now() - begin)

+ 0 - 205
tmp_file/organize_xinhua_files_data.py

@@ -1,205 +0,0 @@
-import datetime
-import multiprocessing
-import warnings
-from os import *
-
-import pandas as pd
-
-warnings.filterwarnings("ignore")
-
-
-def __build_directory_dict(directory_dict, dir_path, filter_types=None):
-    # Walk every entry under dir_path (naming this parameter "path" would shadow os.path from the star import)
-    for item in listdir(dir_path):
-        item_path = path.join(dir_path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if dir_path not in directory_dict:
-                directory_dict[dir_path] = []
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[dir_path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[dir_path].append(item_path)
-
-
-# Read all Excel files under the given path
-def read_excel_files(read_path):
-    if path.isfile(read_path):
-        return [read_path]
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# Create a directory if it does not exist
-def create_file_path(target_path, is_file_path=False):
-    """
-    Create the directory for target_path (naming this parameter "path" would shadow os.path).
-    :param target_path: the directory to create
-    :param is_file_path: whether target_path ends with a file name
-    """
-    if is_file_path:
-        target_path = path.dirname(target_path)
-    if not path.exists(target_path):
-        makedirs(target_path, exist_ok=True)
-
-
-def boolean_is_check_data(df_cols, need_valid=True):
-    if not need_valid:
-        return True
-    fault_list = ['快速停机', '故障名称', '故障代码', '故障停机', '人工停机', '风机紧急停机', '风机自身故障停机', '限功率运行状态']
-    df_cols = [str(i).split('_')[-1] for i in df_cols]
-    for fault in fault_list:
-        if fault in df_cols:
-            return True
-    return False
-
-
-def read_file_to_df(file_path):
-    df = pd.read_excel(file_path)
-    wind_name = [i for i in df.columns if i.find('_') > -1][0].split('_')[0]
-    df.columns = [i.split('_')[-1] for i in df.columns]
-    df['wind_name'] = wind_name
-    df['采样时间'] = pd.to_datetime(df['采样时间'])
-    df['采样时间'] = df['采样时间'].dt.ceil('T')
-    return boolean_is_check_data(df.columns, file_path.find('批次') > -1), wind_name, df
-
-
-def read_guzhangbaojing(file_path):
-    try:
-        df = pd.read_excel(file_path)
-        df.rename(columns={'风机名': 'wind_name'}, inplace=True)
-        df['采样时间'] = pd.to_datetime(df['采样时间'])
-        df['采样时间'] = df['采样时间'].dt.ceil('T')
-        df = df[(df['采样时间'] >= '2024-08-01 00:00:00') & (df['采样时间'] < '2024-10-01 00:00:00')]
-        return df
-    except Exception as e:
-        print(file_path, e)
-        raise e
-
-
-def combine_df(dfs, wind_name, save_path=''):
-    print(wind_name)
-    cols = list()
-    col_map = dict()
-    try:
-        df = dfs[0]
-        cols.extend(df.columns)
-        for index, now_df in enumerate(dfs):
-            if index > 0:
-                for col in now_df.columns:
-                    if col in cols and col not in ['采样时间', 'wind_name']:
-                        if col in col_map.keys():
-                            count = col_map[col]
-                            col_map[col] = count + 1
-                        else:
-                            count = 1
-                            col_map[col] = 1
-                        now_df.rename(columns={col: col + '__' + str(count)}, inplace=True)
-                df = pd.merge(df, now_df, on=['采样时间', 'wind_name'], how='outer')
-                cols.extend(now_df.columns)
-    except Exception as e:
-        print(wind_name, e)
-        raise e
-    df.reset_index(inplace=True)
-    df.drop_duplicates(inplace=True, subset=['采样时间', 'wind_name'])
-    if 'index' in df.columns:
-        del df['index']
-    create_file_path(save_path)
-    df.sort_values(by='采样时间', inplace=True)
-    df.set_index(keys=['采样时间', 'wind_name'], inplace=True)
-    return wind_name, df
-
-
-def save_to_csv(wind_name, df):
-    try:
-        col_tuples = [(col.split('__')[0], col) for col in df.columns if col.find('__') > -1]
-        col_dict = dict()
-        for origin, col in col_tuples:
-            if origin in col_dict.keys():
-                col_dict[origin].add(col)
-            else:
-                col_dict[origin] = {col}
-
-        for origin, cols in col_dict.items():
-            print(wind_name, origin, cols)
-            if pd.api.types.is_numeric_dtype(df[origin]):
-                df[origin] = df[list(cols)].max(axis=1)
-            else:
-                df[origin] = df[list(cols)].apply(lambda x: [i for i in x.values if i][0], axis=1)
-            for col in cols:
-                if col != origin:
-                    del df[col]
-
-        df.to_csv(path.join(save_path, wind_name + '.csv'), encoding='utf8')
-
-    except Exception as e:
-        print(wind_name, df.columns)
-        raise e
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-
-    base_path = r'/data/download/collection_data/1进行中/新华水电/收资数据/风机SCADA数据'
-
-    dir1 = base_path + r'/data'
-    dir2 = base_path + r'/故障报警/汇能机组数据-故障'
-    dir3 = base_path + r'/故障报警/报警'
-    save_path = r'/data/download/collection_data/1进行中/新华水电/清理数据/合并批次1-2故障报警'
-
-    create_file_path(save_path)
-
-    # result_datas = [
-    #     (r'/data/download/collection_data/1进行中/新华水电/风机SCADA数据',
-    #      r'/data/download/collection_data/1进行中/新华水电/整理数据/批次1-2合并'),
-    # ]
-
-    data_wind_name = dict()
-    files = read_excel_files(dir1)
-    with multiprocessing.Pool(30) as pool:
-        datas = pool.starmap(read_file_to_df, [(file,) for file in files])
-    for data in datas:
-        check_data, wind_name, df = data[0], data[1], data[2]
-        if wind_name in data_wind_name.keys():
-            data_wind_name[wind_name].append(df)
-        else:
-            data_wind_name[wind_name] = [df]
-
-    with multiprocessing.Pool(30) as pool:
-        data_dfs = pool.starmap(combine_df,
-                                [(dfs, wind_name, save_path) for wind_name, dfs
-                                 in
-                                 data_wind_name.items()])
-
-    result_data_dict = dict()
-    for wind_name, df in data_dfs:
-        result_data_dict[wind_name] = df
-
-    for dir4 in [dir2, dir3]:
-        guzhang_files = read_excel_files(dir4)
-        with multiprocessing.Pool(30) as pool:
-            guzhang_datas = pool.starmap(read_guzhangbaojing, [(file,) for file in guzhang_files])
-        guzhang_df = pd.DataFrame()
-        for df in guzhang_datas:
-            if not df.empty:
-                guzhang_df = pd.concat([guzhang_df, df])
-        wind_names = set(list(guzhang_df['wind_name'].values))
-        for wind_name in wind_names:
-            now_df = guzhang_df[guzhang_df['wind_name'] == wind_name]
-            if wind_name in result_data_dict.keys():
-                now_df.reset_index(inplace=True)
-                now_df.drop_duplicates(inplace=True, subset=['采样时间', 'wind_name'])
-                if 'index' in now_df.columns:
-                    del now_df['index']
-                now_df.sort_values(by='采样时间', inplace=True)
-                now_df.set_index(keys=['采样时间', 'wind_name'], inplace=True)
-                res_df = result_data_dict[wind_name]
-                result_data_dict[wind_name] = pd.concat([res_df, now_df], axis=1)
-
-    with multiprocessing.Pool(30) as pool:
-        pool.starmap(save_to_csv, [(wind_name, df) for wind_name, df in result_data_dict.items()])
-
-    print(datetime.datetime.now() - begin)

+ 0 - 97
tmp_file/orgranize_hongyang.py

@@ -1,97 +0,0 @@
-import copy
-import multiprocessing
-import warnings
-from os import *
-
-import chardet
-import pandas as pd
-
-warnings.filterwarnings("ignore")
-
-# read_path = r'/home/wzl/test_data/红阳'
-# save_dir = r'/home/wzl/test_data/整理'
-
-read_path = r'D:\data\红阳\红阳秒级分测点\红阳'
-save_dir = r'D:\data\红阳\红阳秒级分测点\整理'
-
-def __build_directory_dict(directory_dict, dir_path, filter_types=None):
-    # Walk every entry under dir_path (naming this parameter "path" would shadow os.path from the star import)
-    for item in listdir(dir_path):
-        item_path = path.join(dir_path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if dir_path not in directory_dict:
-                directory_dict[dir_path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[dir_path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[dir_path].append(item_path)
-
-
-# Read all Excel files under the given path
-def read_excel_files(read_path):
-    if path.isfile(read_path):
-        return [read_path]
-
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-all_files = read_excel_files(read_path)
-
-
-# Detect the file's text encoding
-def detect_file_encoding(filename):
-    # Read the first 1000 bytes (enough for most encoding detection)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding.lower() in ['utf-8', 'ascii', 'utf8']:
-        return 'utf-8'
-
-    return 'gb18030'
-
-
-def read_and_organize(file):
-    df = pd.read_csv(file, encoding=detect_file_encoding(file))
-    return file, df
-
-
-if __name__ == '__main__':
-
-    with multiprocessing.Pool(10) as pool:
-        bak_datas = pool.starmap(read_and_organize, [(i,) for i in all_files])
-
-    datas = copy.deepcopy(bak_datas)
-    wind_name_df = dict()
-    for file, df in datas:
-        all_cols = [i for i in df.columns if i.find('#') > -1]
-        col = all_cols[0]
-        cedian = str(col).split("_")[-1]
-        wind_names = set([str(i).split("#")[0].replace("红阳风电场_", "") for i in all_cols])
-
-        print(file, df.columns)
-        for wind_name in wind_names:
-            cols = [i for i in all_cols if i.find('_' + wind_name) > -1]
-            cols.insert(0, '统计时间')
-            query_df = df[cols]
-            query_df.columns = [str(i).split('_')[-1] for i in query_df.columns]
-            query_df['风机编号'] = wind_name
-            if wind_name in wind_name_df.keys():
-                now_df = wind_name_df[wind_name]
-                wind_name_df[wind_name] = pd.merge(now_df, query_df, on=['统计时间', '风机编号'], how='outer')
-            else:
-                wind_name_df[wind_name] = query_df
-
-    for wind_name, df in wind_name_df.items():
-        df.to_csv(path.join(save_dir, wind_name + '#.csv'), index=False, encoding='utf8')

+ 0 - 91
tmp_file/power_derating.py

@@ -1,91 +0,0 @@
-import multiprocessing
-from os import *
-
-import matplotlib
-
-matplotlib.use('Agg')
-matplotlib.rcParams['font.family'] = 'SimHei'
-matplotlib.rcParams['font.sans-serif'] = ['SimHei']
-
-import numpy as np
-from matplotlib import pyplot as plt
-
-from utils.file.trans_methods import read_file_to_df
-from utils.file.trans_methods import read_excel_files
-import pandas as pd
-
-
-def select_data(file, curve_wv, curve_ap, save_path):
-    name = path.basename(file).split("@")[0]
-    try:
-        df = read_file_to_df(file)
-        df.dropna(subset=['有功功率 kW均值', '风速 m/s均值', '有功功率设定 kW均值'], inplace=True)
-        ap_gt_0_df = df[df['有功功率 kW均值'] > 0]
-        ap_le_0_df = df[df['有功功率 kW均值'] <= 0]
-        ap_le_0_df["marker"] = -1
-
-        ap = ap_gt_0_df['有功功率 kW均值'].values
-        wv = ap_gt_0_df['风速 m/s均值'].values
-        ap_set = ap_gt_0_df['有功功率设定 kW均值'].values
-
-        ap_gt_0_in = [0] * ap_gt_0_df.shape[0]
-
-        for i in range(len(ap_set)):
-            wind_speed = wv[i]
-            active_power = ap[i]
-            active_power_set = ap_set[i]
-
-            if active_power >= 2200 - 200:
-                ap_gt_0_in[i] = 1
-            else:
-                diffs = np.abs(curve_wv - wind_speed)
-                # Find the smallest wind-speed difference and its index
-                minDiff, idx = np.min(diffs), np.argmin(diffs)
-
-                # Use that index to look up the corresponding curve power
-                closestValue = curve_ap[idx]
-                if active_power - closestValue >= -100:
-                    ap_gt_0_in[i] = 1
-
-        ap_gt_0_df['marker'] = ap_gt_0_in
-        df = pd.concat([ap_gt_0_df, ap_le_0_df])
-
-        df.to_csv(path.join(save_path, name + '.csv'), index=False, encoding='utf-8')
-
-        df = df[['时间', '风速 m/s均值', '有功功率 kW均值', '有功功率设定 kW均值', 'marker']]
-
-        df = df[df['marker'] == 1]
-
-        x = df['风速 m/s均值'].values
-        y = df['有功功率 kW均值'].values
-        # Draw a scatter plot of the marked points
-        if not df.empty:
-            plt.scatter(x, y, s=10, c='blue')
-
-            # Add title and axis labels
-            plt.title(name)
-            plt.xlabel('风速均值')
-            plt.ylabel('有功功率均值')
-
-            # Save the figure
-            plt.savefig(path.join(save_path, name + '均值.png'))
-
-    except Exception as e:
-        print(path.basename(file), "出错", str(e))
-        raise e
-
-
-if __name__ == '__main__':
-    wind_power_df = read_file_to_df(r"D:\中能智能\matlib计算相关\标记derating\PV_Curve.csv")
-    curve_wv = wind_power_df["风速"].values
-    curve_ap = wind_power_df["功率"].values
-
-    all_files = read_excel_files(r"Z:\collection_data\1进行中\诺木洪风电场-甘肃-华电\清理数据\min-666")
-    save_path = r"D:\trans_data\诺木洪\清理数据\min-666-derating"
-
-    # save_path = r"Z:\collection_data\1进行中\诺木洪风电场-甘肃-华电\清理数据\min-666-marker"
-
-    # for file in all_files:
-
-    with multiprocessing.Pool(10) as pool:
-        pool.starmap(select_data, [(i, curve_wv, curve_ap, save_path) for i in all_files])

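The per-row loop in the deleted script above does a nearest-neighbour lookup against the contract power curve; a vectorized sketch of the same marking rule using NumPy broadcasting (thresholds copied from the script, function and argument names illustrative):

    import numpy as np

    def mark_derating(wv, ap, curve_wv, curve_ap, rated=2200, band=200, tol=-100):
        # Nearest contract-curve point for every sample, via broadcasting.
        idx = np.abs(curve_wv[None, :] - wv[:, None]).argmin(axis=1)
        expected = curve_ap[idx]
        # Same rule as the loop: near rated power, or no more than 100 kW below the curve.
        return ((ap >= rated - band) | (ap - expected >= tol)).astype(int)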
+ 0 - 90
tmp_file/power_derating_biaozhun.py

@@ -1,90 +0,0 @@
-from os import *
-
-import matplotlib
-import numpy as np
-
-from utils.draw.draw_file import scatter
-
-matplotlib.use('Agg')
-matplotlib.rcParams['font.family'] = 'SimHei'  # or 'Microsoft YaHei'
-matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # or ['Microsoft YaHei']
-
-from utils.file.trans_methods import read_file_to_df
-from utils.file.trans_methods import read_excel_files
-import pandas as pd
-
-
-class ContractPowerCurve(object):
-
-    def __init__(self, df: pd.DataFrame, wind_velocity='风速', active_power='功率'):
-        self.df = df
-        self.wind_velocity = wind_velocity
-        self.active_power = active_power
-
-
-def marker_active_power(contract_power_curve_class: ContractPowerCurve, df: pd.DataFrame, active_power='有功功率 kW均值',
-                        wind_velocity='风速 m/s均值'):
-    """
-    标记有功功率为正的记录
-    :param contract_power_curve_class: 合同功率曲线
-    :param df: 原始数据
-    :return: 标记有功功率为正的原始数据
-    """
-    contract_power_curve_df = contract_power_curve_class.df
-    curve_wv = contract_power_curve_df[contract_power_curve_class.wind_velocity].values
-    curve_ap = contract_power_curve_df[contract_power_curve_class.active_power].values
-
-    df.dropna(subset=[active_power, wind_velocity], inplace=True)
-    ap_gt_0_df = df[df[active_power] > 0]
-    ap_le_0_df = df[df[active_power] <= 0]
-    ap_le_0_df["marker"] = -1
-
-    active_power_values = ap_gt_0_df[active_power].values
-    wind_speed_values = ap_gt_0_df[wind_velocity].values
-    ap_gt_0_in = [0] * ap_gt_0_df.shape[0]
-
-    for i in range(len(ap_gt_0_in)):
-        wind_speed = wind_speed_values[i]
-        active_power = active_power_values[i]
-
-        # if active_power >= 2200 - 200:
-        #     ap_gt_0_in[i] = 1
-        # else:
-        diffs = np.abs(curve_wv - wind_speed)
-        # Find the smallest wind-speed difference and its index
-        minDiff, idx = np.min(diffs), np.argmin(diffs)
-
-        # Use that index to look up the corresponding curve power
-        closestValue = curve_ap[idx]
-        if active_power - closestValue >= -100:
-            ap_gt_0_in[i] = 1
-
-    ap_gt_0_df['marker'] = ap_gt_0_in
-    return pd.concat([ap_gt_0_df, ap_le_0_df])
-
-
-if __name__ == '__main__':
-    wind_power_df = read_file_to_df(r"D:\中能智能\matlib计算相关\标记derating\PV_Curve.csv")
-
-    all_files = read_excel_files(r"Z:\collection_data\1进行中\诺木洪风电场-甘肃-华电\清理数据\min-666")
-    save_path = r"D:\trans_data\诺木洪\清理数据\min-666-derating"
-
-    wind_power_df_class = ContractPowerCurve(wind_power_df)
-
-    for file in all_files:
-        name = path.basename(file).split("@")[0]
-        try:
-            df = read_file_to_df(file)
-            df = marker_active_power(wind_power_df_class, df)
-            df = df[df['marker'] == 1]
-            df.to_csv(path.join(save_path, name + '.csv'), index=False, encoding='utf-8')
-
-            # Draw a scatter plot of the marked points
-            if not df.empty:
-                scatter(name, x_label='风速均值', y_label='有功功率均值', x_values=df['风速 m/s均值'].values,
-                        y_values=df['有功功率 kW均值'].values, color='green',
-                        save_file_path=path.join(save_path, name + '均值.png'))
-
-        except Exception as e:
-            print(path.basename(file), "出错", str(e))
-            raise e

+ 0 - 213
tmp_file/power_derating_for_chunlin.py

@@ -1,213 +0,0 @@
-from os import *
-
-import matplotlib
-import numpy as np
-from matplotlib import pyplot as plt
-
-matplotlib.use('Agg')
-matplotlib.rcParams['font.family'] = 'SimHei'  # or 'Microsoft YaHei'
-matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # or ['Microsoft YaHei']
-
-import pandas as pd
-import chardet
-import warnings
-
-warnings.filterwarnings("ignore")
-
-
-# Detect the file's text encoding
-def detect_file_encoding(filename):
-    # Read the first 1000 bytes (enough for most encoding detection)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-def del_blank(df=pd.DataFrame(), cols=list()):
-    for col in cols:
-        if df[col].dtype == object:
-            df[col] = df[col].str.strip()
-    return df
-
-
-# Split an array into multiple chunks of num elements
-def split_array(array, num):
-    return [array[i:i + num] for i in range(0, len(array), num)]
-
-
-# Read a data file into a DataFrame
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    try:
-        df = pd.DataFrame()
-        if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-            encoding = detect_file_encoding(file_path)
-            end_with_gz = str(file_path).lower().endswith("gz")
-            if read_cols:
-                if end_with_gz:
-                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-                else:
-                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header,
-                                     on_bad_lines='warn')
-            else:
-
-                if end_with_gz:
-                    df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-                else:
-                    df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-        else:
-            xls = pd.ExcelFile(file_path)
-            # Get all sheet names
-            sheet_names = xls.sheet_names
-            for sheet in sheet_names:
-                if read_cols:
-                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)
-                else:
-                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header)
-
-                df = pd.concat([df, now_df])
-
-        print('文件读取成功', file_path, '文件数量', df.shape)
-    except Exception as e:
-        print('读取文件出错', file_path, str(e))
-        message = '文件:' + path.basename(file_path) + ',' + str(e)
-        raise ValueError(message)
-
-    return df
-
-
-def __build_directory_dict(directory_dict, dir_path, filter_types=None):
-    # Walk every entry under dir_path (naming this parameter "path" would shadow os.path from the star import)
-    for item in listdir(dir_path):
-        item_path = path.join(dir_path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if dir_path not in directory_dict:
-                directory_dict[dir_path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[dir_path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[dir_path].append(item_path)
-
-
-
-# Read all Excel files under the given path
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-class ContractPowerCurve(object):
-
-    def __init__(self, df: pd.DataFrame, wind_velocity='风速', active_power='功率'):
-        self.df = df
-        self.wind_velocity = wind_velocity
-        self.active_power = active_power
-
-
-# Create a directory if it does not exist
-def create_file_path(target_path, is_file_path=False):
-    if is_file_path:
-        target_path = path.dirname(target_path)
-
-    if not path.exists(target_path):
-        makedirs(target_path, exist_ok=True)
-
-
-def scatter(title, x_label, y_label, x_values, y_values, color='blue', size=10, save_file_path=''):
-    if save_file_path:
-        create_file_path(save_file_path, True)
-    else:
-        save_file_path = title + '.png'
-
-    plt.figure(figsize=(8, 6))
-    plt.title(title, fontsize=16)
-    plt.xlabel(x_label, fontsize=14)
-    plt.ylabel(y_label, fontsize=14)
-    plt.scatter(x_values, y_values, s=size, c=color)
-    plt.savefig(save_file_path)
-    plt.close()
-
-
-def marker_active_power(contract_power_curve_class: ContractPowerCurve, df: pd.DataFrame, active_power='有功功率 kW均值',
-                        wind_velocity='风速 m/s均值'):
-    """
-    标记有功功率为正的记录
-    :param contract_power_curve_class: 合同功率曲线
-    :param df: 原始数据
-    :return: 标记有功功率为正的原始数据
-    """
-    contract_power_curve_df = contract_power_curve_class.df
-    curve_wv = contract_power_curve_df[contract_power_curve_class.wind_velocity].values
-    curve_ap = contract_power_curve_df[contract_power_curve_class.active_power].values
-
-    df.dropna(subset=[active_power, wind_velocity], inplace=True)
-    ap_gt_0_df = df[df[active_power] > 0]
-    ap_le_0_df = df[df[active_power] <= 0]
-    ap_le_0_df["marker"] = -1
-
-    active_power_values = ap_gt_0_df[active_power].values
-    wind_speed_values = ap_gt_0_df[wind_velocity].values
-    ap_gt_0_in = [0] * ap_gt_0_df.shape[0]
-
-    for i in range(len(ap_gt_0_in)):
-        wind_speed = wind_speed_values[i]
-        active_power = active_power_values[i]
-
-        # if active_power >= 2200 - 200:
-        #     ap_gt_0_in[i] = 1
-        # else:
-        diffs = np.abs(curve_wv - wind_speed)
-        # Find the smallest wind-speed difference and its index
-        minDiff, idx = np.min(diffs), np.argmin(diffs)
-
-        # Use that index to look up the corresponding curve power
-        closestValue = curve_ap[idx]
-        if active_power - closestValue >= -100:
-            ap_gt_0_in[i] = 1
-
-    ap_gt_0_df['marker'] = ap_gt_0_in
-    return pd.concat([ap_gt_0_df, ap_le_0_df])
-
-
-if __name__ == '__main__':
-    wind_power_df = read_file_to_df(r"D:\中能智能\matlib计算相关\标记derating\PV_Curve.csv")
-
-    all_files = read_excel_files(r"Z:\collection_data\1进行中\诺木洪风电场-甘肃-华电\清理数据\min-666")
-    save_path = r"D:\trans_data\诺木洪\清理数据\min-666-derating"
-
-    wind_power_df_class = ContractPowerCurve(wind_power_df)
-
-    for file in all_files:
-        name = path.basename(file).split("@")[0]
-        try:
-            df = read_file_to_df(file)
-            df = marker_active_power(wind_power_df_class, df)
-            df = df[df['marker'] == 1]
-            # Save the filtered data
-            name = name.replace('HD', 'HD2')
-            df.to_csv(path.join(save_path, name + '.csv'), index=False, encoding='utf-8')
-
-            # Draw a scatter plot of the marked points
-            if not df.empty:
-                scatter(name, x_label='风速均值', y_label='有功功率均值', x_values=df['风速 m/s均值'].values,
-                        y_values=df['有功功率 kW均值'].values, color='green',
-                        save_file_path=path.join(save_path, name + '均值.png'))
-
-        except Exception as e:
-            print(path.basename(file), "出错", str(e))
-            raise e

+ 0 - 262
tmp_file/pv_youxiaoxing.py

@@ -1,262 +0,0 @@
-import multiprocessing
-from os import *
-
-import matplotlib
-
-matplotlib.use('Agg')
-matplotlib.rcParams['font.family'] = 'SimHei'  # or 'Microsoft YaHei'
-matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # or ['Microsoft YaHei']
-
-import chardet
-import warnings
-
-warnings.filterwarnings("ignore")
-
-import datetime
-
-import pandas as pd
-
-
-def get_time_space(df, time_str):
-    """
-    :return: 查询时间间隔
-    """
-    begin = datetime.datetime.now()
-    df1 = pd.DataFrame(df[time_str])
-    df1[time_str] = pd.to_datetime(df1[time_str], errors='coerce')
-    df1.sort_values(by=time_str, inplace=True)
-    df1['chazhi'] = df1[time_str].shift(-1) - df1[time_str]
-    result = df1.sample(int(df1.shape[0] / 100))['chazhi'].value_counts().idxmax().seconds
-    del df1
-    print(datetime.datetime.now() - begin)
-    return abs(result)
-
-
-def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
-    """
-    获取俩个时间之间的个数
-    :return: 查询时间间隔
-    """
-    delta = end_time - start_time
-    total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
-
-    return abs(int(total_seconds / time_space)) + 1
-
-
-# Detect the file's text encoding
-def detect_file_encoding(filename):
-    # Read the first 1000 bytes (enough for most encoding detection)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-def del_blank(df=pd.DataFrame(), cols=list()):
-    for col in cols:
-        if df[col].dtype == object:
-            df[col] = df[col].str.strip()
-    return df
-
-
-# 切割数组到多个数组
-def split_array(array, num):
-    return [array[i:i + num] for i in range(0, len(array), num)]
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    try:
-        df = pd.DataFrame()
-        if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-            encoding = detect_file_encoding(file_path)
-            end_with_gz = str(file_path).lower().endswith("gz")
-            if read_cols:
-                if end_with_gz:
-                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-                else:
-                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header,
-                                     on_bad_lines='warn')
-            else:
-
-                if end_with_gz:
-                    df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-                else:
-                    df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-        else:
-            xls = pd.ExcelFile(file_path)
-            # 获取所有的sheet名称
-            sheet_names = xls.sheet_names
-            for sheet in sheet_names:
-                if read_cols:
-                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)
-                else:
-                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header)
-
-                df = pd.concat([df, now_df])
-
-        print('文件读取成功', file_path, '文件数量', df.shape)
-    except Exception as e:
-        print('读取文件出错', file_path, str(e))
-        message = '文件:' + path.basename(file_path) + ',' + str(e)
-        raise ValueError(message)
-
-    return df
-
-
-def __build_directory_dict(directory_dict, dir_path, filter_types=None):
-    # walk every entry under the directory; the parameter is named dir_path so it
-    # does not shadow the os.path module pulled in by `from os import *`
-    for item in listdir(dir_path):
-        item_path = path.join(dir_path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if dir_path not in directory_dict:
-                directory_dict[dir_path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[dir_path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[dir_path].append(item_path)
-
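Review note: this recursive directory helper is duplicated across several of the deleted scripts; os.walk does the same job in a few lines. A minimal sketch under the same suffix filter and the same exclusion of Office lock files ('~$'):

    from os import path, walk

    def list_data_files(read_path, filter_types=('xls', 'xlsx', 'csv', 'gz')):
        result = []
        for root, _, names in walk(read_path):
            for name in names:
                if name.split('.')[-1] in filter_types and '~$' not in name:
                    result.append(path.join(root, name))
        return result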
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# create the directory; the parameter must not be named `path`, which
-# `from os import *` binds to the os.path module
-def create_file_path(target_path, is_file_path=False):
-    if is_file_path:
-        target_path = path.dirname(target_path)
-
-    if not path.exists(target_path):
-        makedirs(target_path, exist_ok=True)
-
-
-def time_biaozhun(df):
-    time_space = get_time_space(df, '时间')
-    query_df = df[['时间']]
-    query_df['时间'] = pd.to_datetime(df['时间'], errors="coerce")
-    query_df = query_df.dropna(subset=['时间'])
-    total = get_time_space_count(query_df['时间'].min(), query_df['时间'].max(), time_space)
-    return total, save_percent(1 - query_df.shape[0] / total), save_percent(1 - df.shape[0] / total)
-
-
-def save_percent(value, save_decimal=7):
-    return round(value, save_decimal) * 100
-
-
-def calc(df, file_name):
-    error_dict = {}
-    lose_dict = {}
-    error_dict['箱变'] = "".join(file_name.split(".")[:-1])
-    lose_dict['箱变'] = "".join(file_name.split(".")[:-1])
-
-    total, lose_time, error_time = time_biaozhun(df)
-    error_dict['时间'] = error_time
-    lose_dict['时间'] = lose_time
-
-    error_df = pd.DataFrame()
-    lose_df = pd.DataFrame()
-
-    try:
-        df.columns = ["".join(["逆变器" + "".join(col.split("逆变器")[1:])]) if col.find("逆变器") > -1 else col for col in
-                      df.columns]
-
-        for col in df.columns:
-            if col == '时间':
-                continue
-            query_df = df[[col]]
-            query_df[col] = pd.to_numeric(query_df[col], errors="coerce")
-            query_df = query_df.dropna(subset=[col])
-            lose_dict[col] = save_percent(1 - query_df.shape[0] / total)
-
-            if col.find('电压') > -1:
-                error_dict[col] = save_percent(query_df[query_df[col] < 0].shape[0] / total)
-
-            if col.find('电流') > -1:
-                error_dict[col] = save_percent(query_df[query_df[col] < -0.1].shape[0] / total)
-
-            if col.find('逆变器效率') > -1:
-                error_dict[col] = save_percent(query_df[(query_df[col] <= 0) | (query_df[col] >= 100)].shape[0] / total)
-
-            if col.find('温度') > -1:
-                error_dict[col] = save_percent(query_df[(query_df[col] < 0) | (query_df[col] > 100)].shape[0] / total)
-
-            if col.find('功率因数') > -1:
-                error_dict[col] = save_percent(query_df[(query_df[col] < 0) | (query_df[col] > 1)].shape[0] / total)
-
-        total, count = 0, 0
-        for k, v in error_dict.items():
-            if k != '箱变':
-                total = total + error_dict[k]
-                count = count + 1
-
-        error_dict['平均异常率'] = save_percent(total / count / 100)
-
-        total, count = 0, 0
-        for k, v in lose_dict.items():
-            if k != '箱变':
-                total = total + lose_dict[k]
-                count = count + 1
-
-        lose_dict['平均缺失率'] = save_percent(total / count / 100)
-
-        error_df = pd.concat([error_df, pd.DataFrame(error_dict, index=[0])])
-        lose_df = pd.concat([lose_df, pd.DataFrame(lose_dict, index=[0])])
-
-        error_df_cols = ['箱变', '平均异常率']
-        for col in error_df.columns:
-            if col not in error_df_cols:
-                error_df_cols.append(col)
-
-        lose_df_cols = ['箱变', '平均缺失率']
-        for col in lose_df.columns:
-            if col not in lose_df_cols:
-                lose_df_cols.append(col)
-
-        error_df = error_df[error_df_cols]
-        lose_df = lose_df[lose_df_cols]
-    except Exception as e:
-        print("异常文件", path.basename(file_name))
-        raise e
-
-    return error_df, lose_df
-
-
-def run(file_path):
-    df = read_file_to_df(file_path)
-    return calc(df, path.basename(file_path))
-
-
-if __name__ == '__main__':
-    # read_path = r'/data/download/大唐玉湖性能分析离线分析/05整理数据/逆变器数据'
-    # save_path = r'/data/download/大唐玉湖性能分析离线分析/06整理数据/逆变器数据'
-
-    read_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\yuanshi'
-    save_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\zhengli'
-    all_files = read_excel_files(read_path)
-
-    with multiprocessing.Pool(2) as pool:
-        df_arrys = pool.starmap(run, [(file,) for file in all_files])
-
-    error_df = pd.concat([df[0] for df in df_arrys])
-    lose_df = pd.concat([df[1] for df in df_arrys])
-    with pd.ExcelWriter(path.join(save_path, "玉湖光伏数据统计.xlsx")) as writer:
-        error_df.to_excel(writer, sheet_name='error_percent', index=False)
-        lose_df.to_excel(writer, sheet_name='lose_percent', index=False)
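Review note: the per-column loop in calc can be collapsed for the missing-rate half of the report; a sketch, assuming `total` is the expected row count derived from the time axis as in time_biaozhun:

    numeric = df.drop(columns=['时间']).apply(pd.to_numeric, errors='coerce')
    lose_rates = (1 - numeric.notna().sum() / total) * 100  # one missing rate per column

The threshold checks (voltage, current, efficiency, temperature, power factor) differ per column family, so the anomaly half keeps its branching.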

+ 0 - 134
tmp_file/qinghai-nuomuhong-guifan.py

@@ -1,134 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Spyder editor
-
-This is a temporary script file.
-"""
-import datetime
-import multiprocessing
-from os import *
-
-import numpy as np
-import pandas as pd
-
-dianjian_str = """
-wind_turbine_number		
-time_stamp		时间
-active_power		有功功率 kW
-rotor_speed		风轮转速 rpm
-generator_speed		发电机转速 rpm
-wind_velocity		风速 m/s
-pitch_angle_blade_1		叶片1角度 °
-pitch_angle_blade_2		叶片2角度 °
-pitch_angle_blade_3		叶片3角度 °
-cabin_position		机舱位置 °
-true_wind_direction		
-yaw_error1		风向 °
-twisted_cable_angle		
-main_bearing_temperature		主轴温度 ℃
-gearbox_oil_temperature		齿轮箱温度 ℃
-gearbox_low_speed_shaft_bearing_temperature		齿轮箱轴承温度 ℃
-gearboxmedium_speed_shaftbearing_temperature		
-gearbox_high_speed_shaft_bearing_temperature		齿轮箱轴承温度2 ℃
-generatordrive_end_bearing_temperature		发电机驱动侧轴承温度 ℃
-generatornon_drive_end_bearing_temperature		发电机非驱动侧轴承温度 ℃
-cabin_temperature		机舱温度 ℃
-outside_cabin_temperature		舱外温度 ℃
-generator_winding1_temperature		
-generator_winding2_temperature		
-generator_winding3_temperature		
-front_back_vibration_of_the_cabin		
-side_to_side_vibration_of_the_cabin		
-required_gearbox_speed		
-inverter_speed_master_control		
-actual_torque		
-given_torque		
-clockwise_yaw_count		
-counterclockwise_yaw_count		
-unusable		
-power_curve_available		
-set_value_of_active_power		有功功率设定 kW
-wind_turbine_status		
-wind_turbine_status2		
-turbulence_intensity		
-"""
-
-datas = [i for i in dianjian_str.split("\n") if i]
-
-dianjian_dict = dict()
-
-for data in datas:
-    ds = data.split("\t")
-
-    if len(ds) == 3:
-        dianjian_dict[ds[0]] = ds[2]
-    else:
-        dianjian_dict[ds[0]] = ''
-
-
-def read_df(file_path):
-    df = pd.read_csv(file_path, header=[0, 1])
-
-    col_nams_map = dict()
-    pre_col = ""
-    for tuple_col in df.columns:
-        col1 = tuple_col[0]
-        col2 = tuple_col[1]
-        if str(col1).startswith("Unnamed"):
-            if pre_col:
-                col1 = pre_col
-                pre_col = ''
-            else:
-                col1 = ''
-        else:
-            pre_col = col1
-
-        if str(col2).startswith("Unnamed"):
-            col2 = ''
-
-        col_nams_map[str(tuple_col)] = ''.join([col1, col2])
-    # print(col_nams_map)
-    # for k, v in col_nams_map.items():
-    #     if str(v).endswith('采样值'):
-    #         col_nams_map[k] = str(v)[:-3]
-
-    df.columns = [str(col) for col in df.columns]
-    df.rename(columns=col_nams_map, inplace=True)
-
-    # for col, name in dianjian_dict.items():
-    #     if name in df.columns:
-    #         df.rename(columns={name: col}, inplace=True)
-
-    # for col in df.columns:
-    #     if col not in dianjian_dict.keys():
-    #         del df[col]
-
-    return df
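Review note: read_df flattens a two-row CSV header in which the first row is only filled at group boundaries (pandas reads the blanks as 'Unnamed: …'). Worth noting that the carry is one step only; a sketch of what the mapping yields:

    df = pd.read_csv(file_path, header=[0, 1])
    # ('风速', '采样值')       -> '风速采样值'
    # ('Unnamed: 3', '统计值') -> '风速统计值'   (top name carried forward one column)
    # a top-level name must therefore span at most two sub-columns for this to hold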
-
-
-def get_wind_name_files(dir_path):
-    files = listdir(dir_path)
-    return files
-
-
-def combine_df(save_path, file):
-    begin = datetime.datetime.now()
-    df = read_df(file)
-    print("读取", file, df.shape)
-    df.replace("-", np.nan,inplace=True)
-    df.to_csv(path.join(save_path, path.basename(file)), encoding='utf-8', index=False)
-
-    print('整理完成', '耗时:', (datetime.datetime.now() - begin).seconds)
-
-
-if __name__ == '__main__':
-    read_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/min-666'
-    save_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/清理数据/min-666'
-
-    # read_path = r'D:\trans_data\诺木洪\收资数据\min-666'
-    # save_path = r'D:\trans_data\诺木洪\清理数据\min-666'
-    if not path.exists(save_path):
-        makedirs(save_path, exist_ok=True)
-
-    with multiprocessing.Pool(20) as pool:
-        pool.starmap(combine_df, [(save_path, read_path + sep + file) for file in listdir(read_path)])

+ 0 - 162
tmp_file/qinghai-nuomuhong.py

@@ -1,162 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Spyder editor
-
-This is a temporary script file.
-"""
-import copy
-import datetime
-import multiprocessing
-from os import *
-
-import numpy as np
-import pandas as pd
-
-dianjian_str = """
-wind_turbine_number		
-time_stamp		时间
-active_power		有功功率 kW
-rotor_speed		风轮转速 rpm
-generator_speed		发电机转速 rpm
-wind_velocity		风速 m/s
-pitch_angle_blade_1		叶片1角度 °
-pitch_angle_blade_2		叶片2角度 °
-pitch_angle_blade_3		叶片3角度 °
-cabin_position		机舱位置 °
-true_wind_direction		
-yaw_error1		风向 °
-twisted_cable_angle		
-main_bearing_temperature		主轴温度 ℃
-gearbox_oil_temperature		齿轮箱温度 ℃
-gearbox_low_speed_shaft_bearing_temperature		齿轮箱轴承温度 ℃
-gearboxmedium_speed_shaftbearing_temperature		
-gearbox_high_speed_shaft_bearing_temperature		齿轮箱轴承温度2 ℃
-generatordrive_end_bearing_temperature		发电机驱动侧轴承温度 ℃
-generatornon_drive_end_bearing_temperature		发电机非驱动侧轴承温度 ℃
-cabin_temperature		机舱温度 ℃
-outside_cabin_temperature		舱外温度 ℃
-generator_winding1_temperature		
-generator_winding2_temperature		
-generator_winding3_temperature		
-front_back_vibration_of_the_cabin		
-side_to_side_vibration_of_the_cabin		
-required_gearbox_speed		
-inverter_speed_master_control		
-actual_torque		
-given_torque		
-clockwise_yaw_count		
-counterclockwise_yaw_count		
-unusable		
-power_curve_available		
-set_value_of_active_power		有功功率设定 kW
-wind_turbine_status		
-wind_turbine_status2		
-turbulence_intensity		
-"""
-
-datas = [i for i in dianjian_str.split("\n") if i]
-
-dianjian_dict = dict()
-
-for data in datas:
-    ds = data.split("\t")
-
-    if len(ds) == 3:
-        dianjian_dict[ds[0]] = ds[2]
-    else:
-        dianjian_dict[ds[0]] = ''
-
-
-def read_df(file_path):
-    df = pd.read_csv(file_path, header=[0, 1])
-
-    col_nams_map = dict()
-    pre_col = ""
-    for tuple_col in df.columns:
-        col1 = tuple_col[0]
-        col2 = tuple_col[1]
-        if str(col1).startswith("Unnamed"):
-            if pre_col:
-                col1 = pre_col
-                pre_col = ''
-            else:
-                col1 = ''
-        else:
-            pre_col = col1
-
-        if str(col2).startswith("Unnamed"):
-            col2 = ''
-
-        col_nams_map[str(tuple_col)] = ''.join([col1, col2])
-    print(col_nams_map)
-    for k, v in col_nams_map.items():
-        if str(v).endswith('采样值'):
-            col_nams_map[k] = str(v)[:-3]
-
-    df.columns = [str(col) for col in df.columns]
-    df.rename(columns=col_nams_map, inplace=True)
-
-    for col, name in dianjian_dict.items():
-        if name in df.columns:
-            df.rename(columns={name: col}, inplace=True)
-
-    for col in df.columns:
-        if col not in dianjian_dict.keys():
-            del df[col]
-
-    return df
-
-
-def get_wind_name_files(dir_path):
-    # dir_path instead of `path`: with `from os import *`, `path` is os.path
-    files = listdir(dir_path)
-
-    wind_files_map = dict()
-    for file in files:
-        full_file = path.join(dir_path, file)
-        file_datas = str(file).split("@")
-        key = file_datas[0].replace("HD", "HD2")
-        if key in wind_files_map.keys():
-            wind_files_map[key].append(full_file)
-        else:
-            wind_files_map[key] = [full_file]
-
-    return wind_files_map
-
-
-def combine_df(save_path, wind_name, files):
-    begin = datetime.datetime.now()
-    df = pd.DataFrame()
-    for file in files:
-        query_df = read_df(file)
-        print("读取", file, query_df.shape)
-        query_df['time_stamp'] = pd.to_datetime(query_df['time_stamp'])
-        query_df.set_index(keys='time_stamp', inplace=True)
-        query_df = query_df[~query_df.index.duplicated(keep='first')]
-        if df.empty:
-            df = copy.deepcopy(query_df)
-        else:
-            df = pd.concat([df, query_df], join='inner')
-    df.reset_index(inplace=True)
-    df['wind_turbine_number'] = wind_name
-    for col, name in dianjian_dict.items():
-        if col not in df.columns:
-            df[col] = np.nan
-
-    df = df[dianjian_dict.keys()]
-    df.to_csv(path.join(save_path, wind_name + ".csv"), encoding='utf-8', index=False)
-
-    print(wind_name, '整理完成', '耗时:', (datetime.datetime.now() - begin).seconds)
-
-
-if __name__ == '__main__':
-    read_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/sec'
-    save_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/sec_采样值'
-
-    # read_path = r'D:\trans_data\诺木洪\收资数据\min'
-    # save_path = r'D:\trans_data\诺木洪\清理数据\min'
-    if not path.exists(save_path):
-        makedirs(save_path, exist_ok=True)
-    wind_files_map = get_wind_name_files(read_path)
-
-    with multiprocessing.Pool(20) as pool:
-        pool.starmap(combine_df, [(save_path, wind_name, files) for wind_name, files in wind_files_map.items()])

+ 0 - 38
tmp_file/queshi_bili.py

@@ -1,38 +0,0 @@
-import datetime
-
-import pandas as pd
-
-
-def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
-    """
-    count the expected number of records between two timestamps
-    :return: number of time_space-second steps between the two times
-    """
-    delta = end_time - start_time
-    total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
-
-    return abs(int(total_seconds / time_space))
-
-
-df = pd.read_csv("D:\data\白玉山后评估数据资料\十分钟.csv", encoding='utf8')
-
-df['时间'] = pd.to_datetime(df['时间'])
-df['plus_10min'] = df['时间'] + pd.Timedelta(minutes=10)
-
-names = list(set(df['设备名称']))
-names.sort()
-
-count = get_time_space_count(datetime.datetime.strptime('2023-09-01 00:00:00', '%Y-%m-%d %H:%M:%S'),
-                             datetime.datetime.strptime('2024-09-01 00:00:00', '%Y-%m-%d %H:%M:%S'), 600)
-
-result_df = pd.DataFrame(df['设备名称'].value_counts())
-result_df.reset_index(inplace=True)
-result_df.columns = ['风机', '数量']
-
-result_df['总数'] = count
-
-result_df['完整度'] = result_df['数量'].apply(lambda x: round(x * 100 / count, 2))
-
-result_df.sort_values(by=['风机'], inplace=True)
-
-print(result_df)
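Review note: the expected record count can also come straight from a date range; a sketch with the same one-year window and 10-minute step (pandas >= 1.4 for the inclusive keyword):

    expected = len(pd.date_range('2023-09-01', '2024-09-01', freq='10min',
                                 inclusive='left'))  # 52704, i.e. 366 days * 144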

+ 0 - 42
tmp_file/read_and_draw_png.py

@@ -1,42 +0,0 @@
-import multiprocessing
-from os import *
-
-from etl.wind_power.min_sec.ClassIdentifier import ClassIdentifier
-from utils.draw.draw_file import scatter
-from utils.file.trans_methods import read_file_to_df
-
-
-def draw(file, fengchang='测试'):
-    name = path.basename(file).split('.')[0]
-    df = read_file_to_df(file)
-    del df['lab']
-    identifier = ClassIdentifier(wind_turbine_number='test', origin_df=df, rated_power=5000, cut_out_speed=20,
-                                 active_power='active_power', wind_velocity='wind_velocity',
-                                 pitch_angle_blade='pitch_angle_blade_1')
-    df = identifier.run()
-
-    df.loc[df['active_power'] <= 0, 'lab'] = -1
-
-    print(df.groupby('lab').count())
-    color_map = {-1: 'red', 0: 'green', 1: 'blue', 2: 'black', 3: 'orange', 4: 'magenta'}
-    c = df['lab'].map(color_map)
-
-    # -1:停机 0:好点  1:欠发功率点;2:超发功率点;3:额定风速以上的超发功率点 4: 限电
-    legend_map = {"停机": 'red', "好点": 'green', "欠发": 'blue', "超发": 'black', "额定风速以上的超发": 'orange',
-                  "限电": 'magenta'}
-    scatter(name, x_label='风速', y_label='有功功率', x_values=df['wind_velocity'].values,
-            y_values=df['active_power'].values, color=c, col_map=legend_map,
-            save_file_path=path.dirname(
-                path.dirname(__file__)) + sep + "tmp" + sep + str(fengchang) + sep + name + '结果.png')
-
-
-if __name__ == '__main__':
-    read_dir = r"D:\data\logs\matlib-test"
-
-    files = [read_dir + sep + i for i in listdir(read_dir)]
-
-    if len(files) == 1:
-        draw(files[0], "和风元宝山4")
-    else:
-        with multiprocessing.Pool(4) as pool:
-            pool.starmap(draw, [(file, "和风元宝山4") for file in files])

+ 0 - 27
tmp_file/select_part_cols.py

@@ -1,27 +0,0 @@
-import datetime
-import multiprocessing
-from os import *
-
-import pandas as pd
-
-read_dir = r'/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF01000010-WOB000002_ZY24年7-9月秒级/second'
-save_dir = r'/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF01000010-WOB000002_ZY24年7-9月秒级/second_select_yaw_error1_20241014'
-
-
-def read_and_select_and_save(file):
-    df = pd.read_csv(read_dir + sep + file,
-                     usecols=['active_power', 'wind_velocity', 'pitch_angle_blade_1', 'yaw_error1', 'lab'])
-    df = df[df['yaw_error1'] <= 360]
-    df['yaw_error1'] = df['yaw_error1'].apply(lambda x: x - 360 if 180 <= x <= 360 else x)
-    condition = (df['active_power'] > 0) & (df['wind_velocity'] > 0)
-    df = df[condition]
-
-    df.to_csv(path.join(save_dir, file), index=False, encoding='utf8')
-    print(f'{file}处理完成')
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    with multiprocessing.Pool(32) as pool:
-        pool.starmap(read_and_select_and_save, [(file,) for file in listdir(read_dir)])
-    print(f'总耗时:{datetime.datetime.now() - begin}')

+ 0 - 19
tmp_file/test_wave.py

@@ -1,19 +0,0 @@
-import sys
-from os import *
-
-sys.path.insert(0, path.abspath(__file__).split("tmp_file")[0])
-
-
-if __name__ == '__main__':
-    env = 'dev'
-    if len(sys.argv) >= 2:
-        env = sys.argv[1]
-
-    conf_path = path.abspath(__file__).split("tmp_file")[0] + f"/conf/etl_config_{env}.yaml"
-    environ['ETL_CONF'] = conf_path
-    environ['env'] = env
-    from etl.wind_power.wave.WaveTrans import WaveTrans
-    test = WaveTrans('SKF001', r'/home/wzl/test_data/sdk_data/sdk_data', r'/home/wzl/test_data/sdk_data')
-    # test = WaveTrans('SKF001', r'D:\data\sdk_data\sdk_data_less', r'/home/wzl/test_data/sdk_data')
-
-    test.run()

+ 0 - 55
tmp_file/zibo_guzhang_select_time.py

@@ -1,55 +0,0 @@
-from utils.file.trans_methods import *
-
-# imported after the wildcard so that `datetime` here is the class and cannot be
-# shadowed by a module-level `import datetime` inside trans_methods
-from datetime import datetime, timedelta
-
-
-def convert_and_calculate_time_range(time_str):
-    # 解析原始字符串
-    date_part = time_str[:6]
-    time_part = time_str[7:]
-
-    # 将短日期格式转换为完整年份
-    year = '20' + date_part[:2]
-    month = date_part[2:4]
-    day = date_part[4:]
-
-    hour = time_part[:2]
-    minute = time_part[2:]
-
-    # 创建 datetime 对象
-    base_time = datetime.strptime(f"{year}-{month}-{day} {hour}:{minute}", "%Y-%m-%d %H:%M")
-
-    # 计算时间区间
-    start_time = base_time.replace(second=0, microsecond=0) - timedelta(minutes=2)
-    end_time = base_time.replace(second=0, microsecond=0) + timedelta(minutes=3)
-
-    return base_time.strftime("%Y-%m-%d %H:%M"), start_time.strftime("%Y-%m-%d %H:%M:%S"), end_time.strftime(
-        "%Y-%m-%d %H:%M:%S")
-
-
-all_df = read_file_to_df(r"D:\data\淄博\故障记录_filtered.csv")
-all_df['激活时间'] = pd.to_datetime(all_df['激活时间'])
-
-all_files = read_excel_files(r"D:\data\淄博\淄博风场buffer文件(1)")
-
-dfs = pd.DataFrame()
-
-for file in all_files:
-    base_name = path.basename(file)
-    if base_name.startswith("b"):
-        try:
-            turbnine_no = int(base_name.split("_")[0].replace("b", ""))
-            base_time, start_time, end_time = convert_and_calculate_time_range(
-                base_name.replace(base_name.split("_")[0] + "_", "")[0:11])
-        except Exception as e:
-            print("error:", file)
-            raise e
-
-        condation1 = (all_df['激活时间'] >= start_time) & (all_df['风机名'] == turbnine_no)
-        condation2 = (all_df['激活时间'] < end_time) & (all_df['风机名'] == turbnine_no)
-        condation = condation1 & condation2
-        dfs = pd.concat([dfs, all_df[condation]])
-
-dfs.drop_duplicates(inplace=True)
-
-dfs.to_csv(r"D:\data\淄博\result.csv", encoding='utf8', index=False)

+ 0 - 98
tmp_file/偏航误差验证.py

@@ -1,98 +0,0 @@
-import os
-import sys
-from concurrent.futures.thread import ThreadPoolExecutor
-
-sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
-
-import datetime
-import multiprocessing
-
-import pandas as pd
-
-from utils.file.trans_methods import read_files, copy_to_new, read_excel_files, read_file_to_df
-from utils.zip.unzip import get_desc_path, unzip
-
-
-def get_real_path(win_path):
-    return win_path.replace(r'Z:', r'/data/download').replace("\\", '/')
-
-
-def unzip_or_remove(file, tmp_dir):
-    if str(file).endswith("zip"):
-        unzip(file, tmp_dir)
-    else:
-        # file.replace(file, tmp_dir) returned tmp_dir itself, so every copy
-        # landed on the same path; copy into tmp_dir under the original name
-        copy_to_new(file, os.path.join(tmp_dir, os.path.basename(file)))
-
-
-def read_file_to_df_and_select(file_path):
-    select_cols = ['Timestamp', 'Los', 'Distance', 'HWS(hub)', 'HWS(hub)status', 'DIR(hub)', 'DIR(hub)status']
-    df = read_file_to_df(file_path, read_cols=select_cols)
-    condition1 = df['HWS(hub)status'] > 0.8
-    condition2 = df['DIR(hub)status'] > 0.8
-    condition3 = df['Distance'].isin([70, 90])
-
-    df = df[condition1 & condition2 & condition3]
-    return df
-
-
-def read_month_data_and_select(month, files, gonglv_df):
-    with ThreadPoolExecutor(max_workers=10) as executor:
-        dfs = list(executor.map(read_file_to_df_and_select, files))
-
-    df = pd.concat(dfs, ignore_index=True)
-
-    df['Time1'] = df['Timestamp'].apply(lambda x: x.split('.')[0])
-    df['Time1'] = pd.to_datetime(df['Time1'], errors='coerce')
-    df['Time1'] = df['Time1'].apply(
-        lambda x: x + datetime.timedelta(seconds=10 - x.second % 10) if x.second % 10 != 0 else x)
-    del gonglv_df['month']
-    result_df = pd.merge(df, gonglv_df, left_on='Time1', right_on='Time1')
-    result_df.sort_values(by='Time1', inplace=True)
-    save_dir = get_real_path('Z:\偏航误差验证数据\整理结果')
-    # save_dir = r'D:\data\pianhang\result'
-    result_df.to_csv(os.path.join(save_dir, f'{month}.csv'), encoding='utf8', index=False)
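Review note: because the fractional seconds were already stripped via split('.'), the rounding lambda above is equivalent to the idiomatic pandas form, as a sketch:

    df['Time1'] = df['Time1'].dt.ceil('10s')  # round up to the next 10-second boundary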
-
-
-if __name__ == '__main__':
-    read_dir = 'Z:\偏航误差验证数据\新华佳县雷达数据'
-    read_dir = get_real_path(read_dir)
-
-    tmp_dir = get_real_path(r'Z:\偏航误差验证数据\tmp_data')
-    gonglv_dir = get_real_path(r'Z:\偏航误差验证数据\陕西建工陕西智华\report\output')
-
-    # read_dir = r'D:\data\pianhang\1'
-    # tmp_dir = r'D:\data\pianhang\tmp'
-    # gonglv_dir = r'D:\data\pianhang\2'
-
-    gonglv_files = read_excel_files(gonglv_dir)
-
-    with multiprocessing.Pool(20) as pool:
-        dfs = pool.starmap(read_file_to_df, [(i, ['collect_time', 'a0216']) for i in gonglv_files])
-
-    gonglv_df = pd.concat(dfs, ignore_index=True)
-    gonglv_df.columns = ['Time1', '功率']
-    gonglv_df['Time1'] = pd.to_datetime(gonglv_df['Time1'], errors='coerce')
-    gonglv_df['month'] = gonglv_df['Time1'].dt.month
-
-    all_files = read_files(tmp_dir)
-
-    all_files = [i for i in all_files if str(os.path.basename(i)).startswith('WindSpeed2024')]
-
-    # with multiprocessing.Pool(20) as pool:
-    #     pool.starmap(unzip_or_remove, [(file, tmp_dir) for file in all_files])
-
-    month_map = dict()
-    for file in all_files:
-        base_name = os.path.basename(file)
-        month = base_name[13:15]
-        if month in month_map.keys():
-            month_map[month].append(file)
-        else:
-            month_map[month] = [file]
-
-    excel_files = read_excel_files(tmp_dir)
-
-    with multiprocessing.Pool(5) as pool:
-        pool.starmap(read_month_data_and_select,
-                     [(month, files, gonglv_df[gonglv_df['month'] == int(month)]) for month, files in
-                      month_map.items()])

+ 0 - 155
tmp_file/光伏箱体.py

@@ -1,155 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jul  9 16:28:48 2024
-
-@author: Administrator
-"""
-
-from datetime import datetime
-from os import *
-
-import chardet
-import pandas as pd
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    df = pd.DataFrame()
-    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-        encoding = detect_file_encoding(file_path)
-        end_with_gz = str(file_path).lower().endswith("gz")
-        if read_cols:
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
-        else:
-
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-    else:
-        xls = pd.ExcelFile(file_path)
-        # 获取所有的sheet名称
-        sheet_names = xls.sheet_names
-        for sheet in sheet_names:
-            if read_cols:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
-            else:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
-
-    return df
-
-
-def __build_directory_dict(directory_dict, dir_path, filter_types=None):
-    # walk every entry under the directory; dir_path instead of `path`, which
-    # `from os import *` binds to the os.path module
-    for item in listdir(dir_path):
-        item_path = path.join(dir_path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if dir_path not in directory_dict:
-                directory_dict[dir_path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[dir_path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[dir_path].append(item_path)
-
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# create the directory; the parameter must not be named `path`, which
-# `from os import *` binds to the os.path module
-def create_file_path(target_path, is_file_path=False):
-    if is_file_path:
-        target_path = path.dirname(target_path)
-
-    if not path.exists(target_path):
-        makedirs(target_path, exist_ok=True)
-
-
-def read_and_save_csv(file_path):
-    begin = datetime.now()
-    base_name = path.basename(file_path)
-    print('开始', base_name)
-
-    df1 = read_file_to_df(file_path + "箱变(1-8号逆变器)数据1.xls")
-    del df1['Unnamed: 0']
-    df1['时间'] = pd.to_datetime(df1['时间'])
-    df1.set_index(keys='时间', inplace=True)
-
-    df2 = read_file_to_df(file_path + "箱变(9-16号逆变器)数据1.xls")
-    del df2['Unnamed: 0']
-    df2['时间'] = pd.to_datetime(df2['时间'])
-    df2.set_index(keys='时间', inplace=True)
-
-    df3 = read_file_to_df(file_path + "箱变(1-8号逆变器)数据2.xls")
-    del df3['Unnamed: 0']
-    df3['时间'] = pd.to_datetime(df3['时间'])
-    df3.set_index(keys='时间', inplace=True)
-
-    df4 = read_file_to_df(file_path + "箱变(9-16号逆变器)数据2.xls")
-    del df4['Unnamed: 0']
-    df4['时间'] = pd.to_datetime(df4['时间'])
-    df4.set_index(keys='时间', inplace=True)
-
-    df = pd.concat([df1, df2, df3, df4], axis=1)
-    df.reset_index(inplace=True)
-    columns = list(df.columns)
-    columns.sort()
-
-    print(df.columns)
-
-    df = df[columns]
-    df.sort_values(by='时间', inplace=True)
-
-    df.to_csv(path.join(r'D:\trans_data\大唐玉湖性能分析离线分析', '05整理数据', base_name + '_箱变.csv'), encoding='utf-8',
-              index=False)
-    print('结束', base_name, '耗时:' + str(datetime.now() - begin))
-
-
-if __name__ == '__main__':
-
-    # use read_path, not `path`: rebinding `path` to a string would break the
-    # path.basename/path.dirname calls below (path is the os.path module here)
-    read_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test'
-    all_files = read_excel_files(read_path)
-
-    all_paths = set()
-    for file in all_files:
-        base_name = path.basename(file).split("箱变")[0]
-        base_path = path.dirname(file)
-        if base_name not in all_paths:
-            all_paths.add(path.join(base_path, base_name))
-
-    all_datas = list(all_paths)
-    all_datas.sort()
-
-    print(all_datas)
-    # with Pool(1) as pool:
-    #     pool.starmap(read_and_save_csv, [(i,) for i in all_datas])
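Review note: the four-frame merge in read_and_save_csv relies on pd.concat(axis=1) aligning on the time index, which raises if any input carries duplicate timestamps; a minimal guard, as a sketch:

    frames = [df1, df2, df3, df4]
    frames = [f[~f.index.duplicated(keep='first')] for f in frames]  # drop repeated stamps
    df = pd.concat(frames, axis=1)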

+ 0 - 22
tmp_file/列名包含数据处理.py

@@ -1,22 +0,0 @@
-import pandas as pd
-
-path = r'd://data//11.csv'
-
-df = pd.read_csv(path, encoding='gb18030')
-df.reset_index(inplace=True)
-print(df.columns)
-df.columns = [i.replace('()', '') for i in df.columns]
-wind_names = set([i.split('#-')[0] for i in df.columns if i.find('#-') > -1])
-print(df.columns)
-print(wind_names)
-for wind_name in wind_names:
-    select_cols = [i for i in df.columns if str(i).startswith(wind_name)]
-    print(select_cols)
-    select_cols.insert(0, '时间')
-    print(select_cols)
-    df_temp = df[select_cols]
-    df_temp.columns = [i.split('#-')[-1] for i in df_temp.columns]
-
-    df_temp.sort_values(by='时间', inplace=True)
-
-    df_temp.to_csv("d://data//najiade//"+str(wind_name) + '.csv', encoding='utf8', index=False)

+ 0 - 40
tmp_file/吉山批次处理并重新存数据库.py

@@ -1,40 +0,0 @@
-import datetime
-import multiprocessing
-import sys
-from os import *
-
-sys.path.insert(0, path.abspath(__file__).split("tmp_file")[0])
-
-from service.trans_service import save_df_to_db, drop_table, creat_min_sec_table
-from utils.file.trans_methods import read_file_to_df, read_files
-
-
-def read_and_exec(file_path):
-    begin = datetime.datetime.now()
-    print("开始执行:", path.basename(file_path))
-    df = read_file_to_df(file_path)
-    df['yaw_error1'] = df['true_wind_direction'] - 180
-    df.to_csv(file_path, index=False, encoding='utf8')
-    creat_min_sec_table()
-    save_df_to_db('WOF079200018-WOB000012_second', df)
-    print("结束执行:", path.basename(file_path), ",耗时:", datetime.datetime.now() - begin)
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    env = 'prod'
-    if len(sys.argv) >= 2:
-        env = sys.argv[1]
-
-    conf_path = path.abspath(f"./conf/etl_config_{env}.yaml")
-    environ['ETL_CONF'] = conf_path
-    environ['env'] = env
-
-    drop_table("WOF079200018-WOB000012_second")
-
-    read_dir = r'/data/download/collection_data/2完成/吉山风电场-江西-大唐/清理数据/WOF079200018-WOB000012_JS一期1秒24.8-10/second'
-
-    all_files = read_files(read_dir)
-    with multiprocessing.Pool(24) as pool:
-        pool.map(read_and_exec, all_files)
-    print("总耗时:", datetime.datetime.now() - begin)

+ 0 - 97
tmp_file/对比文件夹列名差值.py

@@ -1,97 +0,0 @@
-from utils.file.trans_methods import *
-
-
-def boolean_is_check_data(df_cols):
-    fault_list = ['快速停机', '故障名称', '故障代码', '故障停机', '人工停机', '风机紧急停机', '工作模式']
-
-    df_cols = [str(i).split('_')[-1] for i in df_cols]
-    for fault in fault_list:
-        if fault in df_cols:
-            return True
-
-    return False
-
-
-def compareTwoFolders(list1, other_dfs):
-    for is_falut in [True]:
-        result_df = pd.DataFrame()
-        # for df1 in df1s:
-        #     tmp_list = [str(i).split('_')[-1] for i in list(df1.columns) if i != 'sheet_name']
-        #     if is_falut:
-        #         if boolean_is_check_data(df1.columns):
-        #             list1.extend(tmp_list)
-        #     else:
-        #         if not boolean_is_check_data(df1.columns):
-        #             list1.extend(tmp_list)
-
-        set1 = set(list1)
-
-        list1 = list(set1)
-        list1.sort()
-
-        result_df['完整列名'] = list1
-
-        for wind_name, dfs in other_dfs.items():
-
-            list2 = list()
-            for df in dfs:
-                tmp_list = [str(i).split('_')[-1] for i in list(df.columns) if i != 'sheet_name']
-                if is_falut:
-                    if boolean_is_check_data(df.columns):
-                        list2.extend(tmp_list)
-                else:
-                    if not boolean_is_check_data(df.columns):
-                        list2.extend(tmp_list)
-
-            set2 = set(list2)
-            list2 = list(set2)
-            list2.sort()
-
-            list3 = list(set1 - set2)
-            list3.sort()
-
-            # list4 = list(set2 - set1)
-            # list4.sort()
-            # print(list3)
-            # print(list4)
-
-            max_count = len(list1)
-            list1.extend([''] * (max_count - len(list1)))
-            list2.extend([''] * (max_count - len(list2)))
-            list3.extend([''] * (max_count - len(list3)))
-            # list4.extend([''] * (max_count - len(list4)))
-
-            result_df[str(wind_name) + '字段'] = list2
-            result_df[str(wind_name) + '比完整列名少字段'] = list3
-            # result_df['风机' + str(wind_name) + '_比风机1多字段'] = list4
-
-        file_name = 'col_compare.csv' if not is_falut else 'col_compare_fault.csv'
-
-        result_df.to_csv(file_name, encoding='utf-8', index=False)
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    dir2 = r'D:\data\新华水电\风机SCADA数据'
-    files2 = read_excel_files(dir2)
-
-    other_dfs = dict()
-    list1 = list()
-    for file in files2:
-        month = path.basename(path.dirname(path.dirname(file)))[0:2]
-        wind_name = month + path.basename(path.dirname(file)).split('#')[0] + '号风机'
-        df = read_file_to_df(file, nrows=1)
-        if boolean_is_check_data(df.columns):
-            list1.extend([str(i).split('_')[-1] for i in list(df.columns) if i != 'sheet_name'])
-        if wind_name in other_dfs.keys():
-            other_dfs[wind_name].append(df)
-        else:
-            other_dfs[wind_name] = [df]
-
-    # with multiprocessing.Pool(10) as pool:
-    #     df2s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files2])
-    #
-    list1 = [i for i in list(set(list1)) if i != 'sheet_name']
-    compareTwoFolders(list1, other_dfs)
-
-    print(datetime.datetime.now() - begin)

+ 0 - 96
tmp_file/年度汇总平均缺失率.py

@@ -1,96 +0,0 @@
-import calendar
-import datetime
-import math
-import multiprocessing
-
-import pandas as pd
-
-from utils.file.trans_methods import read_excel_files, read_file_to_df
-
-
-def get_year_days(year):
-    now_year = datetime.datetime.now().year
-
-    if now_year == year:
-        today = datetime.date.today()
-        # 获取昨天的日期
-        yesterday = today - datetime.timedelta(days=1)
-        # 获取今年的第一天
-        start_of_year = datetime.date(yesterday.year, 1, 1)
-        # 计算从年初到昨天的天数
-        return (yesterday - start_of_year).days + 1
-
-    if calendar.isleap(year):
-        return 366
-    else:
-        return 365
-
-
-def save_percent(value, save_decimal=7):
-    return round(value, save_decimal) * 100
-
-
-if __name__ == '__main__':
-
-    read_dir = r'D:\data\综合报表22-24年'
-
-    all_fils = read_excel_files(read_dir)
-
-    with multiprocessing.Pool(6) as pool:
-        dfs = pool.map(read_file_to_df, all_fils)
-
-    df = pd.concat(dfs, ignore_index=True)
-    del_cols = ['Unnamed: 0', '序号', 'times']
-
-    for col in del_cols:
-        del df[col]
-
-    df = df.query("风机 != '完整'")
-
-    numic_cols = ['数据有效性', '历史总有功发电量', '历史总有功耗电量',
-                  '查询区间有功发电量', '查询区间有功耗电量', '历史总无功发电量', '历史总无功耗电量',
-                  '查询区间无功发电量',
-                  '查询区间无功耗电量', '时间可利用率', '最大风速', '最小风速', '平均风速', '空气密度', '最大有功功率',
-                  '最小有功功率', '平均有功功率', '平均无功功率', '电网停机次数', '累计运行时间', '有效风时数',
-                  '满发时间',
-                  '启动时间', '启动次数', '并网发电时间', '等效发电时间', '正常发电时间', '调度限功率发电时间',
-                  '风机限功率发电时间',
-                  '停机时间', '维护停机时间', '故障停机时间', '调度停机时间', '气象停机时间', '电网停机时间',
-                  '远程停机时间',
-                  '待机时间', '户外平均温度', '机舱最高温度', '维护停机次数', '气象停机次数', '故障停机次数',
-                  '报警发电时间',
-                  '报警发电次数', '偏航时长', '偏航次数', '通讯中断时间', '通讯故障次数', '调度限功率发电损失电量',
-                  '风机限功率发电损失电量', '气象停机损失电量', '调度限功率停机损失电量', '远程停机损失电量',
-                  '维护停机损失电量',
-                  '风机故障停机损失电量', '电网停机损失电量']
-
-    for numic_col in numic_cols:
-        df[numic_col] = pd.to_numeric(df[numic_col], errors='coerce')
-
-    cols = df.columns
-    df['year'] = pd.to_datetime(df['时间'], errors='coerce').dt.year
-
-    group_df = df.groupby(by=['year', '风机']).count()
-    group_df.reset_index(inplace=True)
-    count_df = pd.DataFrame(group_df)
-
-    # now_df.to_csv('聚合后.csv', encoding='utf-8', index=False)
-
-    years = count_df['year'].unique()
-    wind_names = count_df['风机'].unique()
-    numic_cols.insert(0, '时间')
-
-    result_df = pd.DataFrame()
-    for year in years:
-        year_days = get_year_days(year)
-        for wind_name in wind_names:
-            count = count_df[(count_df['year'] == year) & (count_df['风机'] == wind_name)][numic_cols].values[0].sum()
-            print(year, wind_name, count, len(numic_cols) * year_days)
-            now_df = pd.DataFrame()
-            now_df['时间'] = [int(year)]
-            now_df['风机'] = [wind_name]
-            now_df['缺失均值'] = [save_percent(count / (len(numic_cols) * year_days))]
-
-            result_df = pd.concat([result_df, now_df])
-
-    result_df.to_csv('年度平均缺失率.csv', encoding='utf-8', index=False)

+ 0 - 31
tmp_file/张崾先26故障.py

@@ -1,31 +0,0 @@
-import pandas as pd
-
-df = pd.read_csv(r'C:\Users\Administrator\Documents\WeChat Files\anmox-\FileStorage\File\2024-12\26故障.csv',
-                 encoding='gbk')
-df['开始时间'] = pd.to_datetime(df['开始时间'], errors='coerce')
-df['结束时间'] = pd.to_datetime(df['结束时间'], errors='coerce')
-time_df = pd.DataFrame(df.groupby(['开始时间'])['结束时间'].max())
-time_df.reset_index(inplace=True)
-time_df.sort_values(by='开始时间', inplace=True)
-
-datas = set()
-max_row = None
-for index, row in time_df.iterrows():
-    if index == 0:
-        datas.add((row['开始时间'], row['结束时间']))
-        max_row = row
-        continue
-
-    if row['结束时间'] > max_row['结束时间']:
-        datas.add((row['开始时间'], row['结束时间']))
-        max_row = row
-
-result_df = pd.DataFrame()
-for begin, end in datas:
-    print(begin, end)
-    now_df = df[(df['开始时间'] == begin) & (df['结束时间'] == end)]
-    now_df = now_df.tail(1)
-    result_df = pd.concat([result_df, now_df])
-
-result_df.sort_values(by='开始时间', inplace=True)
-result_df.to_csv(r'd:\data\26故障_new.csv', encoding='utf8', index=False)
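Review note: the iterrows sweep above keeps each interval whose end time extends past every earlier end; the same selection can be vectorized. A sketch, assuming time_df is sorted by 开始时间 as it is here:

    prev_max = time_df['结束时间'].cummax().shift()
    keep = prev_max.isna() | (time_df['结束时间'] > prev_max)  # first row, then strict extensions
    selected = time_df[keep]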

+ 0 - 46
tmp_file/张崾先筛选20241210.py

@@ -1,46 +0,0 @@
-import datetime
-import multiprocessing
-import os
-import sys
-
-import pandas as pd
-
-sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
-
-from utils.file.trans_methods import read_file_to_df, read_excel_files
-
-# read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/点检表以外测点儿-20241209'
-
-# save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/变桨-20241210'
-
-# user_cols = ['Time', '机舱外温度', '桨叶角度A', '桨叶角度B', '桨叶角度C',
-#              '轴1电机电流', '轴2电机电流', '轴3电机电流',
-#              '轴1电机温度', '轴2电机温度', '轴3电机温度']
-
-
-read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/点检表以外测点儿-20241210'
-
-save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/偏航-20241210'
-
-user_cols = ['Time', '实际风速', '偏航误差', '电缆扭角', '激活偏航解缆阀','激活顺时针偏航','激活逆时针偏航']
-
-os.makedirs(save_dir, exist_ok=True)
-
-
-def read_and_save(file_path, read_dir, save_dir):
-    begin = datetime.datetime.now()
-    df = read_file_to_df(file_path, read_cols=user_cols)
-    df['Time'] = pd.to_datetime(df['Time'], errors='coerce')
-    df.sort_values(by=['Time'], inplace=True)
-    df.to_csv(os.path.join(save_dir, os.path.basename(file_path)), index=False, encoding='utf8')
-    print(os.path.basename(file_path), '耗时:', (datetime.datetime.now() - begin))
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    all_files = read_excel_files(read_dir)
-
-    with multiprocessing.Pool(16) as pool:
-        pool.starmap(read_and_save, [(file, read_dir, save_dir) for file in all_files])
-
-    print('总耗时:', (datetime.datetime.now() - begin))

+ 0 - 67
tmp_file/张崾先统计-分钟.py

@@ -1,67 +0,0 @@
-import multiprocessing
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
-
-import pandas as pd
-
-from utils.file.trans_methods import read_file_to_df
-
-
-def save_percent(value, save_decimal=7):
-    return round(value, save_decimal) * 100
-
-
-def read_and_select(file_path, read_cols):
-    result_df = pd.DataFrame()
-    df = read_file_to_df(file_path, read_cols=read_cols)
-    wind_name = os.path.basename(file_path).split('.')[0]
-    df['风机号'] = wind_name
-    df = df.query("(startTime>='2023-10-01 00:00:00') & (startTime<'2024-10-01 00:00:00')")
-    count = 366 * 24 * 6  # 十分钟数据  2024年366天
-    repeat_time_count = df.shape[0] - len(df['startTime'].unique())
-    print(wind_name, count, repeat_time_count)
-    result_df['风机号'] = [wind_name]
-    result_df['重复率'] = [save_percent(repeat_time_count / count)]
-    result_df['重复次数'] = [repeat_time_count]
-    result_df['总记录数'] = [count]
-
-    for read_col in read_cols:
-
-        if read_col != 'startTime':
-            df[read_col] = pd.to_numeric(df[read_col], errors='coerce')
-        else:
-            df[read_col] = pd.to_datetime(df[read_col], errors='coerce')
-
-    group_df = df.groupby(by=['风机号']).count()
-    group_df.reset_index(inplace=True)
-    count_df = pd.DataFrame(group_df)
-    total_count = count_df[read_cols].values[0].sum()
-    print(wind_name, total_count, count * len(read_cols))
-    result_df['平均缺失率,单位%'] = [save_percent(1 - total_count / (count * len(read_cols)))]
-    result_df['缺失数值'] = ['-'.join([str(count - i) for i in count_df[read_cols].values[0]])]
-    del group_df
-
-    error_fengsu_count = df.query("(风速10min < 0) | (风速10min > 80)").shape[0]
-    error_yougong_gonglv = df.query("(有功功率 < -200) | (有功功率 > 4800)").shape[0]
-
-    result_df['平均异常率'] = [save_percent((error_fengsu_count + error_yougong_gonglv) / (2 * count))]
-
-    return result_df
-
-
-if __name__ == '__main__':
-    read_cols_str = 'startTime,有功功率,叶轮转速,发电机转速,风速10min,桨叶1角度,桨叶2角度,桨叶3角度,机舱位置,偏航误差,发电机轴承温度,机舱内温度,环境温度,发电机U相温度,发电机V相温度,发电机W相温度'
-    read_cols = [i for i in read_cols_str.split(",") if i]
-    read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/收资数据/导出数据2'
-
-    files = os.listdir(read_dir)
-
-    with multiprocessing.Pool(16) as pool:
-        dfs = pool.starmap(read_and_select, [(os.path.join(read_dir, i), read_cols) for i in files])
-
-    df = pd.concat(dfs, ignore_index=True)
-    df.sort_values(by=['风机号'], inplace=True)
-
-    df.to_csv("张崾先统计-分钟.csv", encoding='utf8', index=False)

+ 0 - 92
tmp_file/张崾先统计-秒.py

@@ -1,92 +0,0 @@
-import multiprocessing
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
-
-import pandas as pd
-
-from utils.file.trans_methods import read_file_to_df
-
-
-def save_percent(value, save_decimal=7):
-    return round(value, save_decimal) * 100
-
-
-def read_and_select(file_path, read_cols):
-    result_df = pd.DataFrame()
-    df = read_file_to_df(file_path, read_cols=read_cols)
-    wind_name = os.path.basename(file_path).split('.')[0]
-    df['风机号'] = wind_name
-    df = df.query("(Time>='2024-06-01 00:00:00') & (Time<'2024-12-01 00:00:00')")
-    count = 15811200  # 1秒数据  半年
-    repeat_time_count = df.shape[0] - len(df['Time'].unique())
-    print(wind_name, count, repeat_time_count)
-    result_df['风机号'] = [wind_name]
-    result_df['重复率'] = [save_percent(repeat_time_count / count)]
-    result_df['重复次数'] = [repeat_time_count]
-    result_df['总记录数'] = [count]
-
-    for read_col in read_cols:
-
-        if read_col != 'Time':
-            df[read_col] = pd.to_numeric(df[read_col], errors='coerce')
-        else:
-            df[read_col] = pd.to_datetime(df[read_col], errors='coerce')
-
-    group_df = df.groupby(by=['风机号']).count()
-    group_df.reset_index(inplace=True)
-    count_df = pd.DataFrame(group_df)
-    total_count = count_df[read_cols].values[0].sum()
-    print(wind_name, total_count, count * len(read_cols))
-    result_df['平均缺失率,单位%'] = [save_percent(1 - total_count / (count * len(read_cols)))]
-    result_df['缺失数值'] = ['-'.join([str(count - i) for i in count_df[read_cols].values[0]])]
-    del group_df
-
-    fengsu_count = 0
-    fengsu_cols = [i for i in read_cols if '风速' in i]
-    fengsu_str = ''
-    for col in fengsu_cols:
-        now_count = df.query("(" + col + " < 0) | (" + col + " > 80)").shape[0]
-        fengsu_count = fengsu_count + now_count
-        # record the per-column count, not the running total
-        fengsu_str = fengsu_str + ',' + col + ':' + str(now_count)
-    result_df['风速异常'] = [fengsu_str]
-
-    gonglv_cols = ['有功功率', '瞬时功率', '当前理论可发最大功率']
-    gonglv_count = 0
-    gonglv_str = ''
-    for col in gonglv_cols:
-        now_count = df.query("(" + col + " < -200) | (" + col + " > 4800)").shape[0]
-        gonglv_count = gonglv_count + now_count
-        # record the per-column count, not the running total
-        gonglv_str = gonglv_str + ',' + col + ':' + str(now_count)
-    result_df['功率异常'] = [gonglv_str]
-
-    result_df['平均异常率'] = [
-        # was (fengsu_count + fengsu_count): the power anomalies were never counted
-        save_percent((fengsu_count + gonglv_count) / ((len(fengsu_cols) + len(gonglv_cols)) * count))]
-
-    return result_df
-
-
-if __name__ == '__main__':
-    read_cols = ['Time', '设备主要状态', '功率曲线风速', '湍流强度', '实际风速', '有功功率', '桨叶角度A', '桨叶角度B',
-                 '桨叶角度C', '机舱内温度', '机舱外温度', '绝对风向', '机舱绝对位置', '叶轮转速', '发电机转速',
-                 '瞬时风速',
-                 '有功设定反馈', '当前理论可发最大功率', '空气密度', '偏航误差', '发电机扭矩', '瞬时功率', '风向1s',
-                 '偏航压力', '桨叶1速度', '桨叶2速度', '桨叶3速度', '桨叶1角度给定', '桨叶2角度给定', '桨叶3角度给定',
-                 '轴1电机电流', '轴2电机电流', '轴3电机电流', '轴1电机温度', '轴2电机温度', '轴3电机温度', '待机',
-                 '启动',
-                 '偏航', '并网', '限功率', '正常发电', '故障', '计入功率曲线', '运行发电机冷却风扇1',
-                 '运行发电机冷却风扇2',
-                 '激活偏航解缆阀', '激活偏航刹车阀', '激活风轮刹车阀', '激活顺时针偏航', '激活逆时针偏航', '电缆扭角']
-
-    read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/点检表以外测点儿-20241210'
-
-    files = os.listdir(read_dir)
-
-    with multiprocessing.Pool(4) as pool:
-        dfs = pool.starmap(read_and_select, [(os.path.join(read_dir, i), read_cols) for i in files])
-
-    df = pd.concat(dfs, ignore_index=True)
-    df.sort_values(by=['风机号'], inplace=True)
-
-    df.to_csv("张崾先统计-秒.csv", encoding='utf8', index=False)

+ 0 - 90
tmp_file/张崾先震动_参数获取.py

@@ -1,90 +0,0 @@
-import datetime
-import multiprocessing
-import os.path
-
-import pandas as pd
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in os.listdir(path):
-        item_path = os.path.join(path, item)
-        if os.path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif os.path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path, filter_types=None):
-    if filter_types is None:
-        filter_types = ['xls', 'xlsx', 'csv', 'gz']
-    if os.path.isfile(read_path):
-        return [read_path]
-
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=filter_types)
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 读取路径下所有的文件
-def read_files(read_path, filter_types=None):
-    if filter_types is None:
-        filter_types = ['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar']
-    if os.path.isfile(read_path):
-        return [read_path]
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=filter_types)
-
-    return [path1 for paths in directory_dict.values() for path1 in paths if path1]
-
-
-all_files = read_files(r'G:\CMS', ['txt'])
-
-
-def get_line_count(file_path):
-    with open(file_path, 'r', encoding='utf-8') as file:
-        return sum(1 for _ in file)
-
-
-def read_file_and_read_count(index, file_path, datas):
-    if index % 10000 == 0:
-        print(datetime.datetime.now(), index)
-    base_name = os.path.basename(file_path).split('.')[0]
-    cols = base_name.split('_')
-
-    cols.append(get_line_count(file_path))
-    datas.append(cols)
-
-
-def get_name(x):
-    result_str = ''
-    if x['col3'] != '无':
-        result_str += x['col3']
-    result_str += x['col2']
-    if x['col4'] != '无':
-        result_str += x['col4']
-    result_str += x['col7']
-    return result_str
-
-
-if __name__ == '__main__':
-    datas = multiprocessing.Manager().list()
-
-    with multiprocessing.Pool(20) as pool:
-        pool.starmap(read_file_and_read_count, [(i, file_path, datas) for i, file_path in enumerate(all_files)])
-
-    df = pd.DataFrame(datas, columns=[f'col{i}' for i in range(10)])
-
-    df['col8'] = pd.to_datetime(df['col8'], format='%Y%m%d%H%M%S', errors='coerce')
-    df.sort_values(by=['col1', 'col8'], inplace=True)
-    df['测点完整名称'] = df.apply(get_name, axis=1)
-    df.to_csv('d://cms_data.csv', index=False, encoding='utf8')

+ 0 - 32
tmp_file/张崾先风电场-故障整理.py

@@ -1,32 +0,0 @@
-import multiprocessing
-import os
-
-import pandas as pd
-
-read_dir = 'D:\data\张崾先风电场\故障事件数据'
-save_dir = 'D:\data\崾先风电场\故障事件数据整理'
-
-print(os.listdir(read_dir))
-
-
-def read_solve_data(file_dir):
-    base_dir = os.path.basename(file_dir)
-    df = pd.DataFrame()
-    for file in os.listdir(file_dir):
-        df = pd.concat([df, pd.read_csv(file_dir + '/' + file, encoding='gbk')])
-
-    df['开始时间'] = pd.to_datetime(df['开始时间'], errors='coerce')
-    df = df.query("(开始时间 >= '2024-01-01 00:00:00') & (开始时间 < '2024-12-01 00:00:00')")
-    df['month'] = df['开始时间'].dt.month
-    months = df['month'].unique()
-    for month in months:
-        df_month = df[df['month'] == month]
-        os.makedirs(save_dir + os.sep + base_dir, exist_ok=True)
-        df_month.to_csv(save_dir + os.sep + base_dir + os.sep + str(month) + '.csv', index=False)
-
-
-if __name__ == '__main__':
-    dirs = os.listdir(read_dir)
-
-    with multiprocessing.Pool(4) as pool:
-        pool.map(read_solve_data, [read_dir + os.sep + i for i in dirs])

+ 0 - 108
tmp_file/张崾先风电场-非点检字段获取.py

@@ -1,108 +0,0 @@
-import datetime
-import multiprocessing
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
-
-from utils.file.trans_methods import read_excel_files, copy_to_new, read_file_to_df
-from utils.zip.unzip import unzip, get_desc_path, unrar
-import pandas as pd
-
-read_cols = ['Time', '设备主要状态', '功率曲线风速', '湍流强度', '实际风速', '有功功率', '桨叶角度A', '桨叶角度B',
-             '桨叶角度C', '机舱内温度', '机舱外温度', '绝对风向', '机舱绝对位置', '叶轮转速', '发电机转速', '瞬时风速',
-             '有功设定反馈', '当前理论可发最大功率', '空气密度', '偏航误差', '发电机扭矩', '瞬时功率', '风向1s',
-             '偏航压力', '桨叶1速度', '桨叶2速度', '桨叶3速度', '桨叶1角度给定', '桨叶2角度给定', '桨叶3角度给定',
-             '轴1电机电流', '轴2电机电流', '轴3电机电流', '轴1电机温度', '轴2电机温度', '轴3电机温度', '待机', '启动',
-             '偏航', '并网', '限功率', '正常发电', '故障', '计入功率曲线', '运行发电机冷却风扇1', '运行发电机冷却风扇2',
-             '激活偏航解缆阀', '激活偏航刹车阀', '激活风轮刹车阀', '激活顺时针偏航', '激活逆时针偏航', '电缆扭角']
-
-read_path = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/收资数据/sec'
-save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/点检表以外测点儿-20241210'
-tmp_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/tmp/second/excel_tmp'
-
-# read_path = r'D:\data\张崾先风电场\6'
-# save_dir = r'D:\data\张崾先风电场\点检表以外测点儿-20241209'
-# tmp_dir = r'D:\data\张崾先风电场\tmp'
-
-os.makedirs(tmp_dir, exist_ok=True)
-os.makedirs(save_dir, exist_ok=True)
-
-
-def get_and_remove(file):
-    to_path = tmp_dir
-    if str(file).endswith("zip"):
-        if str(file).endswith("csv.zip"):
-            copy_to_new(file, file.replace(read_path, to_path).replace("csv.zip", 'csv.gz'))
-        else:
-            desc_path = file.replace(read_path, to_path)
-            is_success, e = unzip(file, get_desc_path(desc_path))
-            if not is_success:
-                # raise e
-                pass
-    elif str(file).endswith("rar"):
-        desc_path = file.replace(read_path, to_path)
-        unrar(file, get_desc_path(desc_path))
-    else:
-        copy_to_new(file, file.replace(read_path, to_path))
-
-
-def get_resolve(file_path, exist_wind_names, map_lock):
-    begin = datetime.datetime.now()
-    df = read_file_to_df(file_path, read_cols=read_cols)
-    wind_name = str(os.path.basename(file_path)[0:2])
-    date = os.path.basename(file_path)[14:24]
-    df['Time'] = df['Time'].apply(lambda x: date + ' ' + x)
-    df = df[read_cols]
-    with map_lock[str(wind_name)]:
-        if wind_name in exist_wind_names:
-            df.to_csv(save_dir + '/' + wind_name + '.csv', mode='a', index=False, header=False, encoding='utf8')
-        else:
-            df.to_csv(save_dir + '/' + wind_name + '.csv', index=False, encoding='utf8')
-            exist_wind_names.append(wind_name)
-
-    print(os.path.basename(file_path), '执行完成,耗时:', get_haoshi(begin))
-
-
-def sort_data(file_path):
-    df = pd.read_csv(file_path, encoding='utf8')
-    df['Time'] = pd.to_datetime(df['Time'], errors='coerce')  # keyword is errors, not error
-    df.sort_values(by=['Time'], inplace=True)
-    df.to_csv(file_path, index=False, encoding='utf8')
-
-
-def get_haoshi(begin):
-    return datetime.datetime.now() - begin
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    # all_files = read_files(read_path)
-    # split_count = get_available_cpu_count_with_percent(1 / 2)
-    # all_arrays = split_array(all_files, split_count)
-    #
-    # for index, arr in enumerate(all_arrays):
-    #     with multiprocessing.Pool(10) as pool:
-    #         pool.starmap(get_and_remove, [(i,) for i in arr])
-    #
-    # print("移动完成,耗时:", get_haoshi(begin))
-
-    # exist_wind_names = multiprocessing.Manager().list()
-    #
-    # map_lock = dict()
-    # for i in range(26, 42):
-    #     map_lock[str(i)] = multiprocessing.Manager().Lock()
-    #
-    # all_files = read_excel_files(tmp_dir)
-    # with multiprocessing.Pool(16) as pool:
-    #     pool.starmap(get_resolve, [(i, exist_wind_names, map_lock) for i in all_files])
-    #
-    # print("整理完成,耗时:", get_haoshi(begin))
-
-    all_files = read_excel_files(save_dir)
-    with multiprocessing.Pool(4) as pool:
-        pool.map(sort_data, all_files)
-    print("排序完成,耗时:", get_haoshi(begin))
-
-    # shutil.rmtree(tmp_dir)
-    # print("移除临时文件完成,耗时:", get_haoshi(begin))

+ 0 - 158
tmp_file/玉湖光伏-标准化.py

@@ -1,158 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jul  9 16:28:48 2024
-
-@author: Administrator
-"""
-import multiprocessing
-from datetime import datetime
-import os
-
-import chardet
-import pandas as pd
-
-pd.options.mode.copy_on_write = True
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    df = pd.DataFrame()
-    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-        encoding = detect_file_encoding(file_path)
-        end_with_gz = str(file_path).lower().endswith("gz")
-        if read_cols:
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
-        else:
-
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-    else:
-        xls = pd.ExcelFile(file_path)
-        # 获取所有的sheet名称
-        sheet_names = xls.sheet_names
-        for sheet in sheet_names:
-            if read_cols:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
-            else:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
-
-    return df
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in os.listdir(path):
-        item_path = os.path.join(path, item)
-        if os.path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif os.path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-    # 读取所有文件
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    if is_file_path:
-        path = os.path.dirname(path)
-
-    if not os.path.exists(path):
-        os.makedirs(path, exist_ok=True)
-
-
-def generate_df(pv_df, col):
-    if col != '时间':
-        xiangbian = col.split("逆变器")[0].replace("#", "")
-        nibianqi = col.split("-")[0].split('逆变器')[1]
-        pv_index = col.split("-")[1].replace("PV", "")
-        now_df = pv_df[['时间', col + '输入电流()', col + '输入电压()']]
-        now_df.loc[:, '箱变'] = xiangbian
-        now_df.loc[:, '逆变器'] = nibianqi
-        now_df.loc[:, 'PV'] = pv_index
-        now_df.columns = [df_col.replace(col, "").replace("()", "") for df_col in now_df.columns]
-        now_df['输入电流'] = now_df['输入电流'].astype(float)
-        now_df['输入电压'] = now_df['输入电压'].astype(float)
-
-        print(xiangbian, nibianqi, pv_index, now_df.shape)
-        return now_df
-    return pd.DataFrame()
-
-
-def read_and_save_csv(file_path, save_path):
-    begin = datetime.now()
-    base_name = os.path.basename(file_path)
-    print('开始', base_name)
-
-    df = read_file_to_df(file_path)
-    df['时间'] = pd.to_datetime(df['时间'])
-    # df.set_index(keys='时间', inplace=True)
-
-    pv_df_cols = [col for col in df.columns if col.find('输入电') > -1]
-    pv_df_cols.append('时间')
-    pv_df = df[pv_df_cols]
-    shuru_cols = set([col.split("输入电")[0] for col in pv_df.columns])
-
-    with multiprocessing.Pool(6) as pool:
-        dfs = pool.starmap(generate_df, [(pv_df, col) for col in shuru_cols])
-
-    saved_pv_df = pd.concat(dfs)
-    saved_pv_df.sort_values(by=['箱变', '逆变器', 'PV', '时间'], inplace=True)
-    save_file = os.path.join(save_path, os.path.basename(file_path).split(".")[0], 'PV.csv')
-    create_file_path(save_file, True)
-
-    saved_pv_df.to_csv(save_file, encoding='utf-8', index=False)
-
-    print('结束', base_name, '耗时:' + str(datetime.now() - begin))
-
-
-if __name__ == '__main__':
-    read_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\yuanshi'
-    save_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\zhengli'
-    all_files = read_excel_files(read_path)
-
-    all_datas = list(all_files)
-    all_datas.sort()
-    print(all_datas)
-
-    for file in all_datas:
-        read_and_save_csv(file, save_path)
-
-    # with Pool(1) as pool:
-    #     pool.starmap(read_and_save_csv, [(i, save_path) for i in all_datas])
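Each of the deleted 玉湖 scripts carries its own copy of the chardet-based encoding sniff plus CSV reader. A condensed sketch of that shared pattern, keeping the original 1000-byte sample and gb18030 fallback (function names are illustrative):

import chardet
import pandas as pd

def sniff_encoding(filename, sample_size=1000):
    # 读取文件开头若干字节做编码检测;检测失败或常见误报时退回 gb18030
    with open(filename, 'rb') as f:
        raw = f.read(sample_size)
    encoding = chardet.detect(raw)['encoding']
    if encoding is None or encoding.lower() == 'gb2312' or encoding.lower().startswith('windows'):
        return 'gb18030'
    return encoding

def read_csv_any(file_path):
    return pd.read_csv(file_path, encoding=sniff_encoding(file_path), on_bad_lines='warn')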

+ 0 - 209
tmp_file/玉湖光伏-标准化_1.py

@@ -1,209 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jul  9 16:28:48 2024
-
-@author: Administrator
-"""
-import multiprocessing
-from datetime import datetime
-import os
-
-import chardet
-import pandas as pd
-
-pd.options.mode.copy_on_write = True
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    df = pd.DataFrame()
-    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-        encoding = detect_file_encoding(file_path)
-        end_with_gz = str(file_path).lower().endswith("gz")
-        if read_cols:
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
-        else:
-
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-    else:
-        xls = pd.ExcelFile(file_path)
-        # 获取所有的sheet名称
-        sheet_names = xls.sheet_names
-        for sheet in sheet_names:
-            if read_cols:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
-            else:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
-
-    return df
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in os.listdir(path):
-        item_path = os.path.join(path, item)
-        if os.path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif os.path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-    # 读取所有文件
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    if is_file_path:
-        path = os.path.dirname(path)
-
-    if not os.path.exists(path):
-        os.makedirs(path, exist_ok=True)
-
-
-def generate_df(pv_df, col):
-    if col != '时间':
-        xiangbian = col.split("逆变器")[0].replace("#", "")
-        nibianqi = col.split("-")[0].split('逆变器')[1]
-        pv_index = col.split("-")[1].replace("PV", "")
-        now_df = pv_df[['时间', col + '输入电流()', col + '输入电压()']]
-        now_df.loc[:, '箱变'] = xiangbian
-        now_df.loc[:, '逆变器'] = nibianqi
-        now_df.loc[:, 'PV'] = pv_index
-        now_df.columns = [df_col.replace(col, "").replace("()", "") for df_col in now_df.columns]
-        now_df['输入电流'] = now_df['输入电流'].astype(float)
-        now_df['输入电压'] = now_df['输入电压'].astype(float)
-
-        print(xiangbian, nibianqi, pv_index, now_df.shape)
-        return now_df
-    return pd.DataFrame()
-
-
-def split_index(split_data: str, split_str: str):
-    count = split_data.find(split_str)
-    if count > -1:
-        return split_data[count + len(split_str):]
-    else:
-        return split_str
-
-
-def replece_col_to_biaozhun(col):
-    for k, v in dianjian_dict.items():
-        if col.find(k) > -1:
-            col = col.replace(k, v)
-            return col
-
-    return col
-
-
-def read_and_save_csv(file_path, save_path):
-    begin = datetime.now()
-    base_name = os.path.basename(file_path)
-    print('开始', base_name)
-
-    df = read_file_to_df(file_path)
-
-    for col in df.columns:
-        for del_col in del_cols:
-            if col.find(del_col) > -1:
-                del df[col]
-
-    df['时间'] = pd.to_datetime(df['时间'])
-    xiangbian = [col for col in df.columns
-                 if str(col).startswith('#') and str(col).find('逆变器') > -1][0].split("逆变器")[0].replace("#", "")
-    df.columns = [xiangbian + "_" + split_index(df_col, "逆变器").replace('PV', "").replace("()", "").replace("-", "_")
-                  if df_col.startswith("#") else df_col
-                  for df_col in df.columns]
-
-    df.columns = [col.replace("输入", "_输入") for col in df.columns]
-
-    df.columns = [replece_col_to_biaozhun(col) for col in df.columns]
-
-    # saved_pv_df = pd.concat(dfs)
-    df.sort_values(by=['时间'], inplace=True)
-    save_file = os.path.join(save_path, os.path.basename(file_path))
-    create_file_path(save_file, True)
-
-    df.to_csv(save_file, encoding='utf-8', index=False)
-
-    print('结束', base_name, '耗时:' + str(datetime.now() - begin))
-
-
-dianjian_data_str = """
-输入电压	支路输出电压
-输入电流	支路输出电流
-功率因数	
-总发电量	逆变器总发电量
-无功功率	
-有功功率	逆变器输出有功功率
-机内温度	逆变器温度
-电网AB线电压	交流输出电压
-电网A相电流	逆变器输出电流A相
-电网BC线电压	
-电网B相电流	逆变器输出电流B相
-电网CA线电压	
-电网C相电流	逆变器输出电流C相
-逆变器效率	逆变器转换效率
-"""
-
-dianjian_dict = {}
-del_cols = []
-for data in dianjian_data_str.split("\n"):
-    if data:
-        datas = data.split("\t")
-        if len(datas) == 2 and datas[1]:
-            dianjian_dict[datas[0]] = datas[1]
-        else:
-            del_cols.append(datas[0])
-
-if __name__ == '__main__':
-    read_path = r'/data/download/大唐玉湖性能分析离线分析/05整理数据/逆变器数据'
-    save_path = r'/data/download/大唐玉湖性能分析离线分析/06整理数据/逆变器数据'
-    all_files = read_excel_files(read_path)
-
-    all_datas = list(all_files)
-    all_datas.sort()
-    print(all_datas)
-    #
-    # for file in all_datas:
-    #     read_and_save_csv(file, save_path)
-
-    with multiprocessing.Pool(20) as pool:
-        pool.starmap(read_and_save_csv, [(i, save_path) for i in all_datas])
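标准化_1.py derives its rename map from the tab-separated 点检 table: rows with two fields become rename entries, rows with one field mark columns to drop. A compact sketch of applying such a map with the script's substring-replacement semantics; the mapping below is an excerpt, not the full table:

import pandas as pd

# 节选:左侧为原始测点名片段,右侧为标准名;无标准名的片段对应整列删除
mapping = {'输入电压': '支路输出电压', '输入电流': '支路输出电流', '机内温度': '逆变器温度'}
drop_keys = ['功率因数', '无功功率']

def standardize_columns(df):
    # 先删掉点检表中无标准名的测点列
    keep = [c for c in df.columns if not any(k in c for k in drop_keys)]
    out = df[keep].copy()
    # 再按映射做子串替换,保留列名中的箱变/逆变器前缀
    out.columns = [next((c.replace(k, v) for k, v in mapping.items() if k in c), c)
                   for c in out.columns]
    return out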

+ 0 - 283
tmp_file/玉湖光伏-标准化_2.py

@@ -1,283 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jul  9 16:28:48 2024
-
-@author: Administrator
-"""
-import multiprocessing
-from datetime import datetime
-import os
-
-import chardet
-import numpy as np
-import pandas as pd
-
-pd.options.mode.copy_on_write = True
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    df = pd.DataFrame()
-    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-        encoding = detect_file_encoding(file_path)
-        end_with_gz = str(file_path).lower().endswith("gz")
-        if read_cols:
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
-        else:
-
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-    else:
-        xls = pd.ExcelFile(file_path)
-        # 获取所有的sheet名称
-        sheet_names = xls.sheet_names
-        for sheet in sheet_names:
-            if read_cols:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
-            else:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
-
-    return df
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in os.listdir(path):
-        item_path = os.path.join(path, item)
-        if os.path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif os.path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-    # 读取所有文件
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    if is_file_path:
-        path = os.path.dirname(path)
-
-    if not os.path.exists(path):
-        os.makedirs(path, exist_ok=True)
-
-
-def split_index(split_data: str, split_str: str):
-    count = split_data.find(split_str)
-    if count > -1:
-        return split_data[count + len(split_str):]
-    else:
-        return split_str
-
-
-def replece_col_to_biaozhun(col):
-    for k, v in dianjian_dict.items():
-        if col.find(k) > -1:
-            col = col.replace(k, v)
-            return col
-
-    return col
-
-
-def row_to_datas(row, pv_dict, inverter_cols, df_cols):
-    row_datas = list(list())
-    for xiangbian in pv_dict.keys():
-        for nibianqi in pv_dict[xiangbian].keys():
-            for pv in pv_dict[xiangbian][nibianqi]:
-                datas = [np.nan] * 14
-                datas[0] = row['时间']
-                datas[1] = xiangbian
-                datas[2] = nibianqi
-                datas[3] = pv
-                datas_4_col = "_".join([str(xiangbian), str(nibianqi), str(pv), '支路输出电压'])
-                if datas_4_col in df_cols:
-                    datas[4] = row[datas_4_col]
-                else:
-                    datas[4] = np.nan
-
-                datas_5_col = "_".join([str(xiangbian), str(nibianqi), str(pv), '支路输出电流'])
-                if datas_5_col in df_cols:
-                    datas[5] = row[datas_5_col]
-                else:
-                    datas[5] = np.nan
-
-                row_datas.append(datas)
-
-    for xiangbian in pv_dict.keys():
-        for nibianqi in pv_dict[xiangbian].keys():
-            datas = [np.nan] * 14
-            datas[0] = row['时间']
-            datas[1] = xiangbian
-            datas[2] = nibianqi
-            datas[3] = 0
-            for index, col_name in enumerate(inverter_cols):
-                col = '_'.join([str(xiangbian), str(nibianqi), col_name])
-                if col in df_cols:
-                    datas[index + 6] = row[col]
-                else:
-                    datas[index + 6] = np.nan
-
-            row_datas.append(datas)
-
-    return row_datas
-
-
-def df_to_biaozhun(df):
-    pv_cols = ['支路输出电压', '支路输出电流']
-    inverter_cols = ['逆变器总发电量', '逆变器输出有功功率', '逆变器温度', '交流输出电压', '逆变器输出电流A相', '逆变器输出电流B相', '逆变器输出电流C相', '逆变器转换效率']
-    # 从列名获取箱变->逆变器->PV等的字典
-    pv_dict = dict(dict())
-    for col in df.columns:
-        for pv_col in pv_cols:
-            if str(col).endswith(pv_col):
-                datas = col.split("_")
-                xiangbian = datas[0]
-                nibiangqi = datas[1]
-                pv = datas[2]
-
-                if xiangbian in pv_dict.keys():
-                    if nibiangqi in pv_dict[xiangbian]:
-                        pv_dict[xiangbian][nibiangqi].add(pv)
-                    else:
-                        pv_dict[xiangbian][nibiangqi] = set([pv])
-                else:
-                    pv_dict[xiangbian] = {nibiangqi: set([pv])}
-
-    results = df.apply(row_to_datas, args=(pv_dict, inverter_cols, df.columns), axis=1)
-
-    df_datas = results.to_list()
-    df_datas = [da for data in df_datas for da in data]
-    df_cols = ["时间", "箱变", "逆变器", "支路"]
-    df_cols.extend(pv_cols)
-    df_cols.extend(inverter_cols)
-    df = pd.DataFrame(df_datas, columns=df_cols)
-
-    type_conver_list = []
-    type_conver_list.extend(pv_cols)
-    type_conver_list.extend(inverter_cols)
-    for type_conver in type_conver_list:
-        df[type_conver] = pd.to_numeric(df[type_conver], errors='coerce')
-
-    return df
-
-
-def read_and_save_csv(file_path, save_path):
-    begin = datetime.now()
-    base_name = os.path.basename(file_path)
-    print('开始', base_name)
-
-    df = read_file_to_df(file_path)
-
-    for col in df.columns:
-        for del_col in del_cols:
-            if col.find(del_col) > -1:
-                del df[col]
-
-    df['时间'] = pd.to_datetime(df['时间'])
-    xiangbian = [col for col in df.columns
-                 if str(col).startswith('#') and str(col).find('逆变器') > -1][0].split("逆变器")[0].replace("#", "")
-    df.columns = [xiangbian + "_" + split_index(df_col, "逆变器").replace('PV', "").replace("()", "").replace("-", "_")
-                  if df_col.startswith("#") else df_col
-                  for df_col in df.columns]
-
-    df.columns = [col.replace("输入", "_输入") for col in df.columns]
-    df.columns = [replece_col_to_biaozhun(col) for col in df.columns]
-
-    df = df_to_biaozhun(df)
-
-    # df.sort_values(by=['时间', "箱变", "逆变器", "支路"], inplace=True)
-    # save_file = path.join(save_path, path.basename(file_path))
-    # create_file_path(save_file, True)
-
-    # df.to_csv(save_file, encoding='utf-8', index=False)
-
-    print('结束', base_name, '耗时:' + str(datetime.now() - begin))
-    return df
-
-dianjian_data_str = """
-输入电压	支路输出电压
-输入电流	支路输出电流
-功率因数	
-总发电量	逆变器总发电量
-无功功率	
-有功功率	逆变器输出有功功率
-机内温度	逆变器温度
-电网AB线电压	交流输出电压
-电网A相电流	逆变器输出电流A相
-电网BC线电压	
-电网B相电流	逆变器输出电流B相
-电网CA线电压	
-电网C相电流	逆变器输出电流C相
-逆变器效率	逆变器转换效率
-"""
-
-dianjian_dict = {}
-del_cols = []
-for data in dianjian_data_str.split("\n"):
-    if data:
-        datas = data.split("\t")
-        if len(datas) == 2 and datas[1]:
-            dianjian_dict[datas[0]] = datas[1]
-        else:
-            del_cols.append(datas[0])
-
-if __name__ == '__main__':
-    read_path = r'/data/download/大唐玉湖性能分析离线分析/05整理数据/逆变器数据'
-    save_path = r'/data/download/大唐玉湖性能分析离线分析/06整理数据/逆变器数据'
-    # read_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\yuanshi'
-    # save_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\zhengli'
-    all_files = read_excel_files(read_path)
-
-    all_datas = list(all_files)
-    all_datas.sort()
-    print(all_datas)
-
-    # for file in all_datas:
-    #     read_and_save_csv(file, save_path)
-
-    with multiprocessing.Pool(40) as pool:
-        dfs = pool.starmap(read_and_save_csv, [(i, save_path) for i in all_datas])
-
-    saved_pv_df = pd.concat(dfs)
-    saved_pv_df.sort_values(by=['时间', "箱变", "逆变器", "支路"], inplace=True)
-    save_file = path.join(save_path, "合并.csv")
-    create_file_path(save_file, True)
-    saved_pv_df.sort_values(by=['时间', "箱变", "逆变器", "支路"], inplace=True)
-    saved_pv_df.to_csv(save_file, encoding='utf-8', index=False)
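标准化_2.py goes from the wide column layout to long rows with a row-wise apply, which costs a Python-level call per row. If the standardized names reliably split into 箱变_逆变器_支路_指标, a vectorized melt/pivot_table reaches a similar long layout; a sketch under that naming assumption (it does not cover the inverter-level columns whose names have fewer parts):

import pandas as pd

def wide_to_long(df):
    # 假设:除"时间"外的列名都恰好形如 "箱变_逆变器_支路_指标"
    long_df = df.melt(id_vars=['时间'], var_name='测点', value_name='数值')
    long_df[['箱变', '逆变器', '支路', '指标']] = long_df['测点'].str.split('_', expand=True)
    # 行转列:每个指标一列,与原脚本的长表布局一致
    return (long_df.pivot_table(index=['时间', '箱变', '逆变器', '支路'],
                                columns='指标', values='数值', aggfunc='first')
            .reset_index())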

+ 0 - 122
tmp_file/玉湖光伏-气象标准化.py

@@ -1,122 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jul  9 16:28:48 2024
-
-@author: Administrator
-"""
-import os
-
-import chardet
-import pandas as pd
-
-pd.options.mode.copy_on_write = True
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    df = pd.DataFrame()
-    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-        encoding = detect_file_encoding(file_path)
-        end_with_gz = str(file_path).lower().endswith("gz")
-        if read_cols:
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
-        else:
-
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-    else:
-        xls = pd.ExcelFile(file_path)
-        # 获取所有的sheet名称
-        sheet_names = xls.sheet_names
-        for sheet in sheet_names:
-            if read_cols:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
-            else:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
-
-    return df
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in os.listdir(path):
-        item_path = os.path.join(path, item)
-        if os.path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif os.path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-    # 读取所有文件
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    if is_file_path:
-        path = os.path.dirname(path)
-
-    if not os.path.exists(path):
-        os.makedirs(path, exist_ok=True)
-
-
-if __name__ == '__main__':
-    # read_dir = r'/data/download/大唐玉湖性能分析离线分析/05整理数据/气象站数据'
-    # save_path = r'/data/download/大唐玉湖性能分析离线分析/06整理数据/气象站数据'
-    read_dir = r'Z:\大唐玉湖性能分析离线分析\05整理数据\气象站数据'
-    save_path = r'Z:\大唐玉湖性能分析离线分析\06整理数据\气象站数据'
-
-    fengsu_df = read_file_to_df(os.path.join(read_dir, '风速.csv'), read_cols=['当前时间', '实际风速'])
-    fengxiang_df = read_file_to_df(os.path.join(read_dir, '风向.csv'), read_cols=['当前时间', '实际风向'])
-    fuzhaodu_df = read_file_to_df(os.path.join(read_dir, '辐照度.csv'), read_cols=['时间', '水平总辐照度', '倾斜总辐照度', '散射辐照度'])
-    shidu_df = read_file_to_df(os.path.join(read_dir, '湿度.csv'), read_cols=['时间', '实际湿度'])
-    wendu_df = read_file_to_df(os.path.join(read_dir, '温度.csv'), read_cols=['时间', '实际温度'])
-    yali_df = read_file_to_df(os.path.join(read_dir, '压力.csv'), read_cols=['时间', '实际气压'])
-
-    fengsu_df.rename(columns={'当前时间': '时间'}, inplace=True)
-    fengxiang_df.rename(columns={'当前时间': '时间'}, inplace=True)
-
-    dfs = [fengxiang_df, fengsu_df, fuzhaodu_df, shidu_df, wendu_df, yali_df]
-
-    for df in dfs:
-        df['时间'] = pd.to_datetime(df['时间'])
-        df.set_index(keys='时间', inplace=True)
-
-    df = pd.concat(dfs, axis=1)
-    create_file_path(save_path, is_file_path=False)
-    df.to_csv(os.path.join(save_path, '气象合并.csv'), encoding='utf-8')
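The weather script merges the six series by indexing each frame on 时间 and concatenating column-wise; pd.concat(axis=1) outer-joins on the index, so a timestamp missing from any one file leaves NaN in that file's columns. The pattern in isolation (helper name illustrative):

import pandas as pd

def merge_on_time(frames, time_col='时间'):
    # 各表以时间为索引后按列拼接;pd.concat(axis=1) 对索引做外连接
    indexed = []
    for df in frames:
        df = df.copy()
        df[time_col] = pd.to_datetime(df[time_col])
        indexed.append(df.set_index(time_col))
    return pd.concat(indexed, axis=1).sort_index()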

+ 0 - 90
tmp_file/王博提取数据完整风机数据.py

@@ -1,90 +0,0 @@
-import datetime
-import multiprocessing
-import os
-
-import chardet
-import pandas as pd
-
-
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    print("文件类型:", filename, encoding)
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding.lower() in ['utf-8', 'ascii', 'utf8', 'utf-8-sig']:
-        return 'utf-8'
-
-    return 'gb18030'
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in os.listdir(path):
-        item_path = os.path.join(path, item)
-        if os.path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif os.path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path, filter_types=None):
-    if filter_types is None:
-        filter_types = ['xls', 'xlsx', 'csv', 'gz']
-    if os.path.isfile(read_path):
-        return [read_path]
-
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=filter_types)
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-def read_file_to_df(file_path):
-    df = pd.read_csv(file_path, encoding=detect_file_encoding(file_path))
-    date = os.path.basename(file_path)[14:24]
-    df['Time'] = df['Time'].apply(lambda x: date + ' ' + x)
-    return df
-
-
-def read_files_and_save_csv(file_dir, month, save_dir):
-    begin = datetime.datetime.now()
-    base_dir = os.path.basename(file_dir)
-    print(f"{datetime.datetime.now()}: 开始执行{base_dir}-{month}")
-    all_files = read_excel_files(os.path.join(file_dir, month))
-    df = pd.concat([read_file_to_df(file) for file in all_files], ignore_index=True)
-    save_path = os.path.join(save_dir, base_dir, f'{month}.csv')
-    os.makedirs(os.path.dirname(save_path), exist_ok=True)
-    df.sort_values(by=['Time'], inplace=True)
-    df.to_csv(save_path, encoding='utf8', index=False)
-    print(f"{datetime.datetime.now()}: 执行{base_dir}-{month}结束,耗时{datetime.datetime.now() - begin}")
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/tmp/second/excel_tmp/'
-    save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/20241217完整字段'
-    read_dirs = list()
-    for i in range(26, 42):
-        read_dirs.append(os.path.join(read_dir, str(i)))
-
-    for read_dir in read_dirs:
-        # 注意:这里不再重置 begin,否则最后打印的总耗时只覆盖最后一个目录
-        with multiprocessing.Pool(6) as pool:
-            pool.starmap(read_files_and_save_csv, [(read_dir, i, save_dir) for i in os.listdir(read_dir)])
-
-    print(f"{datetime.datetime.now()}: 执行结束,总耗时{datetime.datetime.now() - begin}")

+ 0 - 35
tmp_file/白玉山每月限电损失.py

@@ -1,35 +0,0 @@
-import os
-
-import pandas as pd
-
-read_path = r'D:\data\白玉山后评估数据资料\需要整理的数据\每月发电量和限电量、限电率'
-
-all_paths = list()
-for root, dirs, files in os.walk(read_path):
-    if files:
-        for file in files:
-            year_mont = int(file.split("(")[1].split("_")[0])
-            if year_mont >= 20230901 and year_mont < 20240901:
-                all_paths.append(os.path.join(root, file))
-
-df = pd.DataFrame()
-
-for file_path in all_paths:
-    now_df = pd.read_excel(file_path, usecols=['设备名称', '统计时间', '限电损失电量(kWh)'], header=2)
-    now_df = now_df[now_df['设备名称'].str.startswith("#")]
-    df = pd.concat([df, now_df])
-
-## 人工验证 看一看
-print(df[df['设备名称'] == '#34'])
-
-df = df[['设备名称', '限电损失电量(kWh)']]
-group_df = df.groupby('设备名称').sum()
-
-result_df = pd.DataFrame(group_df)
-result_df.reset_index(inplace=True)
-result_df.columns = ['设备名称', '总限电损失电量(kWh)']
-result_df.sort_values(by=['设备名称'], inplace=True)
-
-print(result_df)
-
-result_df.to_csv("设备总限电损失.csv", encoding='utf-8', index=False)

+ 0 - 29
tmp_file/筛选字段.py

@@ -1,29 +0,0 @@
-import multiprocessing
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
-
-from utils.file.trans_methods import read_file_to_df, create_file_path
-
-
-def read_and_save(file_path, select_cols, save_path):
-    base_name = os.path.basename(file_path).split('.')[0]
-    df = read_file_to_df(file_path, read_cols=select_cols)
-
-    save_path = os.path.join(save_path, base_name + '.csv')
-    create_file_path(save_path, True)
-    df.to_csv(save_path, index=False, encoding='utf-8')
-
-
-if __name__ == '__main__':
-    select_cols_str = 'Time,瞬时风速,风机号,瞬时功率,扭矩给定,扭矩反馈,高风切出,风机允许功率管理,功率管理使能反馈,不可利用,功率曲线可用,主控初始化完成,待机,启动,偏航,并网,限功率,正常发电,故障,紧急停机,快速停机,正常停机,告警,停机完成,允许功率管理,处于功率管理,检修,维护'
-
-    select_cols = [i for i in select_cols_str.split(',') if i]
-
-    read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/20241213(26,38)完整字段/26'
-
-    save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/20241213(26,38)完整字段/20241216113130'
-
-    with multiprocessing.Pool(6) as pool:
-        pool.starmap(read_and_save, [(os.path.join(read_dir, i), select_cols, save_dir) for i in os.listdir(read_dir)])
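read_file_to_df with a fixed read_cols list raises if a requested column is absent from a file. pandas also accepts a callable for usecols, which simply keeps whichever wanted columns are present; a tolerant variant (helper name illustrative):

import pandas as pd

def read_selected(file_path, wanted):
    # usecols 传入可调用对象:逐列判断是否保留,文件缺少某列时不会报错
    wanted_set = set(wanted)
    return pd.read_csv(file_path, usecols=lambda c: c in wanted_set, encoding='utf-8')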

+ 3 - 0
utils/common.py

@@ -0,0 +1,3 @@
+excel_types = ['xls', 'xlsx', 'xlsm', 'xlsb', 'odf', 'ods', 'csv', 'csv.gz']
+
+zip_types = ['rar', 'zip']
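A caveat on the new 'csv.gz' entry: the directory filters in the deleted helpers above compare str(item_path).split(".")[-1] against the type list, and that expression yields "gz" for a name ending in .csv.gz, so a 'csv.gz' filter may never match under that test; whether trans_methods shares this behavior is not visible in this hunk. A suffix check that handles compound extensions (sketch):

def matches_type(file_name, type_list):
    # 用 endswith 匹配,兼容 "csv.gz" 这类复合扩展名
    name = str(file_name).lower()
    return any(name.endswith('.' + t) for t in type_list)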

+ 2 - 2
utils/db/ConnectMysql.py

@@ -13,10 +13,10 @@ from utils.log.trans_log import trans_print
 class ConnectMysql:
 
     def __init__(self, connet_name):
-        config_path = path.abspath(__file__).split("utils")[0] + 'conf' + sep + 'etl_config_prod.yaml'
-        self.yaml_data = yaml_conf(environ.get('ETL_CONF', config_path))
+        self.yaml_data = yaml_conf(environ.get('ETL_CONF'))
         self.connet_name = connet_name
         self.config = self.yaml_data[self.connet_name]
+        self.database = self.config['database']
 
 
     # 从连接池中获取一个连接
     def get_conn(self):
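With the hardcoded default removed, ConnectMysql now requires the ETL_CONF environment variable to point at the YAML config; if it is unset, environ.get returns None and the subsequent load fails. A defensive sketch of the lookup (message text illustrative):

from os import environ

def require_etl_conf():
    # ETL_CONF 必须指向有效的 yaml 配置,否则尽早失败并给出明确提示
    config_path = environ.get('ETL_CONF')
    if not config_path:
        raise RuntimeError('环境变量 ETL_CONF 未设置,请指向 etl_config_*.yaml')
    return config_path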

+ 6 - 1
utils/file/trans_methods.py

@@ -10,6 +10,7 @@ import warnings
 import chardet
 import pandas as pd
 
 
+from utils.common import excel_types, zip_types
 from utils.log.trans_log import trans_print
 
 
 warnings.filterwarnings("ignore")
 warnings.filterwarnings("ignore")
@@ -162,6 +163,9 @@ def __build_directory_dict(directory_dict, path, filter_types=None):
 
 
 # 读取路径下所有的excel文件
 def read_excel_files(read_path, filter_types=None):
+    if not os.path.exists(read_path):
+        return []
+
     if filter_types is None:
         filter_types = ['xls', 'xlsx', 'csv', 'gz']
         filter_types = ['xls', 'xlsx', 'csv', 'gz']
     if os.path.isfile(read_path):
     if os.path.isfile(read_path):
@@ -176,7 +180,8 @@ def read_excel_files(read_path, filter_types=None):
 # 读取路径下所有的文件
 # 读取路径下所有的文件
 def read_files(read_path, filter_types=None):
 def read_files(read_path, filter_types=None):
     if filter_types is None:
     if filter_types is None:
-        filter_types = ['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar']
+        filter_types = [i for i in excel_types]
+        filter_types.extend(zip_types)
     if os.path.isfile(read_path):
     if os.path.isfile(read_path):
         return [read_path]
         return [read_path]
     directory_dict = {}
     directory_dict = {}