Browse Source

First commit after removing batch handling (去掉批次后第一次提交)

wzl 11 months ago
parent
commit
5c1321dbb7
72 changed files with 1053 additions and 5899 deletions
  1. app_run.py  +26 -21
  2. conf/etl_config_dev.yaml  +4 -2
  3. conf/etl_config_prod.yaml  +5 -3
  4. etl/common/ArchiveFile.py  +25 -0
  5. etl/common/BaseDataTrans.py  +74 -40
  6. etl/common/ClearData.py  +7 -5
  7. etl/common/CombineAndSaveFormalFile.py  +61 -0
  8. etl/common/PathsAndTable.py  +30 -37
  9. etl/common/SaveToDb.py  +24 -15
  10. etl/common/UnzipAndRemove.py  +15 -17
  11. etl/wind_power/fault_warn/FaultWarnTrans.py  +70 -23
  12. etl/wind_power/laser/LaserTrans.py  +17 -7
  13. etl/wind_power/min_sec/MinSecTrans.py  +26 -43
  14. etl/wind_power/min_sec/ReadAndSaveTmp.py  +5 -5
  15. etl/wind_power/min_sec/StatisticsAndSaveTmpFormalFile.py  +35 -27
  16. etl/wind_power/wave/WaveTrans.py  +79 -48
  17. package.sh  +0 -3
  18. requirements.txt  +249 -10
  19. service/plt_service.py  +0 -145
  20. service/trans_conf_service.py  +145 -0
  21. service/trans_service.py  +137 -128
  22. service/wave_service.py  +0 -111
  23. test_run_local.py  +8 -12
  24. test_run_local_piliang.py  +0 -91
  25. tmp_file/ClassIdentifier_1.py_bak  +0 -755
  26. tmp_file/baiyushan_20240906.py  +0 -196
  27. tmp_file/changing_hebing_guzhang.py  +0 -48
  28. tmp_file/cp_online_data_to_other.py  +0 -94
  29. tmp_file/curge_read.py  +0 -47
  30. tmp_file/error_ms_data.py  +0 -40
  31. tmp_file/fengxiang_fengdianchang.py  +0 -57
  32. tmp_file/filter_lose_data.py  +0 -48
  33. tmp_file/gradio_web.py  +0 -205
  34. tmp_file/hebing_matlib_result.py  +0 -28
  35. tmp_file/hebing_muti_batch.py  +0 -77
  36. tmp_file/organize_xinhua_files.py  +0 -173
  37. tmp_file/organize_xinhua_files_data.py  +0 -205
  38. tmp_file/orgranize_hongyang.py  +0 -97
  39. tmp_file/power_derating.py  +0 -91
  40. tmp_file/power_derating_biaozhun.py  +0 -90
  41. tmp_file/power_derating_for_chunlin.py  +0 -213
  42. tmp_file/pv_youxiaoxing.py  +0 -262
  43. tmp_file/qinghai-nuomuhong-guifan.py  +0 -134
  44. tmp_file/qinghai-nuomuhong.py  +0 -162
  45. tmp_file/queshi_bili.py  +0 -38
  46. tmp_file/read_and_draw_png.py  +0 -42
  47. tmp_file/select_part_cols.py  +0 -27
  48. tmp_file/test_wave.py  +0 -19
  49. tmp_file/zibo_guzhang_select_time.py  +0 -55
  50. tmp_file/偏航误差验证.py  +0 -98
  51. tmp_file/光伏箱体.py  +0 -155
  52. tmp_file/列名包含数据处理.py  +0 -22
  53. tmp_file/吉山批次处理并重新存数据库.py  +0 -40
  54. tmp_file/对比文件夹列名差值.py  +0 -97
  55. tmp_file/年度汇总平均缺失率.py  +0 -96
  56. tmp_file/张崾先26故障.py  +0 -31
  57. tmp_file/张崾先筛选20241210.py  +0 -46
  58. tmp_file/张崾先统计-分钟.py  +0 -67
  59. tmp_file/张崾先统计-秒.py  +0 -92
  60. tmp_file/张崾先震动_参数获取.py  +0 -90
  61. tmp_file/张崾先风电场-故障整理.py  +0 -32
  62. tmp_file/张崾先风电场-非点检字段获取.py  +0 -108
  63. tmp_file/玉湖光伏-标准化.py  +0 -158
  64. tmp_file/玉湖光伏-标准化_1.py  +0 -209
  65. tmp_file/玉湖光伏-标准化_2.py  +0 -283
  66. tmp_file/玉湖光伏-气象标准化.py  +0 -122
  67. tmp_file/王博提取数据完整风机数据.py  +0 -90
  68. tmp_file/白玉山每月限电损失.py  +0 -35
  69. tmp_file/筛选字段.py  +0 -29
  70. utils/common.py  +3 -0
  71. utils/db/ConnectMysql.py  +2 -2
  72. utils/file/trans_methods.py  +6 -1

app_run.py  +26 -21

@@ -7,37 +7,39 @@ from os import *
 from utils.conf.read_conf import yaml_conf, read_conf
 
 
-def get_exec_data(batch_no=None, read_type=None, run_count=1):
-    if batch_no and read_type:
-        data = get_data_by_batch_no_and_type(batch_no, read_type)
-        if data is None:
-            raise ValueError(f"未找到批次号:{batch_no},类型:{read_type}")
-
+def get_exec_data(run_count=1):
+    now_run_count = get_now_running_count()
+    data = None
+    if now_run_count >= run_count:
+        trans_print(f"当前有{now_run_count}个任务在执行")
     else:
-        data = get_batch_exec_data(run_count)
-        if data is None:
-            trans_print("当前有任务在执行")
-            sys.exit(0)
-        elif len(data.keys()) == 0:
-            trans_print("当前无任务")
-            sys.exit(0)
-
+        data = get_batch_exec_data()
     return data
 
 
-def run(batch_no=None, read_type=None, save_db=True, run_count=1):
+def run(save_db=True, run_count=1, yaml_config=None, step=0, end=6):
     update_timeout_trans_data()
-    data = get_exec_data(batch_no, read_type, run_count)
+    data = get_exec_data(run_count)
+
+    if data is None:
+        trans_print("没有需要执行的任务")
+        return
 
     exec_process = None
     if data['transfer_type'] in ['second', 'minute']:
-        exec_process = MinSecTrans(data=data, save_db=save_db)
+        exec_process = MinSecTrans(data=data, save_db=save_db, yaml_config=yaml_config, step=step, end=end)
 
     if data['transfer_type'] in ['fault', 'warn']:
-        exec_process = FaultWarnTrans(data=data, save_db=save_db)
+        exec_process = FaultWarnTrans(data=data, save_db=save_db, yaml_config=yaml_config)
+
+    if data['transfer_type'] == 'wave':
+        exec_process = WaveTrans(data['id'], data['wind_farm_code'], data['read_dir'])
+
+    if data['transfer_type'] == 'laser':
+        exec_process = LaserTrans(data['id'], data['wind_farm_code'], data['read_dir'])
 
     if exec_process is None:
-        raise Exception("No exec process")
+        raise Exception("没有相应的执行器")
     exec_process.run()
 
 
@@ -54,11 +56,14 @@ if __name__ == '__main__':
     run_count = int(read_conf(yaml_config, "run_batch_count", 1))
 
     from utils.log.trans_log import trans_print
-    from service.plt_service import get_batch_exec_data, get_data_by_batch_no_and_type, update_timeout_trans_data
+    from service.trans_conf_service import update_timeout_trans_data, \
+        get_now_running_count, get_batch_exec_data
     from etl.wind_power.fault_warn.FaultWarnTrans import FaultWarnTrans
     from etl.wind_power.min_sec.MinSecTrans import MinSecTrans
+    from etl.wind_power.laser.LaserTrans import LaserTrans
+    from etl.wind_power.wave.WaveTrans import WaveTrans
 
     trans_print("所有请求参数:", sys.argv, "env:", env, "最大可执行个数:", run_count)
     trans_print("配置文件路径:", environ.get("ETL_CONF"))
 
-    run(run_count=run_count)
+    run(run_count=run_count, yaml_config=yaml_config, step=0)
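With the batch parameters gone, the entry point is driven entirely by run_batch_count and the new step/end stage window. A minimal usage sketch of the new signature (the argument values here are illustrative, not from the commit):

    # Full pipeline, stages 0-6, at most one concurrent task:
    run(run_count=1, yaml_config=yaml_config, step=0, end=6)

    # Hypothetical resume after a failure during archiving: skip stages 0-3
    # and redo archive -> combine -> DB load (stages 4-6).
    run(run_count=1, yaml_config=yaml_config, step=4, end=6)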

conf/etl_config_dev.yaml  +4 -2

@@ -18,9 +18,11 @@ etl_origin_path_contain: 收资数据
 save_path:
 
 # 日志保存路径
-log_path_dir: /data/logs
+log_path_dir: /data/logs/no_batch_trans
 
 # 临时文件存放处,有些甲方公司隔得tmp太小,只好自己配置
 tmp_base_path: /tmp
 
-run_batch_count: 1
+run_batch_count: 1
+
+archive_path: /data/download/collection_data/archive/dev

conf/etl_config_prod.yaml  +5 -3

@@ -18,9 +18,11 @@ etl_origin_path_contain: 收资数据
 save_path:
 
 # 日志保存路径
-log_path_dir: /data/logs
+log_path_dir: /data/logs/no_batch_trans
 
 # 临时文件存放处,有些甲方公司隔得tmp太小,只好自己配置
-tmp_base_path: /tmp
+tmp_base_path: /data/download/collection_data/tmp
 
-run_batch_count: 3
+run_batch_count: 2
+
+archive_path: /data/download/collection_data/archive/prod
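Both configs now define archive_path, which PathsAndTable reads with a default fallback. A minimal sketch of resolving the key (it mirrors the read_conf call in the PathsAndTable diff below; the config file location is illustrative):

    from utils.conf.read_conf import yaml_conf, read_conf

    yaml_config = yaml_conf("/etc/etl/etl_config_prod.yaml")  # illustrative location
    # Falls back to /tmp/archive when archive_path is absent from the YAML.
    archive_path = read_conf(yaml_config, "archive_path", "/tmp/archive")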

etl/common/ArchiveFile.py  +25 -0

@@ -0,0 +1,25 @@
+import os.path
+import shutil
+
+from etl.common.PathsAndTable import PathsAndTable
+from service.trans_conf_service import update_archive_success
+from utils.log.trans_log import trans_print
+
+
+class ArchiveFile(object):
+
+    def __init__(self, pathsAndTable: PathsAndTable, exec_id):
+        self.pathsAndTable = pathsAndTable
+        self.exec_id = exec_id
+
+    def run(self):
+        """
+        归档文件
+        """
+        if os.path.exists(self.pathsAndTable.get_tmp_formal_path()):
+            shutil.make_archive(self.pathsAndTable.get_archive_path(), 'zip', self.pathsAndTable.get_tmp_formal_path())
+            update_archive_success(self.exec_id, self.pathsAndTable.read_type,
+                                   f"{self.pathsAndTable.get_archive_path()}.zip")
+            trans_print(f"文件夹已归档为 {self.pathsAndTable.get_archive_path()}.zip")
+        else:
+            trans_print(f"文件夹 {self.pathsAndTable.get_tmp_formal_path()} 不存在")
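shutil.make_archive expects the base name without the .zip suffix and appends the extension itself, which is why get_archive_path() returns an extension-free path and the success log adds ".zip". A standalone sketch of that stdlib behavior (the paths are made up):

    import shutil

    # base_name carries no extension; make_archive appends ".zip" and
    # returns the full path of the archive it wrote.
    archive = shutil.make_archive("/data/archive/farm_a/second/12_task", "zip",
                                  root_dir="/tmp/farm_a/12_task/second/formal_tmp")
    print(archive)  # /data/archive/farm_a/second/12_task.zip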

etl/common/BaseDataTrans.py  +74 -40

@@ -1,112 +1,152 @@
 import datetime
 import traceback
 
+from etl.common.ArchiveFile import ArchiveFile
 from etl.common.ClearData import ClearData
 from etl.common.PathsAndTable import PathsAndTable
 from etl.common.SaveToDb import SaveToDb
 from etl.common.UnzipAndRemove import UnzipAndRemove
-from service.plt_service import get_all_wind, update_trans_status_success, update_trans_status_error, \
+from service.plt_service import get_all_wind
+from service.trans_conf_service import update_trans_status_success, update_trans_status_error, \
     update_trans_status_running
 from utils.file.trans_methods import read_excel_files
 from utils.log.trans_log import trans_print, set_trance_id
 
 
 class BaseDataTrans(object):
-    def __init__(self, data: dict = None, save_db=True, step=0, end=4):
-
-        self.batch_no = data['batch_code']
-        self.batch_name = data['batch_name']
-        self.read_type = data['transfer_type']
-        self.read_path = data['transfer_addr']
-        self.field_code = data['field_code']
-        self.field_name = data['field_name']
+    def __init__(self, data: dict = None, save_db=True, yaml_config=None, step=0, end=6):
+
+        self.id = data['id']
+        self.task_name = data['task_name']
+        self.transfer_type = data['transfer_type']
+        self.read_dir = data['read_dir']
+        self.wind_farm_code = data['wind_farm_code']
+        self.wind_farm_name = data['wind_farm_name']
+        self.yaml_config = yaml_config
         self.save_zip = False
         self.step = step
         self.end = end
-        self.wind_col_trans, self.rated_power_and_cutout_speed_map = get_all_wind(self.field_code)
+        self.wind_col_trans, self.rated_power_and_cutout_speed_map = get_all_wind(self.wind_farm_code)
         self.batch_count = 100000
         self.save_db = save_db
         self.filed_conf = self.get_filed_conf()
-
-        # trans_print("是否是秒转分钟:", self.boolean_sec_to_min)
+        self.update_files = list()
 
         try:
-            self.pathsAndTable = PathsAndTable(self.batch_no, self.batch_name, self.read_path, self.field_name,
-                                               self.read_type, save_db, self.save_zip)
+            self.pathsAndTable = PathsAndTable(self.id, self.task_name, self.read_dir, self.wind_farm_code,
+                                               self.wind_farm_name, self.transfer_type, save_db, self.save_zip,
+                                               self.yaml_config, self.wind_col_trans)
         except Exception as e:
             trans_print(traceback.format_exc())
-            update_trans_status_error(self.batch_no, self.read_type, str(e), self.save_db)
+            update_trans_status_error(self.id, self.transfer_type, str(e), self.save_db)
             raise e
 
     def get_filed_conf(self):
         raise NotImplementedError("需要实现 获取点检表 方法")
 
-    # 第一步 清理数据
+    # 清理数据
     def clean_file_and_db(self):
         clean_data = ClearData(self.pathsAndTable)
         clean_data.run()
 
-    # 第二步 解压 移动到临时文件
+    # 解压 移动到临时文件
     def unzip_or_remove_to_tmp_dir(self):
         # 解压并删除
         unzip_and_remove = UnzipAndRemove(self.pathsAndTable)
         unzip_and_remove.run()
 
-    # 第三步 读取  保存到临时文件
+    # 读取并保存到临时文件
     def read_and_save_tmp_file(self):
-        raise NotImplementedError("第三步未做实现")
+        raise NotImplementedError("读取并保存到临时文件未做实现")
+
+    # 读取并保存到临时正式文件
+    def statistics_and_save_tmp_formal_file(self):
+        raise NotImplementedError("读取并保存到临时正式文件未做实现")
+
+    # 归档文件
+    def archive_file(self):
+        archive_file = ArchiveFile(self.pathsAndTable, self.id)
+        archive_file.run()
 
-    # 第四步 统计 并 保存到正式文件
-    def statistics_and_save_to_file(self):
-        raise NotImplementedError("第四步未做实现")
+    # 合并到正式文件
+    def combine_and_save_formal_file(self):
+        raise NotImplementedError("合并到正式文件未做实现")
 
-    # 第五步 保存到数据库
+    # 保存到数据库
     def save_to_db(self):
-        save_to_db = SaveToDb(self.pathsAndTable, self.batch_count)
+        save_to_db = SaveToDb(self.pathsAndTable, self.update_files, self.batch_count)
         save_to_db.run()
 
     # 最后更新执行程度
     def update_exec_progress(self):
-        update_trans_status_success(self.batch_no, self.read_type,
+        update_trans_status_success(self.id, self.transfer_type,
                                     len(read_excel_files(self.pathsAndTable.get_save_path())),
                                     None, None, None, None, self.save_db)
 
     def run(self):
         total_begin = datetime.datetime.now()
         try:
-            trance_id = '-'.join([self.batch_no, self.field_name, self.read_type])
+            trance_id = '-'.join([str(self.id), self.wind_farm_name, self.transfer_type])
             set_trance_id(trance_id)
-            update_trans_status_running(self.batch_no, self.read_type, self.save_db)
+            update_trans_status_running(self.id, self.transfer_type, self.save_db)
 
-            if self.step <= 0 and self.end >= 0:
+            now_index = 0
+            # 0
+            if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 trans_print("开始清理数据,临时文件夹:", self.pathsAndTable.get_tmp_path())
                 self.clean_file_and_db()
                 trans_print("清理数据结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
                             datetime.datetime.now() - total_begin)
 
-            if self.step <= 1 and self.end >= 1:
+            now_index = now_index + 1
+            # 1
+            if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 trans_print("开始解压移动文件")
                 self.unzip_or_remove_to_tmp_dir()
                 trans_print("解压移动文件结束:耗时:", datetime.datetime.now() - begin, "总耗时:",
                             datetime.datetime.now() - total_begin)
 
-            if self.step <= 2 and self.end >= 2:
+            now_index = now_index + 1
+            # 2
+            if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 trans_print("开始保存数据到临时文件")
                 self.read_and_save_tmp_file()
                 trans_print("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
                             datetime.datetime.now() - total_begin)
 
-            if self.step <= 3 and self.end >= 3:
+            now_index = now_index + 1
+            # 3
+            if self.step <= now_index <= self.end:
+                begin = datetime.datetime.now()
+                trans_print("开始保存到临时正式文件")
+                self.statistics_and_save_tmp_formal_file()
+                trans_print("保存到临时正式文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                            datetime.datetime.now() - total_begin)
+
+            now_index = now_index + 1
+            # 4
+            if self.step <= now_index <= self.end:
+                begin = datetime.datetime.now()
+                trans_print("开始保存归档文件")
+                self.archive_file()
+                trans_print("保存到保存归档文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
+                            datetime.datetime.now() - total_begin)
+
+            now_index = now_index + 1
+            # 5
+            if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 trans_print("开始保存数据到正式文件")
-                self.statistics_and_save_to_file()
+                self.combine_and_save_formal_file()
                 trans_print("保存数据到正式文件结束,耗时:", datetime.datetime.now() - begin, "总耗时:",
                             datetime.datetime.now() - total_begin)
 
-            if self.step <= 4 and self.end >= 4:
+            now_index = now_index + 1
+            # 6
+            if self.step <= now_index <= self.end:
                 begin = datetime.datetime.now()
                 trans_print("开始保存到数据库,是否存库:", self.pathsAndTable.save_db)
                 self.save_to_db()
@@ -116,14 +156,8 @@ class BaseDataTrans(object):
             self.update_exec_progress()
         except Exception as e:
             trans_print(traceback.format_exc())
-            update_trans_status_error(self.batch_no, self.read_type, str(e), self.save_db)
+            update_trans_status_error(self.id, self.transfer_type, str(e), self.save_db)
             raise e
         finally:
             self.pathsAndTable.delete_tmp_files()
             trans_print("执行结束,总耗时:", str(datetime.datetime.now() - total_begin))
-
-
-if __name__ == '__main__':
-    test = BaseDataTrans(save_db=False, batch_no="WOF053600062-WOB000010", read_type="fault")
-
-    test.run()
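The rewritten run() numbers its seven stages 0 through 6 and executes stage i only when step <= i <= end, replacing the old pairwise comparisons. A self-contained sketch of the windowing logic, with stand-in stage names that are not from the commit:

    STAGES = ["clean", "unzip", "read_tmp", "stat_tmp_formal",
              "archive", "combine_formal", "save_db"]  # indices 0..6

    def run_stages(step=0, end=6):
        for now_index, name in enumerate(STAGES):
            # A stage runs only when its index falls inside [step, end].
            if step <= now_index <= end:
                print(f"running stage {now_index}: {name}")

    run_stages(step=2, end=4)  # read_tmp, stat_tmp_formal, archive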

etl/common/ClearData.py  +7 -5

@@ -1,7 +1,7 @@
 import datetime
 
 from etl.common.PathsAndTable import PathsAndTable
-from service.plt_service import update_trans_transfer_progress
+from service.trans_conf_service import update_trans_transfer_progress
 from utils.log.trans_log import trans_print
 
 
@@ -12,14 +12,16 @@ class ClearData(object):
 
     def clean_data(self):
         self.pathsAndTable.delete_tmp_files()
-        if self.pathsAndTable.save_db:
-            self.pathsAndTable.delete_batch_db()
-        self.pathsAndTable.delete_batch_files()
+
+        # 不基于批次,不能删除数据库了以及历史数据了
+        # if self.pathsAndTable.save_db:
+        #     self.pathsAndTable.delete_batch_db()
+        # self.pathsAndTable.delete_batch_files()
 
     def run(self):
         trans_print("开始清理数据,临时文件夹:", self.pathsAndTable.get_tmp_path())
         begin = datetime.datetime.now()
         self.clean_data()
-        update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type, 5,
+        update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type, 5,
                                        self.pathsAndTable.save_db)
         trans_print("清理数据结束,耗时:", datetime.datetime.now() - begin)

etl/common/CombineAndSaveFormalFile.py  +61 -0

@@ -0,0 +1,61 @@
+import multiprocessing
+import os
+
+import pandas as pd
+
+from etl.common.PathsAndTable import PathsAndTable
+from utils.file.trans_methods import read_excel_files, read_file_to_df, copy_to_new
+from utils.log.trans_log import trans_print
+from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
+
+
+class CombineAndSaveFormalFile(object):
+
+    def __init__(self, pathsAndTable: PathsAndTable):
+        self.pathsAndTable = pathsAndTable
+        self.update_files = multiprocessing.Manager().list()
+
+    def combine_and_save(self, file_path, key, exists_file_path):
+        exists_same = False
+        if exists_file_path:
+            exists_same = True
+            exists_df = read_file_to_df(exists_file_path)
+            now_df = read_file_to_df(file_path)
+            # 合并两个 DataFrame
+            combined_df = pd.concat([exists_df, now_df])
+            # 去重,保留 now_df 的值
+            combined_df = combined_df.drop_duplicates(subset='time_stamp', keep='last')
+            # 按 time_stamp 排序
+            combined_df = combined_df.sort_values(by='time_stamp').reset_index(drop=True)
+            combined_df.to_csv(exists_file_path, encoding='utf-8', index=False)
+            self.update_files.append(exists_file_path)
+        else:
+            save_path = str(os.path.join(self.pathsAndTable.get_save_path(), key[0], key[1]))
+            copy_to_new(file_path, save_path)
+            self.update_files.append(save_path)
+        trans_print(f"{key[0]}/{key[1]} {'包含' if exists_same else '不包含'} 相同文件,保存成功")
+
+    def combine_and_save_formal_file(self):
+        exists_files = read_excel_files(self.pathsAndTable.get_save_path())
+        exists_file_maps = dict()
+        for file_path in exists_files:
+            name = (os.path.basename(os.path.dirname(file_path)), os.path.basename(file_path))
+            exists_file_maps[name] = file_path
+
+        new_files = read_excel_files(self.pathsAndTable.get_tmp_formal_path())
+        new_file_maps = dict()
+        for file_path in new_files:
+            name = (os.path.basename(os.path.dirname(file_path)), os.path.basename(file_path))
+            new_file_maps[name] = file_path
+
+        same_keys = list(set(exists_file_maps.keys()).intersection(new_file_maps.keys()))
+        split_count = get_available_cpu_count_with_percent(2 / 3)
+        with multiprocessing.Pool(split_count) as pool:
+            pool.starmap(self.combine_and_save,
+                         [(file_path, key, exists_file_maps[key] if key in same_keys else None) for key, file_path in
+                          new_file_maps.items()])
+
+    def run(self):
+        self.combine_and_save_formal_file()
+        print(self.update_files)
+        return list(self.update_files)
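The merge keeps the newest reading per timestamp: concat appends the freshly transformed rows after the existing ones, and drop_duplicates(subset='time_stamp', keep='last') lets the new rows win on collisions. A toy demonstration of that exact sequence:

    import pandas as pd

    old = pd.DataFrame({"time_stamp": ["00:00", "00:10"], "power": [1.0, 2.0]})
    new = pd.DataFrame({"time_stamp": ["00:10", "00:20"], "power": [9.0, 3.0]})

    merged = (pd.concat([old, new])
              .drop_duplicates(subset="time_stamp", keep="last")
              .sort_values(by="time_stamp")
              .reset_index(drop=True))
    print(merged)  # 00:00 -> 1.0, 00:10 -> 9.0 (new row wins), 00:20 -> 3.0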

etl/common/PathsAndTable.py  +30 -37

@@ -1,45 +1,47 @@
 import shutil
-from os import path, environ, sep
+from os import path, sep
 
 
-from service.trans_service import drop_table, creat_min_sec_table, create_warn_fault_table
+from service.trans_service import creat_min_sec_table, create_warn_fault_table
 from utils.conf.read_conf import *
 from utils.log.trans_log import trans_print
 
 
 
 
 class PathsAndTable(object):
-    def __init__(self, batch_no=None, batch_name=None, read_path=None, field_name=None, read_type=None,
-                 save_db=True, save_zip=True):
-        self.batch_no = batch_no
-        self.batch_name = batch_name
-        self.read_path = read_path
-        self.field_name = field_name
+    def __init__(self, id=None, task_name=None, read_dir=None, wind_farm_code=None, wind_farm_name=None,
+                 read_type=None, save_db=True, save_zip=True, yaml_config=None, wind_col_trans=None):
+        self.id = id
+        self.task_name = task_name
+        self.read_dir = read_dir
+        self.wind_farm_code = wind_farm_code
+        self.wind_farm_name = wind_farm_name
         self.read_type = read_type
         self.save_db = save_db
         self.save_zip = save_zip
         self.multi_pool_count = 6
-        self.is_delete_db = False
-
-        yaml_config = yaml_conf(environ.get('ETL_CONF'))
+        self.yaml_config = yaml_config
+        self.wind_col_trans = wind_col_trans
 
 
         save_path_conf = read_conf(yaml_config, "save_path")
         self.tmp_base_path = read_conf(yaml_config, "tmp_base_path", "/tmp")
         if save_path_conf:
-            self.save_path = save_path_conf + sep + self.field_name
+            self.save_path = save_path_conf + sep + self.wind_farm_name
         else:
-            find_index = read_path.find(read_conf(yaml_config, 'etl_origin_path_contain', "etl_origin_path_contain"))
+            find_index = read_dir.find(read_conf(yaml_config, 'etl_origin_path_contain', "etl_origin_path_contain"))
             if find_index == -1:
-                raise Exception("路径未包含原始数据特定字符:" + read_path)
-            self.save_path = read_path[0:find_index] + sep + "清理数据"
+                raise Exception("路径未包含原始数据特定字符:" + read_dir)
+            self.save_path = read_dir[0:find_index] + sep + "清理数据"
 
 
         if self.save_path is None:
-            raise Exception("未配置保存路径:" + read_path)
+            raise Exception("未配置保存路径:" + read_dir)
+
+        self.archive_path = read_conf(yaml_config, "archive_path", "/tmp/archive")
 
 
     def get_save_path(self):
-        return path.join(self.save_path, self.batch_no + "_" + self.batch_name, self.read_type)
+        return path.join(self.save_path, self.read_type)
 
 
     def get_tmp_path(self):
-        return path.join(self.tmp_base_path, self.field_name, self.batch_no + "_" + self.batch_name,
-                         self.read_type)
+        return str(path.join(self.tmp_base_path, self.wind_farm_name, str(self.id) + "_" + self.task_name,
+                             self.read_type))
 
 
     def get_excel_tmp_path(self):
     def get_excel_tmp_path(self):
         return path.join(self.get_tmp_path(), 'excel_tmp' + sep)
         return path.join(self.get_tmp_path(), 'excel_tmp' + sep)
@@ -53,14 +55,14 @@ class PathsAndTable(object):
         else:
             return path.join(self.get_tmp_path(), 'merge_tmp', str(wind_turbine_number))
 
 
-    def get_table_name(self):
-        return "_".join([self.batch_no, self.read_type])
+    def get_tmp_formal_path(self):
+        return path.join(self.get_tmp_path(), 'formal_tmp')
 
 
-    def delete_batch_files(self):
-        trans_print("开始删除已存在的批次文件夹")
-        if path.exists(self.get_save_path()):
-            shutil.rmtree(self.get_save_path())
-        trans_print("删除已存在的批次文件夹")
+    def get_archive_path(self):
+        return path.join(self.archive_path, self.wind_farm_name, self.read_type, f'{self.id}_{self.task_name}')
+
+    def get_table_name(self):
+        return "_".join([self.wind_farm_code, self.read_type])
 
 
     def delete_tmp_files(self):
     def delete_tmp_files(self):
         trans_print("开始删除临时文件夹")
@@ -68,20 +70,11 @@ class PathsAndTable(object):
             shutil.rmtree(self.get_tmp_path())
         trans_print("删除临时文件夹删除成功")
 
 
-    def delete_batch_db(self):
-        if self.save_db:
-            trans_print("开始删除表")
-            if not self.is_delete_db:
-                table_name = self.get_table_name()
-                drop_table(table_name, self.save_db)
-                self.is_delete_db = True
-            trans_print("删除表结束")
-
-    def create_batch_db(self, wind_names: list = list()):
+    def create_wind_farm_db(self):
         if self.save_db:
             trans_print("开始创建表")
             if self.read_type in ['second', 'minute']:
-                creat_min_sec_table(self.get_table_name(), wind_names, self.read_type)
+                creat_min_sec_table(self.get_table_name(), self.read_type)
             elif self.read_type in ['fault', 'warn']:
                 create_warn_fault_table(self.get_table_name())
             else:

etl/common/SaveToDb.py  +24 -15

@@ -1,40 +1,49 @@
 import multiprocessing
+import os.path
 import traceback
-from os import path
 
 
 from etl.common.PathsAndTable import PathsAndTable
-from service.plt_service import update_trans_transfer_progress
-from service.trans_service import save_file_to_db
-from utils.file.trans_methods import read_excel_files, split_array
+from service.trans_conf_service import update_trans_transfer_progress
+from service.trans_service import save_partation_file_to_db, save_file_to_db
+from utils.file.trans_methods import split_array
 from utils.log.trans_log import trans_print
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
 
 
 
 
 class SaveToDb(object):
 
 
-    def __init__(self, pathsAndTable: PathsAndTable, batch_count=100000):
+    def __init__(self, pathsAndTable: PathsAndTable, update_files, batch_count=100000):
         self.pathsAndTable = pathsAndTable
         self.batch_count = batch_count
+        self.update_files = update_files
 
 
     def mutiprocessing_to_save_db(self):
         # 开始保存到SQL文件
+        all_saved_files = self.update_files
 
 
-        self.pathsAndTable.delete_batch_db()
-        all_saved_files = read_excel_files(self.pathsAndTable.get_save_path())
-        wind_names = [str(path.basename(i)).replace(".csv", "") for i in all_saved_files]
+        # 映射到的文件保存到数据库
+        all_saved_files = [i for i in all_saved_files if
+                           os.path.basename(i).split(".")[0] in self.pathsAndTable.wind_col_trans.keys()]
 
 
-        self.pathsAndTable.create_batch_db(wind_names)
+        self.pathsAndTable.create_wind_farm_db()
 
 
-        split_count = get_available_cpu_count_with_percent(percent=1 / 2)
+        split_count = get_available_cpu_count_with_percent(percent=2 / 3)
         split_count = split_count if split_count <= len(all_saved_files) else len(all_saved_files)
         all_arrays = split_array(all_saved_files, split_count)
         try:
             for index, arr in enumerate(all_arrays):
                 with multiprocessing.Pool(split_count) as pool:
-                    pool.starmap(save_file_to_db,
-                                 [(self.pathsAndTable.get_table_name(), file, self.batch_count) for file in
-                                  all_saved_files])
-                update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type,
+                    if self.pathsAndTable.read_type in ['minute', 'second']:
+                        pool.starmap(save_partation_file_to_db,
+                                     [(self.pathsAndTable.get_table_name(), file,
+                                       self.pathsAndTable.wind_col_trans[os.path.basename(file).split(".")[0]],
+                                       os.path.basename(os.path.dirname(file)),
+                                       self.batch_count) for file in arr])
+                    else:
+                        pool.starmap(save_file_to_db,
+                                     [(self.pathsAndTable.get_table_name(), file, self.batch_count) for file in arr])
+
+                update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type,
                                                round(70 + 29 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
         except Exception as e:
@@ -45,5 +54,5 @@ class SaveToDb(object):
     def run(self):
         if self.pathsAndTable.save_db:
             self.mutiprocessing_to_save_db()
-            update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type, 99,
+            update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type, 99,
                                            self.pathsAndTable.save_db)
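Two details worth noting in this hunk: the starmap now submits only the current chunk (arr) rather than the whole file list on every iteration, and minute/second data goes through save_partation_file_to_db with the per-turbine column mapping. A minimal sketch of the chunked-pool-with-progress pattern, where save_one and the striped chunking are stand-ins for save_file_to_db and split_array:

    import multiprocessing

    def save_one(table_name, file, batch_count):
        print("saving", table_name, file, batch_count)  # stand-in for save_file_to_db

    if __name__ == '__main__':
        files = [f"wt{i:02d}.csv" for i in range(10)]
        split_count = 4
        chunks = [files[i::split_count] for i in range(split_count)]  # assumed chunking
        for index, arr in enumerate(chunks):
            with multiprocessing.Pool(split_count) as pool:
                # Only the current chunk is submitted, mirroring the fixed loop above.
                pool.starmap(save_one, [("farm_a_second", f, 100000) for f in arr])
            print("progress:", round(70 + 29 * (index + 1) / len(chunks), 2))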

etl/common/UnzipAndRemove.py  +15 -17

@@ -3,7 +3,7 @@ import traceback
 from os import *
 
 
 from etl.common.PathsAndTable import PathsAndTable
-from service.plt_service import update_trans_transfer_progress
+from service.trans_conf_service import update_trans_transfer_progress
 from utils.file.trans_methods import read_files, read_excel_files, copy_to_new, split_array
 from utils.log.trans_log import trans_print
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
@@ -20,40 +20,38 @@ class UnzipAndRemove(object):
         to_path = self.pathsAndTable.get_excel_tmp_path()
         if str(file).endswith("zip"):
             if str(file).endswith("csv.zip"):
-                copy_to_new(file, file.replace(self.pathsAndTable.read_path, to_path).replace("csv.zip", 'csv.gz'))
+                copy_to_new(file, file.replace(self.pathsAndTable.read_dir, to_path).replace("csv.zip", 'csv.gz'))
             else:
-                desc_path = file.replace(self.pathsAndTable.read_path, to_path)
-                is_success, e = unzip(file, get_desc_path(desc_path))
+                desc_path = file.replace(self.pathsAndTable.read_dir, to_path)
+                unzip(file, get_desc_path(desc_path))
                 self.pathsAndTable.has_zip = True
-                if not is_success:
-                    # raise e
-                    pass
         elif str(file).endswith("rar"):
-            desc_path = file.replace(self.pathsAndTable.read_path, to_path)
+            desc_path = file.replace(self.pathsAndTable.read_dir, to_path)
             is_success, e = unrar(file, get_desc_path(desc_path))
             self.pathsAndTable.has_zip = True
             if not is_success:
                 trans_print(traceback.format_exc())
                 pass
         else:
-            copy_to_new(file, file.replace(self.pathsAndTable.read_path, to_path))
+            copy_to_new(file, file.replace(self.pathsAndTable.read_dir, to_path))
 
 
     def remove_file_to_tmp_path(self):
         # 读取文件
         try:
-            if path.isfile(self.pathsAndTable.read_path):
-                all_files = [self.pathsAndTable.read_path]
+            if path.isfile(self.pathsAndTable.read_dir):
+                all_files = [self.pathsAndTable.read_dir]
             else:
-                all_files = read_files(self.pathsAndTable.read_path)
+                all_files = read_files(self.pathsAndTable.read_dir)
 
 
             # 最大取系统cpu的 三分之二
-            split_count = get_available_cpu_count_with_percent(1 / 2)
+            split_count = get_available_cpu_count_with_percent(2 / 3)
             all_arrays = split_array(all_files, split_count)
 
 
             for index, arr in enumerate(all_arrays):
-                with multiprocessing.Pool(self.pathsAndTable.multi_pool_count) as pool:
+                pool_count = split_count if split_count < len(arr) else len(arr)
+                with multiprocessing.Pool(pool_count) as pool:
                     pool.starmap(self.get_and_remove, [(i,) for i in arr])
-                update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type,
+                update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type,
                                                round(5 + 15 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
 
 
@@ -62,11 +60,11 @@ class UnzipAndRemove(object):
             trans_print('读取文件数量:', len(all_files))
         except Exception as e:
             trans_print(traceback.format_exc())
-            message = "读取文件列表错误:" + self.pathsAndTable.read_path + ",系统返回错误:" + str(e)
+            message = "读取文件列表错误:" + self.pathsAndTable.read_dir + ",系统返回错误:" + str(e)
             raise ValueError(message)
         return all_files
 
 
     def run(self):
         self.remove_file_to_tmp_path()
-        update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type, 20,
+        update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type, 20,
                                        self.pathsAndTable.save_db)
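The pool is now sized per chunk so workers are never spawned idle: pool_count = split_count if split_count < len(arr) else len(arr), i.e. min(split_count, len(arr)). A two-line illustration:

    def pool_count(split_count, arr):
        # Never start more workers than there are files in the batch.
        return split_count if split_count < len(arr) else len(arr)

    print(pool_count(8, ["a.zip", "b.zip"]))  # 2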

etl/wind_power/fault_warn/FaultWarnTrans.py  +70 -23

@@ -1,11 +1,13 @@
+import os.path
 from os import *
 
 
 import numpy as np
 import pandas as pd
 
 
 from etl.common.BaseDataTrans import BaseDataTrans
-from service.plt_service import update_trans_status_error
-from service.trans_service import get_fault_warn_conf, get_trans_exec_code
+from service.trans_conf_service import update_trans_status_error, update_trans_status_success
+from service.trans_service import get_fault_warn_conf, get_trans_exec_code, drop_table, create_warn_fault_table, \
+    save_file_to_db
 from utils.conf.read_conf import read_conf
 from utils.file.trans_methods import read_excel_files, read_file_to_df, create_file_path, valid_eval
 from utils.log.trans_log import trans_print
@@ -13,23 +15,27 @@ from utils.log.trans_log import trans_print
 
 
 class FaultWarnTrans(BaseDataTrans):
 
 
-    def __init__(self, data: dict = None, save_db=True, step=0, end=4):
-        super(FaultWarnTrans, self).__init__(data, save_db, step, end)
+    def __init__(self, data: dict = None, save_db=True, yaml_config=None, step=0, end=6):
+        super(FaultWarnTrans, self).__init__(data, save_db, yaml_config, step, end)
+        self.engine_count = 0
+        self.min_date = None
+        self.max_date = None
+        self.data_count = 0
 
 
     def get_filed_conf(self):
-        return get_fault_warn_conf(self.field_code, self.read_type)
+        return get_fault_warn_conf(self.wind_farm_code, self.transfer_type)
 
 
     # 第三步 读取 并 保存到临时文件
     def read_and_save_tmp_file(self):
         trans_print("无需保存临时文件")
 
 
-    # 第四步 统计 并 保存到正式文件
-    def statistics_and_save_to_file(self):
+    # 读取并保存到临时正式文件
+    def statistics_and_save_tmp_formal_file(self):
         conf_map = self.get_filed_conf()
         if conf_map is None or type(conf_map) == tuple or len(conf_map.keys()) == 0:
-            message = f"未找到{self.batch_no}的{self.read_type}配置"
+            message = f"未找到{self.id}的{self.transfer_type}配置"
             trans_print(message)
-            update_trans_status_error(self.batch_no, self.read_type, message, self.save_db)
+            update_trans_status_error(self.id, self.transfer_type, message, self.save_db)
         else:
 
 
             for key, v in conf_map.items():
@@ -112,21 +118,62 @@ class FaultWarnTrans(BaseDataTrans):
                 df['time_diff'] = (df['end_time'] - df['begin_time']).dt.total_seconds()
                 df.loc[df['time_diff'] < 0, 'time_diff'] = np.nan
 
 
-            # 如果有需要处理的,先进行代码处理,在保存文件
-            exec_code = get_trans_exec_code(self.batch_no, self.read_type)
-            if exec_code:
-                exec(exec_code)
-
-            # 根绝开始时间进行排序
-            df.sort_values(by=['wind_turbine_number', 'begin_time'], inplace=True)
-
             if self.save_zip:
-                save_path = path.join(self.pathsAndTable.get_save_path(), str(self.batch_name) + '.csv.gz')
+                save_path = path.join(self.pathsAndTable.get_tmp_formal_path(),
+                                      str(self.pathsAndTable.read_type) + '.csv.gz')
             else:
-                save_path = path.join(self.pathsAndTable.get_save_path(), str(self.batch_name) + '.csv')
+                save_path = path.join(self.pathsAndTable.get_tmp_formal_path(),
+                                      str(self.pathsAndTable.read_type) + '.csv')
 
 
             create_file_path(save_path, is_file_path=True)
-            if self.save_zip:
-                df.to_csv(save_path, compression='gzip', index=False, encoding='utf-8', date_format='%Y-%m-%d %H:%M:%S')
-            else:
-                df.to_csv(save_path, index=False, encoding='utf-8', date_format='%Y-%m-%d %H:%M:%S')
+
+            df.to_csv(save_path, index=False, encoding='utf-8')
+
+    # 归档文件
+    # def archive_file(self):
+    #     trans_print("无需归档文件")
+
+    # 合并到正式文件
+    def combine_and_save_formal_file(self):
+
+        df = read_file_to_df(
+            os.path.join(self.pathsAndTable.get_tmp_formal_path(), str(self.pathsAndTable.read_type) + '.csv'))
+
+        self.engine_count = len(df['wind_turbine_number'].unique())
+        self.min_date = df['begin_time'].min()
+        self.max_date = df['begin_time'].max()
+        self.data_count = df.shape[0]
+
+        df = df[df['wind_turbine_number'].isin(self.wind_col_trans.values())]
+
+        save_path = os.path.join(self.pathsAndTable.get_save_path(), str(self.pathsAndTable.read_type) + '.csv')
+
+        exists_df = pd.DataFrame()
+        if os.path.exists(save_path):
+            exists_df = read_file_to_df(save_path)
+        else:
+            create_file_path(save_path, is_file_path=True)
+
+        df = pd.concat([exists_df, df], ignore_index=True)
+        df.drop_duplicates(inplace=True, keep='last')
+        self.update_files = [save_path]
+
+        # 根据开始时间进行排序
+        df.sort_values(by=['wind_turbine_number', 'begin_time'], inplace=True)
+
+
+        if self.save_zip:
+            df.to_csv(save_path, compression='gzip', index=False, encoding='utf-8', date_format='%Y-%m-%d %H:%M:%S')
+        else:
+            df.to_csv(save_path, index=False, encoding='utf-8', date_format='%Y-%m-%d %H:%M:%S')
+
+    def save_to_db(self):
+        table_name = self.pathsAndTable.get_table_name()
+        drop_table(table_name)
+        create_warn_fault_table(table_name)
+        save_file_to_db(table_name, self.update_files[0], self.batch_count)
+
+    def update_exec_progress(self):
+        update_trans_status_success(self.id, self.transfer_type,
+                                    self.engine_count, None, self.min_date, self.max_date, self.data_count,
+                                    self.save_db)
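Because the formal fault/warn CSV now accumulates rows across runs, save_to_db rebuilds the table from scratch rather than appending: drop, recreate, bulk-load the single merged file. A sketch of that idempotent reload, using the service functions named in the diff above:

    def reload_warn_fault_table(table_name, csv_path, batch_count=100000):
        # Dropping first makes repeated loads of the merged file converge
        # to the same table contents.
        drop_table(table_name)
        create_warn_fault_table(table_name)
        save_file_to_db(table_name, csv_path, batch_count)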

etl/wind_power/laser/LaserTrans.py  +17 -7

@@ -8,6 +8,8 @@ import pandas as pd
 
 
 from service.plt_service import get_all_wind
 from service.trans_service import save_df_to_db
+from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
+    update_trans_status_success
 from utils.file.trans_methods import read_files, read_file_to_df
 from utils.log.trans_log import set_trance_id, trans_print
 
 
@@ -17,11 +19,12 @@ class LaserTrans():
     激光测距仪转化
     激光测距仪转化
     """
     """
 
 
-    def __init__(self, field_code, read_path):
-        self.field_code = field_code
+    def __init__(self, id, wind_farm_code, read_path):
+        self.id = id
+        self.wind_farm_code = wind_farm_code
         self.read_path = read_path
         self.begin = datetime.datetime.now()
-        self.wind_col_trans, _ = get_all_wind(self.field_code, need_rated_param=False)
+        self.wind_col_trans, _ = get_all_wind(self.wind_farm_code, need_rated_param=False)
 
 
     def get_file_data(self, file_path):
         file_name = os.path.basename(file_path)
@@ -49,18 +52,25 @@ class LaserTrans():
         return result_df
 
 
     def run(self):
-        trance_id = '-'.join([self.field_code, 'laser'])
+        update_trans_status_running(self.id)
+        trance_id = '-'.join([self.wind_farm_code, 'laser'])
         set_trance_id(trance_id)
         all_files = read_files(self.read_path, ['csv'])
-        trans_print(self.field_code, '获取文件总数为:', len(all_files))
+        trans_print(self.wind_farm_code, '获取文件总数为:', len(all_files))
         pool_count = 8 if len(all_files) > 8 else len(all_files)
 
 
         with multiprocessing.Pool(pool_count) as pool:
             dfs = pool.map(self.get_file_data, all_files)
+
+        update_trans_transfer_progress(self.id, 80)
         df = pd.concat(dfs, ignore_index=True)
-        save_df_to_db(self.field_code + "_laser", df)
+        update_trans_transfer_progress(self.id, 90)
         df.sort_values(by=['acquisition_time'], inplace=True)
-        trans_print(self.field_code, '执行结束,总耗时:', (datetime.datetime.now() - self.begin))
+        save_df_to_db(self.wind_farm_code + "_laser", df)
+        update_trans_status_success(self.id, 'laser', len(df['wind_turbine_number'].unique()), None,
+                                    df['acquisition_time'].min(), df['acquisition_time'].max(), df.shape[0])
+        #update_trans_status_success(self.id)
+        trans_print(self.wind_farm_code, '执行结束,总耗时:', (datetime.datetime.now() - self.begin))
 
 
 
 
 if __name__ == '__main__':

etl/wind_power/min_sec/MinSecTrans.py  +26 -43

@@ -2,38 +2,36 @@
 # @Time    : 2024/5/15
 # @Author  : 魏志亮
 import multiprocessing
-
-import pandas as pd
+import os.path
 
 
 from etl.common.BaseDataTrans import BaseDataTrans
+from etl.common.CombineAndSaveFormalFile import CombineAndSaveFormalFile
 from etl.wind_power.min_sec.ReadAndSaveTmp import ReadAndSaveTmp
-from etl.wind_power.min_sec.StatisticsAndSaveFile import StatisticsAndSaveFile
+from etl.wind_power.min_sec.StatisticsAndSaveTmpFormalFile import StatisticsAndSaveTmpFormalFile
 from etl.wind_power.min_sec.TransParam import TransParam
-from service.plt_service import update_trans_status_success, update_trans_status_error
-from service.trans_service import batch_statistics, get_min_sec_conf
+from service.trans_conf_service import update_trans_status_success, update_trans_status_error
+from service.trans_service import get_min_sec_conf
 from utils.conf.read_conf import read_conf
-from utils.df_utils.util import get_time_space
-from utils.file.trans_methods import read_excel_files, read_file_to_df
 from utils.log.trans_log import trans_print
 
 
 
 
 class MinSecTrans(BaseDataTrans):
 
 
-    def __init__(self, data: dict = None, save_db=True, step=0, end=4):
-        super(MinSecTrans, self).__init__(data, save_db, step, end)
+    def __init__(self, data: dict = None, save_db=True, yaml_config=None, step=0, end=6):
+        super(MinSecTrans, self).__init__(data, save_db, yaml_config, step, end)
         self.statistics_map = multiprocessing.Manager().dict()
         self.trans_param = self.get_trans_param()
         self.trans_param.wind_col_trans = self.wind_col_trans
 
 
     def get_filed_conf(self):
-        return get_min_sec_conf(self.field_code, self.read_type)
+        return get_min_sec_conf(self.wind_farm_code, self.transfer_type)
 
 
     def get_trans_param(self):
         conf_map = self.get_filed_conf()
         if conf_map is None or type(conf_map) == tuple or len(conf_map.keys()) == 0:
-            message = f"未找到{self.batch_no}的{self.read_type}配置"
+            message = f"未找到{self.id}的{self.transfer_type}配置"
             trans_print(message)
-            update_trans_status_error(self.batch_no, self.read_type, message, self.save_db)
+            update_trans_status_error(self.id, self.transfer_type, message, self.save_db)
         else:
             resolve_col_prefix = read_conf(conf_map, 'resolve_col_prefix')
             wind_name_exec = read_conf(conf_map, 'wind_name_exec', None)
@@ -74,7 +72,7 @@ class MinSecTrans(BaseDataTrans):
             for col in trans_cols:
                 cols_trans_all[col] = read_conf(conf_map, col, '')
 
 
-            return TransParam(read_type=self.read_type, read_path=self.read_path,
+            return TransParam(read_type=self.transfer_type, read_path=self.read_dir,
                               cols_tran=cols_trans_all,
                               wind_name_exec=wind_name_exec, is_vertical_table=is_vertical_table,
                               vertical_cols=vertical_cols, vertical_key=vertical_key,
@@ -88,37 +86,22 @@ class MinSecTrans(BaseDataTrans):
         read_and_save_tmp.run()
 
 
     # 第四步 统计 并 保存到正式文件
-    def statistics_and_save_to_file(self):
+    def statistics_and_save_tmp_formal_file(self):
         # 保存到正式文件
         # 保存到正式文件
-        statistics_and_save_file = StatisticsAndSaveFile(self.pathsAndTable, self.trans_param, self.statistics_map,
-                                                         self.rated_power_and_cutout_speed_map)
-        statistics_and_save_file.run()
+        statistics_and_save_tmp_formal_file = StatisticsAndSaveTmpFormalFile(self.pathsAndTable, self.trans_param,
+                                                                             self.statistics_map,
+                                                                             self.rated_power_and_cutout_speed_map)
+        statistics_and_save_tmp_formal_file.run()
+
+    def combine_and_save_formal_file(self):
+        combine_and_save_formal_file = CombineAndSaveFormalFile(self.pathsAndTable)
+        self.update_files = combine_and_save_formal_file.run()
 
 
     # 最后更新执行程度
     # 最后更新执行程度
     def update_exec_progress(self):
     def update_exec_progress(self):
-        if self.end >= 4:
-            all_files = read_excel_files(self.pathsAndTable.get_save_path())
-            if self.step <= 3:
-                update_trans_status_success(self.batch_no, self.trans_param.read_type,
-                                            len(all_files),
-                                            self.statistics_map['time_granularity'],
-                                            self.statistics_map['min_date'], self.statistics_map['max_date'],
-                                            self.statistics_map['total_count'], self.save_db)
-            else:
-                df = read_file_to_df(all_files[0], read_cols=['time_stamp'])
-                df['time_stamp'] = pd.to_datetime(df['time_stamp'])
-                time_granularity = get_time_space(df, 'time_stamp')
-                batch_data = batch_statistics("_".join([self.batch_no, self.trans_param.read_type]))
-                if batch_data is not None:
-                    update_trans_status_success(self.batch_no, self.trans_param.read_type,
-                                                len(read_excel_files(self.pathsAndTable.get_save_path())),
-                                                time_granularity,
-                                                batch_data['min_date'], batch_data['max_date'],
-                                                batch_data['total_count'], self.save_db)
-                else:
-                    update_trans_status_success(self.batch_no, self.trans_param.read_type,
-                                                len(read_excel_files(self.pathsAndTable.get_save_path())),
-                                                time_granularity,
-                                                None, None,
-                                                None, self.save_db)
-
+        all_files = set([os.path.basename(i) for i in self.update_files])
+        update_trans_status_success(self.id, self.trans_param.read_type,
+                                    len(all_files),
+                                    self.statistics_map['time_granularity'],
+                                    self.statistics_map['min_date'], self.statistics_map['max_date'],
+                                    self.statistics_map['total_count'], self.save_db)

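A note on the reworked update_exec_progress: statistics_map is a multiprocessing.Manager().dict() shared by the worker processes, so the success update only has to read the aggregated time_granularity / min_date / max_date / total_count keys. Below is a minimal, hedged sketch of that aggregation pattern — collect_statistics, the lock handling and the sample values are illustrative, not code from this repository:

import multiprocessing


def collect_statistics(stats, lock, min_date, max_date, count):
    # Fold one worker's partial result into the shared proxy dict; DictProxy
    # updates are not atomic, so a Manager lock guards the read-modify-write.
    with lock:
        if 'min_date' not in stats or min_date < stats['min_date']:
            stats['min_date'] = min_date
        if 'max_date' not in stats or max_date > stats['max_date']:
            stats['max_date'] = max_date
        stats['total_count'] = stats.get('total_count', 0) + count


if __name__ == '__main__':
    manager = multiprocessing.Manager()
    stats, lock = manager.dict(), manager.Lock()
    with multiprocessing.Pool(2) as pool:
        pool.starmap(collect_statistics,
                     [(stats, lock, '2024-01-01', '2024-06-30', 100),
                      (stats, lock, '2023-12-01', '2024-03-31', 50)])
    print(dict(stats))  # min_date 2023-12-01, max_date 2024-06-30, total_count 150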
+ 5 - 5
etl/wind_power/min_sec/ReadAndSaveTmp.py

@@ -8,7 +8,7 @@ import pandas as pd
 
 from etl.common.PathsAndTable import PathsAndTable
 from etl.wind_power.min_sec import TransParam
-from service.plt_service import update_trans_transfer_progress
+from service.trans_conf_service import update_trans_transfer_progress
 from utils.file.trans_methods import read_excel_files, split_array, del_blank, \
     create_file_path, read_file_to_df, valid_eval
 from utils.log.trans_log import trans_print
@@ -166,7 +166,7 @@ class ReadAndSaveTmp(object):
                     message = "整理临时文件,系统返回错误:" + str(e)
                     raise ValueError(message)
 
-                update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type,
+                update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type,
                                                round(20 + 20 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
 
@@ -186,7 +186,7 @@ class ReadAndSaveTmp(object):
                     message = "整理临时文件,系统返回错误:" + str(e)
                     raise ValueError(message)
 
-                update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type,
+                update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type,
                                                round(20 + 30 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
 
@@ -203,7 +203,7 @@ class ReadAndSaveTmp(object):
                     message = "整理临时文件,系统返回错误:" + str(e)
                     raise ValueError(message)
 
-                update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type,
+                update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type,
                                                round(20 + 30 * (index + 1) / len(all_arrays), 2),
                                                self.pathsAndTable.save_db)
 
@@ -361,6 +361,6 @@ class ReadAndSaveTmp(object):
         trans_print("开始保存数据到临时文件")
         begin = datetime.datetime.now()
         self.read_file_and_save_tmp()
-        update_trans_transfer_progress(self.pathsAndTable.batch_no, self.pathsAndTable.read_type, 50,
+        update_trans_transfer_progress(self.pathsAndTable.id, self.pathsAndTable.read_type, 50,
                                        self.pathsAndTable.save_db)
         trans_print("保存数据到临时文件结束,耗时:", datetime.datetime.now() - begin)

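The repeated round(20 + N * (index + 1) / len(all_arrays), 2) expressions above map each chunk index into a fixed progress band, so transfer_progress climbs monotonically through the stage as chunks complete. A tiny illustrative helper — the name band_progress is mine, not the repository's:

def band_progress(band_start, band_width, index, total):
    # chunk (index + 1) of `total` lands in [band_start, band_start + band_width]
    return round(band_start + band_width * (index + 1) / total, 2)


assert band_progress(20, 30, 0, 5) == 26.0  # first of five chunks
assert band_progress(20, 30, 4, 5) == 50.0  # last chunk tops out the band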
+ 35 - 27
etl/wind_power/min_sec/StatisticsAndSaveFile.py → etl/wind_power/min_sec/StatisticsAndSaveTmpFormalFile.py

@@ -1,4 +1,3 @@
-import datetime
 import multiprocessing
 import traceback
 from os import path
@@ -9,8 +8,7 @@ import pandas as pd
 from etl.common.PathsAndTable import PathsAndTable
 from etl.wind_power.min_sec import TransParam
 from etl.wind_power.min_sec.ClassIdentifier import ClassIdentifier
-from service.plt_service import update_trans_transfer_progress
-from service.trans_service import get_trans_exec_code
+from service.trans_conf_service import update_trans_transfer_progress
 from utils.conf.read_conf import read_conf
 from utils.df_utils.util import get_time_space
 from utils.file.trans_methods import create_file_path, read_excel_files, read_file_to_df, split_array
@@ -20,7 +18,7 @@ from utils.systeminfo.sysinfo import use_files_get_max_cpu_count
 exec("import math")
 
 
-class StatisticsAndSaveFile(object):
+class StatisticsAndSaveTmpFormalFile(object):
 
     def __init__(self, paths_and_table: PathsAndTable, trans_param: TransParam, statistics_map,
                  rated_power_and_cutout_speed_map):
@@ -80,7 +78,7 @@ class StatisticsAndSaveFile(object):
 
         # 删除 有功功率 和 风速均为空的情况
         df.dropna(subset=['active_power', 'wind_velocity'], how='all', inplace=True)
-        trans_print(wind_col_name, "删除有功功率和风速均为空的情况后:", df.shape)
+        trans_print(origin_wind_name, wind_col_name, "删除有功功率和风速均为空的情况后:", df.shape)
         df.replace(np.nan, -999999999, inplace=True)
         number_cols = df.select_dtypes(include=['number']).columns.tolist()
         for col in df.columns:
@@ -89,7 +87,7 @@ class StatisticsAndSaveFile(object):
                     df[col] = pd.to_numeric(df[col], errors='coerce')
                     # 删除包含NaN的行(即那些列A转换失败的行)
                     df = df.dropna(subset=[col])
-                    trans_print(wind_col_name, "删除非数值列名:", col)
+                    trans_print(origin_wind_name, wind_col_name, "删除非数值列名:", col)
         df.replace(-999999999, np.nan, inplace=True)
 
         df.drop_duplicates(['wind_turbine_number', 'time_stamp'], keep='first', inplace=True)
@@ -107,11 +105,11 @@ class StatisticsAndSaveFile(object):
             df = df.groupby(['wind_turbine_number', 'time_stamp']).mean().reset_index()
         trans_print('有功功率前10个', df.head(10)['active_power'].values)
         power_df = df[df['active_power'] > 0]
-        trans_print(wind_col_name, "功率大于0的数量:", power_df.shape)
+        trans_print(origin_wind_name, wind_col_name, "功率大于0的数量:", power_df.shape)
         power = power_df.sample(int(power_df.shape[0] / 100))['active_power'].median()
 
         del power_df
-        trans_print(wind_col_name, '有功功率,中位数', power)
+        trans_print(origin_wind_name, wind_col_name, '有功功率,中位数', power)
         if power > 100000:
             df['active_power'] = df['active_power'] / 1000
         ## 做数据检测前,先强行处理有功功率
@@ -122,33 +120,43 @@ class StatisticsAndSaveFile(object):
             rated_power_and_cutout_speed_tuple = (None, None)
 
         # 如果有需要处理的,先进行代码处理,再进行打标签
-        exec_code = get_trans_exec_code(self.paths_and_table.batch_no, self.paths_and_table.read_type)
-        if exec_code:
-            if 'import ' in exec_code:
-                raise Exception("执行代码不支持导入包")
-            exec(exec_code)
+        # exec_code = get_trans_exec_code(self.paths_and_table.exec_id, self.paths_and_table.read_type)
+        # if exec_code:
+        #     if 'import ' in exec_code:
+        #         raise Exception("执行代码不支持导入包")
+        #     exec(exec_code)
 
-        class_identifiler = ClassIdentifier(wind_turbine_number=wind_col_name, origin_df=df,
+        class_identifiler = ClassIdentifier(wind_turbine_number=origin_wind_name, origin_df=df,
                                             rated_power=rated_power_and_cutout_speed_tuple[0],
                                             cut_out_speed=rated_power_and_cutout_speed_tuple[1])
         df = class_identifiler.run()
         df['year'] = df['time_stamp'].dt.year
         df['month'] = df['time_stamp'].dt.month
         df['day'] = df['time_stamp'].dt.day
-        df['time_stamp'] = df['time_stamp'].apply(
-            lambda x: x.strftime('%Y-%m-%d %H:%M:%S'))
+        df['time_stamp'] = df['time_stamp'].apply(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'))
 
         df['wind_turbine_name'] = str(origin_wind_name)
+        df['year_month'] = df[['year', 'month']].apply(lambda x: str(x['year']) + str(x['month']).zfill(2), axis=1)
+        cols = df.columns
 
-        if self.paths_and_table.save_zip:
-            save_path = path.join(self.paths_and_table.get_save_path(), str(wind_col_name) + '.csv.gz')
+        if self.paths_and_table.read_type == 'second':
+            type_col = 'year_month'
         else:
-            save_path = path.join(self.paths_and_table.get_save_path(), str(wind_col_name) + '.csv')
-        create_file_path(save_path, is_file_path=True)
-        if self.paths_and_table.save_zip:
-            df.to_csv(save_path, compression='gzip', index=False, encoding='utf-8')
-        else:
-            df.to_csv(save_path, index=False, encoding='utf-8')
+            type_col = 'year'
+
+        date_strs = df[type_col].unique().tolist()
+        for date_str in date_strs:
+            save_path = path.join(self.paths_and_table.get_tmp_formal_path(), str(date_str),
+                                  str(origin_wind_name) + '.csv')
+            create_file_path(save_path, is_file_path=True)
+            now_df = df[df[type_col] == date_str][cols]
+            if self.paths_and_table.save_zip:
+                save_path = save_path + '.gz'
+                now_df.to_csv(save_path, compression='gzip', index=False, encoding='utf-8')
+            else:
+                now_df.to_csv(save_path, index=False, encoding='utf-8')
+
+            del now_df
 
         self.set_statistics_data(df)
 
@@ -166,8 +174,8 @@ class StatisticsAndSaveFile(object):
             for index, arr in enumerate(all_arrays):
                 with multiprocessing.Pool(split_count) as pool:
                     pool.starmap(self.save_to_csv, [(i,) for i in arr])
-                update_trans_transfer_progress(self.paths_and_table.batch_no, self.paths_and_table.read_type,
-                                               round(50 + 20 * (index + 1) / len(all_arrays), 2),
+                update_trans_transfer_progress(self.paths_and_table.id, self.paths_and_table.read_type,
+                                               round(50 + 15 * (index + 1) / len(all_arrays), 2),
                                                self.paths_and_table.save_db)
 
         except Exception as e:
@@ -177,5 +185,5 @@ class StatisticsAndSaveFile(object):
 
     def run(self):
         self.mutiprocessing_to_save_file()
-        update_trans_transfer_progress(self.paths_and_table.batch_no, self.paths_and_table.read_type, 70,
+        update_trans_transfer_progress(self.paths_and_table.id, self.paths_and_table.read_type, 65,
                                        self.paths_and_table.save_db)

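The key structural change in this file: instead of one CSV per turbine, each turbine's frame is split by a period column (year_month for second data, year for minute data) and written as one file per period under the tmp-formal directory, which is what later enables per-partition database loads. A hedged, stand-alone miniature of that loop — the directory layout and turbine name F01 are invented; the real code takes both from PathsAndTable:

from os import path, makedirs

import pandas as pd

df = pd.DataFrame({'time_stamp': pd.to_datetime(['2024-01-05 00:00:00', '2024-02-10 00:00:00']),
                   'active_power': [100.0, 200.0]})
df['year'] = df['time_stamp'].dt.year
df['month'] = df['time_stamp'].dt.month
# the same zero-padded YYYYMM key the diff builds
df['year_month'] = df[['year', 'month']].apply(lambda x: str(x['year']) + str(x['month']).zfill(2), axis=1)

for date_str in df['year_month'].unique().tolist():
    out_dir = path.join('tmp_formal', str(date_str))
    makedirs(out_dir, exist_ok=True)
    now_df = df[df['year_month'] == date_str]
    # one gzip csv per period per turbine, e.g. tmp_formal/202401/F01.csv.gz
    now_df.to_csv(path.join(out_dir, 'F01.csv.gz'), compression='gzip',
                  index=False, encoding='utf-8')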
+ 79 - 48
etl/wind_power/wave/WaveTrans.py

@@ -1,9 +1,12 @@
+import datetime
 import json
 import multiprocessing
 
 from service.plt_service import get_all_wind
 from service.trans_service import get_wave_conf, save_df_to_db, get_or_create_wave_table, \
     get_wave_data, delete_exist_wave_data
+from service.trans_conf_service import update_trans_status_running, update_trans_transfer_progress, \
+    update_trans_status_success
 from utils.file.trans_methods import *
 from utils.log.trans_log import set_trance_id
 from utils.systeminfo.sysinfo import get_available_cpu_count_with_percent
@@ -13,46 +16,53 @@ exec("from os.path import *")
 
 class WaveTrans(object):
 
-    def __init__(self, field_code, read_path, save_path: str):
-        self.field_code = field_code
-        self.read_path = read_path
-        self.save_path = save_path
+    def __init__(self, id, wind_farm_code, read_dir):
+        self.id = id
+        self.wind_farm_code = wind_farm_code
+        self.read_dir = read_dir
         self.begin = datetime.datetime.now()
 
-    def get_data_exec(self, func_code, arg):
+        self.engine_count = 0
+        self.min_date = None
+        self.max_date = None
+        self.data_count = 0
+
+    def get_data_exec(self, func_code, filepath, measupoint_names: set):
         exec(func_code)
-        return locals()['get_data'](arg)
+        return locals()['get_data'](filepath, measupoint_names)
 
     def del_exists_data(self, df):
         min_date, max_date = df['time_stamp'].min(), df['time_stamp'].max()
-        db_df = get_wave_data(self.field_code + '_wave', min_date, max_date)
+        db_df = get_wave_data(self.wind_farm_code + '_wave', min_date, max_date)
 
         exists_df = pd.merge(db_df, df,
                              on=['wind_turbine_name', 'time_stamp', 'sampling_frequency', 'mesure_point_name'],
                              how='inner')
         ids = [int(i) for i in exists_df['id'].to_list()]
         if ids:
-            delete_exist_wave_data(self.field_code + "_wave", ids)
+            delete_exist_wave_data(self.wind_farm_code + "_wave", ids)
 
     def run(self):
-        trance_id = '-'.join([self.field_code, 'wave'])
+        update_trans_status_running(self.id)
+        trance_id = '-'.join([self.wind_farm_code, 'wave'])
         set_trance_id(trance_id)
-        all_files = read_files(self.read_path, ['csv'])
-        print(len)
+        all_files = read_files(self.read_dir, ['txt'])
+        self.data_count = len(all_files)
+        update_trans_transfer_progress(self.id, 5)
         # 最大取系统cpu的 1/2
         split_count = get_available_cpu_count_with_percent(1 / 2)
 
-        all_wind, _ = get_all_wind(self.field_code, False)
+        all_wind, _ = get_all_wind(self.wind_farm_code, False)
 
-        get_or_create_wave_table(self.field_code + '_wave')
+        get_or_create_wave_table(self.wind_farm_code + '_wave')
 
-        wave_conf = get_wave_conf(self.field_code)
+        wave_conf = get_wave_conf(self.wind_farm_code)
 
         base_param_exec = wave_conf['base_param_exec']
         map_dict = {}
         if base_param_exec:
             base_param_exec = base_param_exec.replace('\r\n', '\n').replace('\t', '    ')
-            print(base_param_exec)
+            trans_print(base_param_exec)
             if 'import ' in base_param_exec:
                 raise Exception("方法不支持import方法")
 
@@ -60,36 +70,57 @@
         for point in mesure_poins:
             map_dict[wave_conf[point]] = point.replace('conf_', '')
 
-        with multiprocessing.Pool(split_count) as pool:
-            file_datas = pool.starmap(self.get_data_exec, [(base_param_exec, i) for i in all_files])
-
-        print("读取文件耗时:", datetime.datetime.now() - self.begin)
-
-        result_list = list()
-        for file_data in file_datas:
-            wind_turbine_name, time_stamp, sampling_frequency, rotational_speed, mesure_point_name, mesure_data = \
-                file_data[0], file_data[1], file_data[2], file_data[3], file_data[4], file_data[5]
-
-            if mesure_point_name in map_dict.keys():
-                result_list.append(
-                    [wind_turbine_name, time_stamp, rotational_speed, sampling_frequency, mesure_point_name,
-                     mesure_data])
-
-        df = pd.DataFrame(result_list,
-                          columns=['wind_turbine_name', 'time_stamp', 'rotational_speed', 'sampling_frequency',
-                                   'mesure_point_name', 'mesure_data'])
-        df['time_stamp'] = pd.to_datetime(df['time_stamp'], errors='coerce')
-        df['mesure_point_name'] = df['mesure_point_name'].map(map_dict)
-        df.dropna(subset=['mesure_point_name'], inplace=True)
-
-        df['wind_turbine_number'] = df['wind_turbine_name'].map(all_wind).fillna(df['wind_turbine_name'])
-
-        df['mesure_data'] = df['mesure_data'].apply(lambda x: json.dumps(x))
-
-        df.sort_values(by=['time_stamp', 'mesure_point_name'], inplace=True)
-
-        self.del_exists_data(df)
-
-        save_df_to_db(self.field_code + '_wave', df, batch_count=1000)
-
-        print("总耗时:", datetime.datetime.now() - self.begin)
+        wind_turbine_name_set = set()
+
+        all_array = split_array(all_files, split_count * 10)
+        total_index = len(all_array)
+        for index, now_array in enumerate(all_array):
+            index_begin = datetime.datetime.now()
+            with multiprocessing.Pool(split_count) as pool:
+                file_datas = pool.starmap(self.get_data_exec,
+                                          [(base_param_exec, i, list(map_dict.keys())) for i in now_array])
+
+            update_trans_transfer_progress(self.id, 20 + int(index / total_index * 60))
+            trans_print("读取文件耗时:", datetime.datetime.now() - self.begin)
+
+            result_list = list()
+            for file_data in file_datas:
+                if file_data:
+                    wind_turbine_name, time_stamp, sampling_frequency, rotational_speed, mesure_point_name, type, mesure_data = \
+                        file_data[0], file_data[1], file_data[2], file_data[3], file_data[4], file_data[5], file_data[6]
+
+                    if mesure_point_name in map_dict.keys():
+                        wind_turbine_name_set.add(wind_turbine_name)
+                        if self.min_date is None or self.min_date > time_stamp:
+                            self.min_date = time_stamp
+                        if self.max_date is None or self.max_date < time_stamp:
+                            self.max_date = time_stamp
+
+                        result_list.append(
+                            [wind_turbine_name, time_stamp, rotational_speed, sampling_frequency, mesure_point_name,
+                             type,
+                             mesure_data])
+
+            if result_list:
+                df = pd.DataFrame(result_list,
+                                  columns=['wind_turbine_name', 'time_stamp', 'rotational_speed', 'sampling_frequency',
+                                           'mesure_point_name', 'type', 'mesure_data'])
+                df['time_stamp'] = pd.to_datetime(df['time_stamp'], errors='coerce')
+                df['mesure_point_name'] = df['mesure_point_name'].map(map_dict)
+                df.dropna(subset=['mesure_point_name'], inplace=True)
+
+                df['wind_turbine_number'] = df['wind_turbine_name'].map(all_wind).fillna(df['wind_turbine_name'])
+
+                df['mesure_data'] = df['mesure_data'].apply(lambda x: json.dumps(x))
+
+                df.sort_values(by=['time_stamp', 'mesure_point_name'], inplace=True)
+                # self.del_exists_data(df)
+                save_df_to_db(self.wind_farm_code + '_wave', df, batch_count=400)
+            trans_print(f"总共{total_index}组,当前{index + 1}", "本次写入耗时:", datetime.datetime.now() - index_begin,
+                        "总耗时:", datetime.datetime.now() - self.begin)
+
+        update_trans_status_success(self.id, 'wave', len(wind_turbine_name_set), None,
+                                    self.min_date, self.max_date, self.data_count)
+
+        # update_trans_status_success(self.id)
+        trans_print("总耗时:", datetime.datetime.now() - self.begin)

+ 0 - 3
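The rewritten run() no longer parses every file in a single pool call: it walks the file list in chunks of split_count * 10, parses each chunk with a fresh pool, and flushes that chunk to the database before reading the next, which bounds peak memory on large wave archives. A hedged sketch of the pattern — parse_file and the file names are placeholders, and the local split_array stands in for the repository's utils.file.trans_methods helper on the assumption that its second argument is a chunk size:

import multiprocessing


def parse_file(filepath):
    # stand-in for get_data_exec: one parsed record per file
    return ('F01', '2024-01-01 00:00:00', '25.6Hz', 1500.0, 'gearbox', 2, [0.1, 0.2])


def split_array(items, size):
    return [items[i:i + size] for i in range(0, len(items), size)]


if __name__ == '__main__':
    all_files = [f'wave_{i}.txt' for i in range(100)]  # illustrative paths
    split_count = 4
    for index, chunk in enumerate(split_array(all_files, split_count * 10)):
        with multiprocessing.Pool(split_count) as pool:
            records = pool.map(parse_file, chunk)
        # build a DataFrame from `records` and save it to the DB here, so at
        # most one chunk of parsed files is held in memory at a time
        print(f'chunk {index + 1}: {len(records)} records')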
package.sh

@@ -1,3 +0,0 @@
-pyinstaller --clean -F -n etl_tool app_run.py
-
-#python -m nuitka --onefile --remove-output app_run.py

+ 249 - 10
requirements.txt

@@ -1,29 +1,268 @@
+aiofiles==22.1.0
+aiohttp==3.9.5
+aiosignal==1.3.1
+alabaster==0.7.16
+aliyun-python-sdk-core==2.15.1
+aliyun-python-sdk-kms==2.16.3
+aniso8601==9.0.1
+anyio==4.6.0
+APScheduler==3.10.4
+argcomplete==1.10.3
+arrow==1.3.0
+asgiref==3.8.1
+astroid==3.2.2
+asttokens==2.4.1
+async-timeout==4.0.3
+asyncio==3.4.3
+atomicwrites==1.4.1
+attrs==23.2.0
+autopep8==2.0.4
+Babel==2.15.0
+backcall==0.2.0
+backports-datetime-fromisoformat==2.0.1
+backports.tarfile==1.2.0
+bcrypt==4.1.3
+beautifulsoup4==4.8.2
+binaryornot==0.4.4
+black==24.4.2
+bleach==6.1.0
+blinker==1.8.2
+cache==1.0.3
+cachelib==0.9.0
+certifi==2024.6.2
+cffi==1.16.0
 chardet==5.2.0
-contourpy==1.3.0
+charset-normalizer==3.3.2
+click==8.1.7
+cloudpickle==3.0.0
+colorama==0.4.6
+comm==0.2.2
+compressed_rtf==1.0.6
+contourpy==1.2.1
+cookiecutter==2.6.0
+crcmod==1.7
+cryptography==41.0.2
 cycler==0.12.1
 DBUtils==3.1.0
+debugpy==1.8.2
+decorator==5.1.1
+defusedxml==0.7.1
+diff-match-patch==20230430
+dill==0.3.8
+distro==1.9.0
+Django==4.1.13
+docopt==0.6.2
+docstring-to-markdown==0.15
+docutils==0.21.2
+docx2txt==0.8
+ebcdic==1.1.1
 et-xmlfile==1.1.0
-fonttools==4.53.1
+exceptiongroup==1.2.1
+executing==2.0.1
+extract-msg==0.28.7
+fastapi==0.115.0
+fastapi-offline==1.7.3
+fastjsonschema==2.20.0
+flake8==7.1.0
+Flask==3.0.3
+Flask-APScheduler==1.13.1
+Flask-Caching==2.3.0
+Flask-Cors==4.0.1
+Flask-Excel==0.0.7
+Flask-Executor==1.0.0
+Flask-HTTPAuth==4.8.0
+Flask-Login==0.6.3
+flask-restx==1.3.0
+Flask-Script==2.0.6
+flask-siwadoc==0.2.2
+Flask-SQLAlchemy==3.1.1
+Flask-WTF==1.2.1
+fonttools==4.53.0
+frozenlist==1.4.1
+fsspec==2024.12.0
 greenlet==3.0.3
-importlib_resources==6.4.5
-kiwisolver==1.4.7
-matplotlib==3.9.2
+h11==0.14.0
+idna==3.7
+imagesize==1.4.1
+IMAPClient==2.1.0
+importlib_metadata==8.0.0
+importlib_resources==6.4.0
+inflection==0.5.1
+iniconfig==2.0.0
+intervaltree==3.1.0
+ipykernel==6.29.4
+ipython==8.12.3
+isort==5.13.2
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jedi==0.19.1
+jellyfish==1.0.4
+Jinja2==3.1.4
+jmespath==0.10.0
+joblib==1.4.2
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+jupyter_client==8.6.2
+jupyter_core==5.7.2
+jupyterlab_pygments==0.3.0
+keyring==25.2.1
+kiwisolver==1.4.5
+lml==0.1.0
+loguru==0.7.2
+lxml==5.2.2
+m3u8==5.1.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.0
+matplotlib-inline==0.1.7
+mccabe==0.7.0
+mdurl==0.1.2
+mistune==3.0.2
+more-itertools==10.3.0
+multidict==6.0.5
+mypy-extensions==1.0.0
+mysqlclient==2.2.4
+nbclient==0.10.0
+nbconvert==7.16.4
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.2.1
 numpy==2.0.0
-openpyxl==3.1.5
+numpydoc==1.7.0
+olefile==0.47
+openpyxl==3.1.4
+oss2==2.18.6
 packaging==24.1
 pandas==2.2.2
-pillow==10.4.0
+pandocfilters==1.5.1
+paramiko==3.4.0
+parso==0.8.4
+pathspec==0.12.1
+pdfminer==20191125
+pdfminer.six==20191110
+pdfminer3k==1.3.4
+peewee==3.17.5
+pexpect==4.9.0
+pickleshare==0.7.5
+pillow==10.3.0
+pipreqs==0.5.0
+platformdirs==4.2.2
+pluggy==1.5.0
+ply==3.11
+prompt_toolkit==3.0.47
 psutil==6.0.0
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyarrow==18.1.0
+pycodestyle==2.11.1
+pycparser==2.22
+pycryptodome==3.20.0
+pydantic==1.9.0
+pydocstyle==6.3.0
+pyexcel==0.7.0
+pyexcel-io==0.6.6
+pyexcel-webio==0.1.4
+pyflakes==3.2.0
+Pygments==2.18.0
+PyJWT==2.8.0
+pylint==3.2.3
+pylint-venv==3.0.3
+pyls-spyder==0.4.0
 PyMySQL==1.1.0
-pyparsing==3.1.4
+PyNaCl==1.5.0
+pyparsing==3.1.2
+PyPDF2==3.0.1
+pypdfium2==4.30.0
+pyperclip==1.9.0
+PyQt-SiliconUI==1.0.1
+PyQt5==5.15.10
+PyQt5-Qt5==5.15.2
+PyQt5-sip==12.13.0
+PyQtWebEngine==5.15.6
+PyQtWebEngine-Qt5==5.15.2
+pytest==8.3.2
 python-calamine==0.2.3
 python-dateutil==2.9.0.post0
+python-lsp-black==2.0.0
+python-lsp-jsonrpc==1.1.2
+python-lsp-server==1.11.0
+python-pptx==0.6.23
+python-slugify==8.0.4
+pytoolconfig==1.3.1
 pytz==2024.1
+pywin32==306
+pywin32-ctypes==0.2.2
+pyxxl==0.3.6
 PyYAML==6.0.1
+pyzmq==26.0.3
+QDarkStyle==3.2.3
+qstylizer==0.2.3
+QtAwesome==1.3.1
+qtconsole==5.5.2
+QtPy==2.4.1
 rarfile==4.2
-six==1.16.0
+redis==5.0.7
+referencing==0.35.1
+requests==2.32.3
+rich==13.7.1
+rope==1.13.0
+rpds-py==0.18.1
+Rtree==1.2.0
+scikit-learn==1.5.1
+scipy==1.13.1
+six==1.12.0
+sniffio==1.3.1
+snowballstemmer==2.2.0
+sortedcontainers==2.4.0
+soupsieve==2.5
+SpeechRecognition==3.8.1
+Sphinx==7.3.7
+sphinxcontrib-applehelp==1.0.8
+sphinxcontrib-devhelp==1.0.6
+sphinxcontrib-htmlhelp==2.0.5
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-qthelp==1.0.7
+sphinxcontrib-serializinghtml==1.1.10
+spyder==5.5.5
+spyder-kernels==2.5.2
 SQLAlchemy==2.0.30
+sqlparse==0.5.0
+stack-data==0.6.3
+starlette==0.38.6
+tabula-py==2.9.3
+tabulate==0.9.0
+text-unidecode==1.3
+textdistance==4.6.2
+textract==1.6.5
+texttable==1.7.0
+threadpoolctl==3.5.0
+three-merge==0.1.1
+tinycss2==1.3.0
+tomli==2.0.1
+tomlkit==0.12.5
+tornado==6.4.1
+traitlets==5.14.3
+types-python-dateutil==2.9.0.20240316
 typing_extensions==4.12.2
 typing_extensions==4.12.2
 tzdata==2024.1
+ufile==3.2.9
+ujson==5.10.0
+urllib3==2.2.2
+uvicorn==0.32.1
+watchdog==4.0.1
+wcwidth==0.2.13
+web.py==0.40.dev1
+webencodings==0.5.1
+Werkzeug==3.0.3
+whatthepatch==1.0.5
+win32-setctime==1.1.0
+WTForms==3.1.2
 xlrd==2.0.1
-zipp==3.20.1
+XlsxWriter==3.2.0
+yapf==0.40.2
+yarg==0.1.9
+yarl==1.9.4
+zipp==3.19.2

+ 0 - 145
service/plt_service.py

@@ -6,139 +6,6 @@ import datetime
 from service.common_connect import plt
 
 
-def update_timeout_trans_data():
-    sql = """
-    UPDATE data_transfer  
-    SET trans_sys_status = 2,err_info='运行超时失败',transfer_state=2
-    WHERE   
-        (  
-            (transfer_type = 'second' AND TIMESTAMPDIFF(HOUR, transfer_start_time, NOW()) > 24)  
-            OR  
-            (transfer_type = 'minute' AND TIMESTAMPDIFF(HOUR, transfer_start_time, NOW()) > 6)  
-            OR  
-            (transfer_type = 'warn' AND TIMESTAMPDIFF(HOUR, transfer_start_time, NOW()) > 6)  
-            OR  
-            (transfer_type = 'fault' AND TIMESTAMPDIFF(HOUR, transfer_start_time, NOW()) > 6)  
-        )  
-        AND trans_sys_status = 0
-    """
-    plt.execute(sql)
-
-
-def update_trans_status_running(batch_no, trans_type, schedule_exec=True):
-    if schedule_exec:
-        exec_sql = """
-        update data_transfer set transfer_state = 0,trans_sys_status = 0 ,transfer_start_time = now(),err_info='',
-        engine_count =0,time_granularity=0,transfer_finish_time=null,
-        data_min_time= null,data_max_time= null,transfer_data_count=null
-        where batch_code = %s  and transfer_type = %s
-        """
-        plt.execute(exec_sql, (batch_no, trans_type))
-
-
-def update_trans_status_error(batch_no, trans_type, message="", save_db=True):
-    if save_db:
-        exec_sql = """
-        update data_transfer set transfer_state = 2,trans_sys_status=2 ,err_info= %s,transfer_finish_time=now() 
-        where batch_code = %s  and  transfer_type = %s
-        """
-
-        message = message if len(message) <= 200 else message[0:200]
-        plt.execute(exec_sql, (message, batch_no, trans_type))
-
-
-def update_trans_status_success(batch_no, trans_type, wind_count=0, time_granularity=0,
-                                min_date=datetime.datetime.now(),
-                                max_date=datetime.datetime.now(),
-                                total_count=0, save_db=True):
-    if save_db:
-        if min_date is not None:
-            exec_sql = """
-            update data_transfer set transfer_state = 1,trans_sys_status = 1,transfer_progress=100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now(),
-            data_min_time= %s,data_max_time= %s,transfer_data_count=%s
-            where batch_code = %s  and transfer_type = %s
-            """
-            plt.execute(exec_sql, (wind_count, time_granularity, min_date, max_date, total_count, batch_no, trans_type))
-        else:
-            exec_sql = """
-            update data_transfer set transfer_state = 1,trans_sys_status = 1,transfer_progress = 100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now()
-            where batch_code = %s  and transfer_type = %s
-            """
-            plt.execute(exec_sql, (wind_count, time_granularity, batch_no, trans_type))
-
-
-def update_trans_transfer_progress(batch_no, trans_type, transfer_progress=0, save_db=True):
-    if save_db:
-        exec_sql = """
-        update data_transfer set transfer_progress =%s where batch_code = %s  and transfer_type = %s
-        """
-        plt.execute(exec_sql, (int(transfer_progress), batch_no, trans_type))
-
-
-# 获取执行的数据
-def get_batch_exec_data(run_count: int = 1) -> dict:
-    query_running_sql = "select count(1) as count from data_transfer where trans_sys_status = 0"
-    query_next_exec_sql = """
-    SELECT
-        t.*,a.field_name,b.batch_name
-    FROM
-        data_transfer t INNER JOIN wind_field a on t.field_code = a.field_code
-        inner join wind_field_batch b on t.batch_code = b.batch_code
-    WHERE
-         t.trans_sys_status in (-1,1,2) and t.transfer_state = 0
-    AND t.transfer_addr != ''
-    ORDER BY
-        t.update_time
-    LIMIT 1
-    """
-    data = plt.execute(query_running_sql)
-    now_count = int(data[0]['count'])
-    if now_count >= run_count:
-        return None
-    else:
-        data = plt.execute(query_next_exec_sql)
-        if type(data) == tuple:
-            return {}
-        return data[0]
-
-
-def get_data_by_batch_no_and_type(batch_no, transfer_type):
-    query_exec_sql = f"""
-    SELECT
-        t.*,a.field_name,b.batch_name
-    FROM
-        data_transfer t INNER JOIN wind_field a on t.field_code = a.field_code
-        inner join wind_field_batch b on t.batch_code = b.batch_code
-    WHERE
-         t.trans_sys_status in (-1,1,2) and t.transfer_state = 2 and t.batch_code = '{batch_no}' and t.transfer_type = '{transfer_type}'
-    AND t.transfer_addr != ''
-    """
-
-    data = plt.execute(query_exec_sql)
-    if type(data) == tuple:
-        return None
-    return data[0]
-
-
-## 合并多个batch_使用
-def get_hebing_data_by_batch_no_and_type(batch_no, transfer_type):
-    query_exec_sql = f"""
-    SELECT
-        t.*,a.field_name,b.batch_name
-    FROM
-        data_transfer t INNER JOIN wind_field a on t.field_code = a.field_code
-        inner join wind_field_batch b on t.batch_code = b.batch_code
-    WHERE
-         t.trans_sys_status = 1 and t.transfer_state = 1 and t.batch_code = '{batch_no}' and t.transfer_type = '{transfer_type}'
-    AND t.transfer_addr != ''
-    """
-
-    data = plt.execute(query_exec_sql)
-    if type(data) == tuple:
-        return None
-    return data[0]
-
-
 def get_all_wind(field_code, need_rated_param=True):
     query_sql = """
     SELECT t.engine_code,t.engine_name,t.rated_capacity,a.rated_cut_out_windspeed 
@@ -173,15 +40,3 @@ def get_base_wind_and_power(wind_turbine_number):
         return None
     return dict_datas
 
-
-if __name__ == '__main__':
-    # print(get_batch_exec_data(run_count=1))
-    #
-    # print("**********************")
-    # print(get_batch_exec_data(run_count=2))
-    # print("**********************")
-    print(get_data_by_batch_no_and_type("test_", "second"))
-    # print(update_trans_status_success("test_唐龙-定时任务测试", "second", 10))
-    begin = datetime.datetime.now()
-
-    print(get_all_wind('WOF034900024'))

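What survives in plt_service.py is the read-only lookup side; callers use it as all_wind, _ = get_all_wind(field_code, False) and treat the first element as a raw-name-to-engine-code mapping. A hedged illustration of that downstream normalization — the sample mapping is invented, while the map/fillna line mirrors how WaveTrans uses it:

import pandas as pd

all_wind = {'1#': 'WOG00001', '2#': 'WOG00002'}  # invented name -> engine_code sample
df = pd.DataFrame({'wind_turbine_name': ['1#', '2#', '99#']})
# unmapped names fall back to the raw name instead of becoming NaN
df['wind_turbine_number'] = df['wind_turbine_name'].map(all_wind).fillna(df['wind_turbine_name'])
print(df)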
+ 145 - 0
service/trans_conf_service.py

@@ -0,0 +1,145 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2025/1/9
+# @Author  : 魏志亮
+from datetime import datetime
+
+from service.common_connect import trans
+
+
+def update_timeout_trans_data():
+    sql = """
+    UPDATE data_transfer  
+    SET trans_sys_status = 2,err_info='运行超时失败',transfer_status=2
+    WHERE   
+        TIMESTAMPDIFF(HOUR, transfer_start_time, NOW()) > 24 
+        AND trans_sys_status = 0
+    """
+    trans.execute(sql)
+
+
+def update_trans_status_running(id, trans_type, save_db=True):
+    if save_db:
+        exec_sql = """
+        update data_transfer set transfer_status = 0,trans_sys_status = 0 ,transfer_start_time = now(),err_info='',
+        engine_count =0,time_granularity=0,transfer_finish_time=null,transfer_progress=0,
+        data_min_time= null,data_max_time= null,transfer_data_count=null
+        where id = %s  and transfer_type = %s
+        """
+        trans.execute(exec_sql, (id, trans_type))
+
+
+def update_archive_success(id, trans_type, archive_path, save_db=True):
+    if save_db:
+        exec_sql = """
+        update data_transfer set transfer_progress=70,archive_path = %s
+        where id = %s  and transfer_type = %s
+        """
+        trans.execute(exec_sql, (archive_path, id, trans_type))
+
+
+def update_trans_status_error(id, trans_type, message="", save_db=True):
+    if save_db:
+        exec_sql = """
+        update data_transfer set transfer_status = 2,trans_sys_status=2 ,err_info= %s,transfer_finish_time=now() 
+        where id = %s  and  transfer_type = %s
+        """
+
+        message = message if len(message) <= 200 else message[0:200]
+        trans.execute(exec_sql, (message, id, trans_type))
+
+
+def update_trans_status_success(id, trans_type, wind_count=0, time_granularity=0,
+                                min_date=datetime.now(),
+                                max_date=datetime.now(),
+                                total_count=0, save_db=True):
+    if save_db:
+        if min_date is not None:
+            exec_sql = """
+            update data_transfer set transfer_status = 1,trans_sys_status = 1,transfer_progress=100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now(),
+            data_min_time= %s,data_max_time= %s,transfer_data_count=%s
+            where id = %s  and transfer_type = %s
+            """
+            trans.execute(exec_sql, (wind_count, time_granularity, min_date, max_date, total_count, id, trans_type))
+        else:
+            exec_sql = """
+            update data_transfer set transfer_status = 1,trans_sys_status = 1,transfer_progress = 100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now()
+            where id = %s  and transfer_type = %s
+            """
+            trans.execute(exec_sql, (wind_count, time_granularity, id, trans_type))
+
+
+def update_trans_transfer_progress(id, trans_type, transfer_progress=0, save_db=True):
+    print(id, trans_type, transfer_progress)
+    if save_db:
+        exec_sql = """
+        update data_transfer set transfer_progress =%s where id = %s  and transfer_type = %s
+        """
+        trans.execute(exec_sql, (int(transfer_progress), id, trans_type))
+
+
+def get_now_running_count():
+    query_running_sql = """
+    select count(1) as count from data_transfer where trans_sys_status = 0
+    """
+    data = trans.execute(query_running_sql)
+    now_count = int(data[0]['count'])
+    return now_count
+
+
+# 获取执行的数据
+def get_batch_exec_data() -> dict:
+    query_next_exec_sql = """
+    SELECT
+        *
+    FROM
+        data_transfer t 
+    WHERE
+         t.trans_sys_status in (-1,1,2) and t.transfer_status = -1
+    AND t.read_dir != ''
+    ORDER BY
+        t.update_time
+    LIMIT 1
+    """
+    data = trans.execute(query_next_exec_sql)
+    if type(data) == tuple:
+        return None
+    return data[0]
+
+
+def get_data_by_id(id):
+    query_exec_sql = f"""
+    SELECT
+        t.*,a.field_name,b.batch_name
+    FROM
+        data_transfer t INNER JOIN wind_field a on t.field_code = a.field_code
+        inner join wind_field_batch b on t.id = b.id
+    WHERE
+         t.trans_sys_status in (-1,1,2) and t.transfer_status = 2 and t.id = '{id}'
+    AND t.read_dir != ''
+    """
+
+    data = trans.execute(query_exec_sql)
+    if type(data) == tuple:
+        return None
+    return data[0]
+
+def create_wave_table(table_name, save_db=True):
+    if save_db:
+        exec_sql = f"""
+        CREATE TABLE `{table_name}` (
+          `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键',
+          `wind_turbine_number` varchar(20) DEFAULT NULL COMMENT '风机编号',
+          `wind_turbine_name` varchar(20) DEFAULT NULL COMMENT '原始风机编号',
+          `time_stamp` datetime DEFAULT NULL COMMENT '时间',
+          `rotational_speed` float DEFAULT NULL COMMENT '转速',
+          `sampling_frequency` varchar(50) DEFAULT NULL COMMENT '采样频率',
+          `mesure_point_name` varchar(100) DEFAULT NULL COMMENT '测点名称',
+          `type` int(11) DEFAULT '-1' COMMENT '-1:不存在 0:角度 1:速度 2:加速度 3:位移,默认 -1',
+          `mesure_data` longtext COMMENT '测点数据',
+          PRIMARY KEY (`id`),
+          KEY `wind_turbine_number` (`wind_turbine_number`),
+          KEY `time_stamp` (`time_stamp`),
+          KEY `mesure_point_name` (`mesure_point_name`)
+        ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4
+        """
+        trans.execute(exec_sql)

+ 137 - 128
service/trans_service.py

@@ -6,6 +6,7 @@ from os import *
 
 import pandas as pd
 
+from service.trans_conf_service import create_wave_table
 from utils.file.trans_methods import split_array
 from utils.log.trans_log import trans_print
 from service.common_connect import trans
@@ -57,106 +58,141 @@ def get_wave_conf(field_code) -> dict:
     return res[0]
 
 
-def creat_min_sec_table(table_name, win_names, read_type):
-    create_sql = f"""
-    CREATE TABLE
-    IF NOT EXISTS `{table_name}` (
-        `wind_turbine_number` VARCHAR (20) DEFAULT NULL COMMENT '风机编号',
-        `wind_turbine_name` VARCHAR(20) DEFAULT NULL COMMENT '风机原始名称',
-        `time_stamp` datetime NOT NULL COMMENT '时间戳',
-        `active_power` DOUBLE DEFAULT NULL COMMENT '有功功率',
-        `rotor_speed` DOUBLE DEFAULT NULL COMMENT '风轮转速',
-        `generator_speed` DOUBLE DEFAULT NULL COMMENT '发电机转速',
-        `wind_velocity` DOUBLE DEFAULT NULL COMMENT '风速',
-        `pitch_angle_blade_1` DOUBLE DEFAULT NULL COMMENT '桨距角1',
-        `pitch_angle_blade_2` DOUBLE DEFAULT NULL COMMENT '桨距角2',
-        `pitch_angle_blade_3` DOUBLE DEFAULT NULL COMMENT '桨距角3',
-        `cabin_position` DOUBLE DEFAULT NULL COMMENT '机舱位置',
-        `true_wind_direction` DOUBLE DEFAULT NULL COMMENT '绝对风向',
-        `yaw_error1` DOUBLE DEFAULT NULL COMMENT '对风角度',
-        `set_value_of_active_power` DOUBLE DEFAULT NULL COMMENT '有功功率设定值',
-        `gearbox_oil_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱油温',
-        `generatordrive_end_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '发电机驱动端轴承温度',
-        `generatornon_drive_end_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '发电机非驱动端轴承温度',
-        `cabin_temperature` DOUBLE DEFAULT NULL COMMENT '机舱内温度',
-        `twisted_cable_angle` DOUBLE DEFAULT NULL COMMENT '扭缆角度',
-        `front_back_vibration_of_the_cabin` DOUBLE DEFAULT NULL COMMENT '机舱前后振动',
-        `side_to_side_vibration_of_the_cabin` DOUBLE DEFAULT NULL COMMENT '机舱左右振动',
-        `actual_torque` DOUBLE DEFAULT NULL COMMENT '实际力矩',
-        `given_torque` DOUBLE DEFAULT NULL COMMENT '给定力矩',
-        `clockwise_yaw_count` DOUBLE DEFAULT NULL COMMENT '顺时针偏航次数',
-        `counterclockwise_yaw_count` DOUBLE DEFAULT NULL COMMENT '逆时针偏航次数',
-        `unusable` DOUBLE DEFAULT NULL COMMENT '不可利用',
-        `power_curve_available` DOUBLE DEFAULT NULL COMMENT '功率曲线可用',
-        `required_gearbox_speed` DOUBLE DEFAULT NULL COMMENT '齿轮箱转速',
-        `inverter_speed_master_control` DOUBLE DEFAULT NULL COMMENT '变频器转速(主控)',
-        `outside_cabin_temperature` DOUBLE DEFAULT NULL COMMENT '环境温度',
-        `main_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '主轴承轴承温度',
-        `gearbox_high_speed_shaft_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱高速轴轴承温度',
-        `gearboxmedium_speed_shaftbearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱中速轴轴承温度',
-        `gearbox_low_speed_shaft_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱低速轴轴承温度',
-        `generator_winding1_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组1温度',
-        `generator_winding2_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组2温度',
-        `generator_winding3_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组3温度',
-        `wind_turbine_status` DOUBLE DEFAULT NULL COMMENT '风机状态1',
-        `wind_turbine_status2` DOUBLE DEFAULT NULL COMMENT '风机状态2',
-        `turbulence_intensity` DOUBLE DEFAULT NULL COMMENT '湍流强度',
-        `lab` int DEFAULT NULL COMMENT '-1:停机 0:好点  1:欠发功率点;2:超发功率点;3:额定风速以上的超发功率点 4: 限电',
-        `year` INT (4) DEFAULT NULL COMMENT '年',
-        `month` INT (2) DEFAULT NULL COMMENT '月',
-        `day` INT (2) DEFAULT NULL COMMENT '日',
-        `param1` DOUBLE DEFAULT NULL COMMENT '预留1',
-        `param2` DOUBLE DEFAULT NULL COMMENT '预留2',
-        `param3` DOUBLE DEFAULT NULL COMMENT '预留3',
-        `param4` DOUBLE DEFAULT NULL COMMENT '预留4',
-        `param5` DOUBLE DEFAULT NULL COMMENT '预留5',
-        `param6` VARCHAR (20) DEFAULT NULL COMMENT '预留6',
-        `param7` VARCHAR (20) DEFAULT NULL COMMENT '预留7',
-        `param8` VARCHAR (20) DEFAULT NULL COMMENT '预留8',
-        `param9` VARCHAR (20) DEFAULT NULL COMMENT '预留9',
-        `param10` VARCHAR (20) DEFAULT NULL COMMENT '预留10',
-         KEY `time_stamp` (`time_stamp`),
-         KEY `wind_turbine_number` (`wind_turbine_number`)
-    ) ENGINE = myisam DEFAULT CHARSET = utf8mb4
+def creat_min_sec_table(table_name, trans_type):
+    exists_table_sql = f"""
+    select count(1) as count from information_schema.tables where table_schema = '{trans.database}' and table_name = '{table_name}'
     """
     """
+    count = trans.execute(exists_table_sql)[0]['count']
+    if count > 0:
+        trans_print(f"{table_name}已存在")
+
+    if trans_type == 'second':
+        add_key = 'KEY `year_month` (`year_month`)'
+        key = '`year_month`'
+    else:
+        add_key = 'KEY `year` (`year`)'
+        key = '`year`'
+
+    if count == 0:
+        create_sql = f"""
+        CREATE TABLE
+        IF NOT EXISTS `{table_name}` (
+            `wind_turbine_number` VARCHAR (20) DEFAULT NULL COMMENT '风机编号',
+            `wind_turbine_name` VARCHAR(20) DEFAULT NULL COMMENT '风机原始名称',
+            `time_stamp` datetime NOT NULL COMMENT '时间戳',
+            `active_power` DOUBLE DEFAULT NULL COMMENT '有功功率',
+            `rotor_speed` DOUBLE DEFAULT NULL COMMENT '风轮转速',
+            `generator_speed` DOUBLE DEFAULT NULL COMMENT '发电机转速',
+            `wind_velocity` DOUBLE DEFAULT NULL COMMENT '风速',
+            `pitch_angle_blade_1` DOUBLE DEFAULT NULL COMMENT '桨距角1',
+            `pitch_angle_blade_2` DOUBLE DEFAULT NULL COMMENT '桨距角2',
+            `pitch_angle_blade_3` DOUBLE DEFAULT NULL COMMENT '桨距角3',
+            `cabin_position` DOUBLE DEFAULT NULL COMMENT '机舱位置',
+            `true_wind_direction` DOUBLE DEFAULT NULL COMMENT '绝对风向',
+            `yaw_error1` DOUBLE DEFAULT NULL COMMENT '对风角度',
+            `set_value_of_active_power` DOUBLE DEFAULT NULL COMMENT '有功功率设定值',
+            `gearbox_oil_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱油温',
+            `generatordrive_end_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '发电机驱动端轴承温度',
+            `generatornon_drive_end_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '发电机非驱动端轴承温度',
+            `cabin_temperature` DOUBLE DEFAULT NULL COMMENT '机舱内温度',
+            `twisted_cable_angle` DOUBLE DEFAULT NULL COMMENT '扭缆角度',
+            `front_back_vibration_of_the_cabin` DOUBLE DEFAULT NULL COMMENT '机舱前后振动',
+            `side_to_side_vibration_of_the_cabin` DOUBLE DEFAULT NULL COMMENT '机舱左右振动',
+            `actual_torque` DOUBLE DEFAULT NULL COMMENT '实际力矩',
+            `given_torque` DOUBLE DEFAULT NULL COMMENT '给定力矩',
+            `clockwise_yaw_count` DOUBLE DEFAULT NULL COMMENT '顺时针偏航次数',
+            `counterclockwise_yaw_count` DOUBLE DEFAULT NULL COMMENT '逆时针偏航次数',
+            `unusable` DOUBLE DEFAULT NULL COMMENT '不可利用',
+            `power_curve_available` DOUBLE DEFAULT NULL COMMENT '功率曲线可用',
+            `required_gearbox_speed` DOUBLE DEFAULT NULL COMMENT '齿轮箱转速',
+            `inverter_speed_master_control` DOUBLE DEFAULT NULL COMMENT '变频器转速(主控)',
+            `outside_cabin_temperature` DOUBLE DEFAULT NULL COMMENT '环境温度',
+            `main_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '主轴承轴承温度',
+            `gearbox_high_speed_shaft_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱高速轴轴承温度',
+            `gearboxmedium_speed_shaftbearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱中速轴轴承温度',
+            `gearbox_low_speed_shaft_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱低速轴轴承温度',
+            `generator_winding1_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组1温度',
+            `generator_winding2_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组2温度',
+            `generator_winding3_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组3温度',
+            `wind_turbine_status` DOUBLE DEFAULT NULL COMMENT '风机状态1',
+            `wind_turbine_status2` DOUBLE DEFAULT NULL COMMENT '风机状态2',
+            `turbulence_intensity` DOUBLE DEFAULT NULL COMMENT '湍流强度',
+            `lab` int DEFAULT NULL COMMENT '-1:停机 0:好点  1:欠发功率点;2:超发功率点;3:额定风速以上的超发功率点 4: 限电',
+            `year` INT (4) DEFAULT NULL COMMENT '年',
+            `month` INT (2) DEFAULT NULL COMMENT '月',
+            `day` INT (2) DEFAULT NULL COMMENT '日',
+            `year_month` int(6) DEFAULT NULL COMMENT '年-月',
+            `param1` DOUBLE DEFAULT NULL COMMENT '预留1',
+            `param2` DOUBLE DEFAULT NULL COMMENT '预留2',
+            `param3` DOUBLE DEFAULT NULL COMMENT '预留3',
+            `param4` DOUBLE DEFAULT NULL COMMENT '预留4',
+            `param5` DOUBLE DEFAULT NULL COMMENT '预留5',
+            `param6` VARCHAR (20) DEFAULT NULL COMMENT '预留6',
+            `param7` VARCHAR (20) DEFAULT NULL COMMENT '预留7',
+            `param8` VARCHAR (20) DEFAULT NULL COMMENT '预留8',
+            `param9` VARCHAR (20) DEFAULT NULL COMMENT '预留9',
+            `param10` VARCHAR (20) DEFAULT NULL COMMENT '预留10',
+             KEY `time_stamp` (`time_stamp`),
+             KEY `wind_turbine_number` (`wind_turbine_number`),
+             {add_key}
+        )
+        PARTITION BY LIST COLUMNS ({key}, `wind_turbine_number`) (
+        PARTITION pDefault VALUES IN ((000000, 'wind_turbine_number'))
+        ) 
+        """
+        trans.execute(create_sql)
 
-    if read_type == 'second' and win_names and len(win_names) > 1:
 
-        create_sql = create_sql + f" PARTITION BY LIST COLUMNS(`wind_turbine_number`) ("
-        partition_strs = list()
-        for wind_name in win_names:
-            partition_strs.append(f" PARTITION p{wind_name} VALUES IN('{wind_name}')")
+def add_partation(table_name: str, date_str: str, wind_turbine_number):
+    p_name = f'p{date_str}_{wind_turbine_number}'
+    add_sql = f"""
+    alter table {table_name} add partition (
+        partition {p_name} VALUES IN (({date_str}, '{wind_turbine_number}'))
+    )
+    """
+    trans.execute(add_sql)
 
-        create_sql = create_sql + f" PARTITION BY LIST COLUMNS(`wind_turbine_number`) ("
-        partition_strs = list()
-        for wind_name in win_names:
-            partition_strs.append(f" PARTITION p{wind_name} VALUES IN('{wind_name}')")
 
-        create_sql = create_sql + ",".join(partition_strs) + ")"
 
-    trans.execute(create_sql)
+def remove_partation(table_name: str, date_str: str, wind_turbine_number):
+    p_name = f'p{date_str}_{wind_turbine_number}'
+    remove_sql = f"""
+    alter table {table_name} DROP PARTITION {p_name}
+    """
+    trans.execute(remove_sql)
 
 
-def rename_table(table_name, renamed_table_name, save_db=True):
-    if save_db:
-        rename_sql = f"RENAME TABLE {table_name} TO {renamed_table_name}"
-        try:
-            trans.execute(rename_sql)
-        except:
-            trans_print(traceback.format_exc())
+def add_or_remove_partation(table_name: str, date_str: str, wind_turbine_number):
+    p_name = f'p{date_str}_{wind_turbine_number}'
+    query_partation = f"""
+    SELECT count(1) as count from information_schema.`PARTITIONS` t 
+    where t.TABLE_SCHEMA = '{trans.database}' 
+    and t.TABLE_NAME = '{table_name}' 
+    and t.PARTITION_NAME = '{p_name}'
+    """
+    count = trans.execute(query_partation)[0]['count']
+    if count == 0:
+        add_partation(table_name, date_str, wind_turbine_number)
+    else:
+        remove_partation(table_name, date_str, wind_turbine_number)
+        add_partation(table_name, date_str, wind_turbine_number)
 
 
-def drop_table(table_name, save_db=True):
-    if save_db:
-        rename_sql = f"drop TABLE `{table_name}`"
-        try:
-            trans.execute(rename_sql)
-        except:
-            trans_print(traceback.format_exc())
+def save_partation_file_to_db(table_name: str, file: str, wind_turbine_number, date_str, batch_count=100000):
+    base_name = path.basename(file)
+    # wind_turbine_number = path.basename(file).split(".")[0]
+    # date_str = path.basename(path.dirname(file))
 
+    add_or_remove_partation(table_name, date_str, wind_turbine_number)
 
 
-def clear_table(table_name, save_db=True):
-    if save_db:
-        rename_sql = f"truncate TABLE `{table_name}`"
-        try:
-            trans.execute(rename_sql)
-        except:
-            trans_print(traceback.format_exc())
+    try:
+        for i, df in enumerate(pd.read_csv(file, chunksize=batch_count)):
+            trans.execute_df_save(df, table_name)
+            count = (i + 1) * batch_count
+            trans_print(base_name, f"Chunk {count} written to MySQL.")
+    except Exception as e:
+        trans_print(traceback.format_exc())
+        message = base_name + str(e)
+        raise Exception(message)
 
 
 
 
 def save_file_to_db(table_name: str, file: str, batch_count=100000):
 def save_file_to_db(table_name: str, file: str, batch_count=100000):
@@ -218,6 +254,14 @@ def create_warn_fault_table(table_name):
     trans.execute(sql)
     trans.execute(sql)
 
 
 
 
+def drop_table(table_name):
+    drop_sql = f"DROP TABLE `{table_name}`"
+    try:
+        trans.execute(drop_sql)
+    except:
+        pass
+
+
 def get_or_create_wave_table(table_name):
 def get_or_create_wave_table(table_name):
     create_table = False
     create_table = False
     query_sql = f"select 1 from `{table_name}` limit 1"
     query_sql = f"select 1 from `{table_name}` limit 1"
@@ -227,23 +271,7 @@ def get_or_create_wave_table(table_name):
         create_table = True
         create_table = True
 
 
     if create_table:
     if create_table:
-        sql = f"""
-        CREATE TABLE `{table_name}` (
-          `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键',
-          `wind_turbine_number` varchar(20) DEFAULT NULL COMMENT '风机编号',
-          `wind_turbine_name` varchar(20) DEFAULT NULL COMMENT '原始风机编号',
-          `time_stamp` datetime DEFAULT NULL COMMENT '时间',
-          `sampling_frequency` varchar(50) DEFAULT NULL COMMENT '分析频率',
-          `mesure_point_name` varchar(100) DEFAULT NULL COMMENT '测点名称',
-          `mesure_data` mediumtext COMMENT '测点数据',
-          PRIMARY KEY (`id`),
-          KEY `wind_turbine_number` (`wind_turbine_number`),
-          KEY `time_stamp` (`time_stamp`),
-          KEY `mesure_point_name` (`mesure_point_name`)
-        ) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4
-        """
-
-        trans.execute(sql)
+        create_wave_table(table_name)
 
 
 
 
 def get_wave_data(table_name, min_data, max_data):
 def get_wave_data(table_name, min_data, max_data):
@@ -261,34 +289,15 @@ def delete_exist_wave_data(table_name, ids):
         trans.execute(delete_sql, array)
         trans.execute(delete_sql, array)
 
 
 
 
-def get_trans_exec_code(batch_no, query_type):
-    query_sql = f"SELECT * from batch_exec_code t where t.batch_no = '{batch_no}' and type='{query_type}' and t.`status` = 1 limit 1"
+def get_trans_exec_code(id, query_type):
+    query_sql = f"SELECT * from batch_exec_code t where t.id = '{id}' and type='{query_type}' and t.`status` = 1 limit 1"
     res = trans.execute(query_sql)
     res = trans.execute(query_sql)
     if type(res) == tuple or type(res) == str:
     if type(res) == tuple or type(res) == str:
         return None
         return None
     exec_code = res[0]['exec_code']
     exec_code = res[0]['exec_code']
-    trans_print("批次", batch_no, '类型', type, '获取到执行代码:', exec_code)
+    trans_print("任务ID", id, '类型', type, '获取到执行代码:', exec_code)
     return exec_code
     return exec_code
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
-    # path_prix = r"/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF063100040-WOB00013/second"
-    # files = ["WOG00030.csv", "WOG00034.csv"]
-    # for path in files:
-    #     save_file_to_db("WOF063100040-WOB00013_second", path_prix + sep + path, batch_count=100000)
-
-    # sql = """
-    # SELECT wind_turbine_number, time_stamp, wind_velocity, active_power
-    #                            FROM `WOF085500002-WOB000001_second`
-    #                            WHERE  time_stamp >= '2024-02-17 00:00:00' AND time_stamp <= '2024-05-14 00:00:00' AND lab = 0
-    # """
-    #
-    # begin = datetime.datetime.now()
-    # df = trans.read_sql_to_df(sql)
-    # end = datetime.datetime.now()
-    # print(df.shape)
-    # print(df.info())
-    # print("Time used:", (end - begin).seconds)
-    # get_fault_warn_conf("test", "fault")
-
     delete_exist_wave_data('SKF001_wave', [1, 2, 3])
     delete_exist_wave_data('SKF001_wave', [1, 2, 3])
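
Review note: this hunk replaces the per-turbine LIST partitioning with composite LIST COLUMNS partitions keyed on ({key}, wind_turbine_number). add_or_remove_partation makes a load idempotent at partition granularity: when the partition already exists it is dropped (discarding its rows) and re-created, so re-importing one (date, turbine) slice replaces the old data instead of appending duplicates. Two small flags: the helpers spell "partation" throughout (consistently, so callers still work), and get_trans_exec_code logs the builtin type instead of query_type, so the log line prints a class object rather than the query type. A usage sketch of the new path follows; the table name, file path and codes are invented for illustration, and trans is assumed to be the project's ConnectMysql wrapper exposing execute() and execute_df_save():

    # Hypothetical example, not part of the commit.
    table_name = 'WOF091200030_minute'    # assumed table name
    wind_turbine_number = 'WOG00030'      # assumed turbine code
    date_str = '202411'                   # yields partition p202411_WOG00030
    # The partition is dropped and re-added before the chunked insert, so a
    # re-run replaces this (date, turbine) slice instead of duplicating it.
    save_partation_file_to_db(table_name,
                              f'/tmp/{date_str}/{wind_turbine_number}.csv',
                              wind_turbine_number, date_str)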

+ 0 - 111
service/wave_service.py

@@ -1,111 +0,0 @@
-import datetime
-
-from service.common_connect import plt
-
-
-def update_timeout_wave_trans_data():
-    sql = """
-    UPDATE wave_data_transfer  
-    SET trans_sys_status = 2,err_info='运行超时失败',transfer_state=2
-    WHERE  TIMESTAMPDIFF(HOUR, transfer_start_time, NOW()) > 6  
-        AND trans_sys_status = 0
-    """
-    plt.execute(sql)
-
-
-def update_wave_trans_status_running(id, schedule_exec=True):
-    if schedule_exec:
-        exec_sql = """
-        update wave_data_transfer set transfer_state = 0,trans_sys_status = 0 ,transfer_start_time = now(),err_info='',
-        engine_count =0,time_granularity=0,transfer_finish_time=null,
-        data_min_time= null,data_max_time= null,transfer_data_count=null
-        where id = %s 
-        """
-        plt.execute(exec_sql, id)
-
-
-def update_wave_trans_status_error(id, message="", save_db=True):
-    if save_db:
-        exec_sql = """
-        update wave_data_transfer set transfer_state = 2,trans_sys_status=2 ,err_info= %s,transfer_finish_time=now() 
-        where id = %s  
-        """
-
-        message = message if len(message) <= 200 else message[0:200]
-        plt.execute(exec_sql, (message, id))
-
-
-def update_wave_trans_status_success(id, wind_count=0, time_granularity=0,
-                                     min_date=datetime.datetime.now(),
-                                     max_date=datetime.datetime.now(),
-                                     total_count=0, save_db=True):
-    if save_db:
-        if min_date is not None:
-            exec_sql = """
-            update wave_data_transfer set transfer_state = 1,trans_sys_status = 1,transfer_progress=100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now(),
-            data_min_time= %s,data_max_time= %s,transfer_data_count=%s
-            where id = %s  
-            """
-            plt.execute(exec_sql, (wind_count, time_granularity, min_date, max_date, total_count, id))
-        else:
-            exec_sql = """
-            update wave_data_transfer set transfer_state = 1,trans_sys_status = 1,transfer_progress = 100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now()
-            where id = %s  
-            """
-            plt.execute(exec_sql, (wind_count, time_granularity, id))
-
-
-def update_wave_trans_transfer_progress(id, transfer_progress=0, save_db=True):
-    if save_db:
-        exec_sql = """
-        update wave_data_transfer set transfer_progress = %s where id = %s
-        """
-        plt.execute(exec_sql, (int(transfer_progress), id))
-
-
-def create_wave_table(table_name, save_db=True):
-    if save_db:
-        exec_sql = f"""
-        CREATE TABLE `{table_name}` (
-          `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键',
-          `wind_turbine_number` varchar(20) DEFAULT NULL COMMENT '风机编号',
-          `wind_turbine_name` varchar(20) DEFAULT NULL COMMENT '原始风机编号',
-          `time_stamp` datetime DEFAULT NULL COMMENT '时间',
-          `rotational_speed` float DEFAULT NULL COMMENT '转速',
-          `sampling_frequency` varchar(50) DEFAULT NULL COMMENT '采样频率',
-          `mesure_point_name` varchar(100) DEFAULT NULL COMMENT '测点名称',
-          `mesure_data` mediumtext COMMENT '测点数据',
-          PRIMARY KEY (`id`),
-          KEY `wind_turbine_number` (`wind_turbine_number`),
-          KEY `time_stamp` (`time_stamp`),
-          KEY `mesure_point_name` (`mesure_point_name`)
-        ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4
-        """
-        plt.execute(exec_sql)
-
-
-# 获取执行的数据
-def get_wave_exec_data(run_count: int = 1) -> dict:
-    query_running_sql = "select count(1) as count from data_transfer where trans_sys_status = 0"
-    query_next_exec_sql = """
-    SELECT
-        t.*,a.field_name,b.batch_name
-    FROM
-        wave_data_transfer t INNER JOIN wind_field a on t.field_code = a.field_code
-        inner join wind_field_batch b on t.batch_code = b.batch_code
-    WHERE
-         t.trans_sys_status in (-1,1,2) and t.transfer_state = 0
-    AND t.transfer_addr != ''
-    ORDER BY
-        t.update_time
-    LIMIT 1
-    """
-    data = plt.execute(query_running_sql)
-    now_count = int(data[0]['count'])
-    if now_count >= run_count:
-        return None
-    else:
-        data = plt.execute(query_next_exec_sql)
-        if type(data) == tuple:
-            return {}
-        return data[0]
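
Review note: deleting wave_service.py also removes its plt-based create_wave_table, while the trans_service.py hunk above now calls create_wave_table(table_name) from get_or_create_wave_table. A trans-based replacement is presumably defined in a part of service/trans_service.py not shown in this view; a sketch of what it would look like, reusing the DDL deleted here (an assumption, not visible in the diff):

    def create_wave_table(table_name):
        # Same schema as the deleted wave_service version, executed through
        # the trans connection instead of plt.
        exec_sql = f"""
        CREATE TABLE `{table_name}` (
          `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键',
          `wind_turbine_number` varchar(20) DEFAULT NULL COMMENT '风机编号',
          `wind_turbine_name` varchar(20) DEFAULT NULL COMMENT '原始风机编号',
          `time_stamp` datetime DEFAULT NULL COMMENT '时间',
          `rotational_speed` float DEFAULT NULL COMMENT '转速',
          `sampling_frequency` varchar(50) DEFAULT NULL COMMENT '采样频率',
          `mesure_point_name` varchar(100) DEFAULT NULL COMMENT '测点名称',
          `mesure_data` mediumtext COMMENT '测点数据',
          PRIMARY KEY (`id`),
          KEY `wind_turbine_number` (`wind_turbine_number`),
          KEY `time_stamp` (`time_stamp`),
          KEY `mesure_point_name` (`mesure_point_name`)
        ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4
        """
        trans.execute(exec_sql)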

+ 8 - 12
test_run_local.py

@@ -6,8 +6,6 @@ import sys
 import traceback
 from os import *
 
-from utils.conf.read_conf import yaml_conf, read_conf
-
 
 def get_exec_data(batch_no=None, read_type=None, run_count=1):
     if batch_no and read_type:
@@ -41,7 +39,9 @@ def run(data: dict = dict(), save_db=False, step=0, end=4):
 
 
 if __name__ == '__main__':
-    env = 'dev'
+    from utils.conf.read_conf import yaml_conf, read_conf
+
+    env = 'prod'
     if len(sys.argv) >= 2:
         env = sys.argv[1]
 
@@ -55,18 +55,14 @@ if __name__ == '__main__':
     from service.plt_service import get_batch_exec_data, get_data_by_batch_no_and_type
     from etl.wind_power.fault_warn.FaultWarnTrans import FaultWarnTrans
     from etl.wind_power.min_sec.MinSecTrans import MinSecTrans
+    from etl.wind_power.wave.WaveTrans import WaveTrans
 
     begin = datetime.datetime.now()
-    data = dict()
-
-    data['batch_code'] = 'xinhuashuidian'
-    data['batch_name'] = '新华水电故障'
-    data['transfer_type'] = 'fault'
-    data['transfer_addr'] = r'D:\data\新华水电\收资数据\故障告警\汇能机组数据-故障'
-    data['field_code'] = 'xinhuashuidian'
-    data['field_name'] = '新华水电'
+
     try:
-        run(data=data, save_db=False, step=0, end=3)
+        exec_process = WaveTrans(1, 'WOF091200030',
+                                 r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/收资数据/振动/CMSFTPServer/ZYXFDC2')
+        exec_process.run()
     except Exception as e:
         trans_print(traceback.format_exc())
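
Review note: test_run_local.py now defaults env to 'prod' and drives a hard-coded WaveTrans job, so running the local test script with no arguments touches the production configuration. A possible guard (a suggestion only, not in the commit; environ comes from the script's existing `from os import *`):

    # Refuse to run the local test against prod without an explicit opt-in.
    env = sys.argv[1] if len(sys.argv) >= 2 else 'prod'
    if env == 'prod' and environ.get('ALLOW_PROD_TEST') != '1':
        raise SystemExit('test_run_local: set ALLOW_PROD_TEST=1 to run against prod')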
 
 

+ 0 - 91
test_run_local_piliang.py

@@ -1,91 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Time    : 2024/6/11
-# @Author  : 魏志亮
-import datetime
-import sys
-import traceback
-from os import *
-
-from utils.conf.read_conf import yaml_conf, read_conf
-
-
-def get_exec_data(batch_no=None, read_type=None, run_count=1):
-    if batch_no and read_type:
-        data = get_data_by_batch_no_and_type(batch_no, read_type)
-        if data is None:
-            raise ValueError(f"未找到批次号:{batch_no},类型:{read_type}")
-
-    else:
-        data = get_batch_exec_data(run_count)
-        if data is None:
-            trans_print("当前有任务在执行")
-            sys.exit(0)
-        elif len(data.keys()) == 0:
-            trans_print("当前无任务")
-            sys.exit(0)
-
-    return data
-
-
-def run(data: dict = dict(), save_db=False):
-    exec_process = None
-    if data['transfer_type'] in ['second', 'minute']:
-        exec_process = MinSecTrans(data=data, save_db=save_db)
-
-    if data['transfer_type'] in ['fault', 'warn']:
-        exec_process = FaultWarnTrans(data=data, save_db=save_db)
-
-    if exec_process is None:
-        raise Exception("No exec process")
-    exec_process.run()
-
-
-if __name__ == '__main__':
-    env = 'dev'
-    if len(sys.argv) >= 2:
-        env = sys.argv[1]
-
-    conf_path = path.abspath(f"./conf/etl_config_{env}.yaml")
-    environ['ETL_CONF'] = conf_path
-    yaml_config = yaml_conf(conf_path)
-    environ['env'] = env
-    run_count = int(read_conf(yaml_config, "run_batch_count", 1))
-
-    from utils.log.trans_log import trans_print
-    from service.plt_service import get_batch_exec_data, get_data_by_batch_no_and_type
-    from etl.wind_power.fault_warn.FaultWarnTrans import FaultWarnTrans
-    from etl.wind_power.min_sec.MinSecTrans import MinSecTrans
-    from utils.file.trans_methods import read_file_to_df
-
-    begin = datetime.datetime.now()
-    df = read_file_to_df("tmp_file/rebuild_data.csv")
-    results = list()
-    data = dict()
-    for batch_code, batch_name, transfer_type, transfer_addr, field_code, field_name \
-            in zip(df['batch_code'], df['batch_name'], df['transfer_type'], df['transfer_addr'], df['field_code'],
-                   df['field_name']):
-        batch_begin = datetime.datetime.now()
-        transfer_addr = transfer_addr.replace(r"/data/download/collection_data",
-                                              r"/data/download/datang_shangxian")
-        trans_print("开始执行批次:", batch_code, batch_name, transfer_type, field_code, field_name)
-        trans_print("批次路径:", transfer_addr)
-
-        data['batch_code'] = batch_code
-        data['batch_name'] = batch_name
-        data['transfer_type'] = transfer_type
-        data['transfer_addr'] = transfer_addr
-        data['field_code'] = field_code
-        data['field_name'] = field_name
-        try:
-            run(data=data, save_db=True)
-            results.append((batch_code, batch_name, transfer_type, field_code, field_name, 'success'))
-        except Exception as e:
-            results.append((batch_code, batch_name, transfer_type, field_code, field_name, 'error'))
-            trans_print(traceback.format_exc())
-        finally:
-            trans_print("执行结束,耗时:", datetime.datetime.now() - batch_begin, "总耗时:", datetime.datetime.now() - begin)
-
-    for data in results:
-        trans_print(data)
-
-    trans_print("执行结束,总耗时:", datetime.datetime.now() - begin)

+ 0 - 755
tmp_file/ClassIdentifier_1.py_bak

@@ -1,755 +0,0 @@
-import numpy as np
-from pandas import DataFrame
-
-from service.plt_service import get_base_wind_and_power
-from utils.file.trans_methods import read_file_to_df
-
-
-class ClassIdentifier(object):
-
-    def __init__(self, wind_turbine_number, file_path: str = None, origin_df: DataFrame = None, index='time_stamp',
-                 wind_velocity='wind_velocity',
-                 active_power='active_power'):
-        """
-        :param wind_turbine_number: The wind turbine number.
-        :param file_path: The file path of the input data.
-        :param origin_df: The pandas DataFrame containing the input data.
-        :param index: 索引字段
-        :param wind_velocity: 风速字段
-        :param active_power: 有功功率字段
-        """
-        self.wind_turbine_number = wind_turbine_number
-        self.index = index
-        self.wind_velocity = wind_velocity
-        self.active_power = active_power
-
-        self.rated_wind_speed = 'rated_wind_speed'
-        self.rated_capacity = 'rated_capacity'
-
-        if file_path is None and origin_df is None:
-            raise ValueError("Either file_path or origin_df should be provided.")
-
-        if file_path:
-            self.df = read_file_to_df(file_path)
-        else:
-            self.df = origin_df
-
-        self.df = self.df.set_index(keys=self.index)
-
-    def identifier(self):
-        # 风速 和 有功功率 df
-        wind_and_power_df = self.df[[self.wind_velocity, self.active_power]]
-        wind_and_power_df.reset_index(inplace=True)
-        wind_and_power_df_count = wind_and_power_df.shape[0]
-        PowerMax = wind_and_power_df[self.active_power].max()
-        PowerRated = np.ceil(PowerMax / 100) * 100
-        PRated = 1500  # 额定功率1500kw,可改为2000kw
-        VCutOut = 25
-        VCutIn = 3
-        VRated = 10
-        # 网格法确定风速风向分区数量,功率方向分区数量,
-        # PNum = (PRated+100)/25  #功率分区间隔25kW
-        PNum = int(np.ceil(PowerRated / 25))  # 功率分区间隔25kW
-        VNum = int(np.ceil(VCutOut / 0.25))  # 风速分区间隔0.25m/s
-
-        # 实发电量
-        EPActualTotal = 0  # 实发电量
-        for i in range(wind_and_power_df_count):
-            if wind_and_power_df.loc[i, self.active_power] >= 0:
-                EPActualTotal = EPActualTotal + wind_and_power_df.loc[i, self.active_power] / 6
-
-        print("EPActualTotal", EPActualTotal)
-        # 平均风速
-        WindSpeedAvr = 0
-        WindSum = 0
-        for i in range(wind_and_power_df_count):
-            if wind_and_power_df.loc[i, self.wind_velocity] >= 0:
-                WindSum = WindSum + wind_and_power_df.loc[i, self.wind_velocity]
-        WindSpeedAvr = WindSum / wind_and_power_df_count
-        print("windSpeedAvr", WindSpeedAvr)
-        # 用于计算损失电量的标杆功率曲线,可更换为风机设计功率曲线
-        # base_wind_and_power_df = get_base_wind_and_power(self.wind_turbine_number)
-        base_wind_and_power_df = read_file_to_df(r"D:\中能智能\matlib计算相关\好点坏点matlib计算\A型风机设计功率曲线.csv", header=None)
-        base_wind_and_power_df.columns = [self.rated_wind_speed, self.rated_capacity]
-        if base_wind_and_power_df.empty:
-            raise ValueError("风场编号:" + self.wind_turbine_number + "未查询到风速功率信息")
-        base_wind_and_power_count = base_wind_and_power_df.shape[0]
-
-        # 风机可利用率,计算方法:大于切入风速但发电功率小于0
-        TurbineRunRate = 0
-        nShouldGP = 0
-        nRealGP = 0
-        for i in range(wind_and_power_df_count):
-            if wind_and_power_df.loc[i, self.wind_velocity] >= VCutIn:
-                nShouldGP = nShouldGP + 1
-                if wind_and_power_df.loc[i, self.active_power] > 0:
-                    nRealGP = nRealGP + 1
-        if nShouldGP > 0:
-            TurbineRunRate = nRealGP / nShouldGP * 100
-
-        print("disp(TurbineRunRate)", TurbineRunRate)
-        # 理论电量-
-        EPIdealTotalAAA = 0  # 理论电量-
-        nWhichBin = 0
-        IdealPower = 0
-        for i in range(wind_and_power_df_count):
-            # 应发电量-理论
-            nWhichBin = 0
-            for m in range(base_wind_and_power_count - 1):
-                if base_wind_and_power_df.loc[m, self.rated_wind_speed] < wind_and_power_df.loc[
-                    i, self.wind_velocity] <= \
-                        base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
-                    nWhichBin = m
-                    break
-
-            # 插值计算对应设计功率
-            if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
-                continue
-
-            IdealPower = (wind_and_power_df.loc[i, self.wind_velocity] - base_wind_and_power_df.loc[nWhichBin,
-                                                                                                    self.rated_wind_speed]) / (
-                                 base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] -
-                                 base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) * (
-                                 base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
-                                 base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) \
-                         + base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
-            EPIdealTotalAAA = EPIdealTotalAAA + IdealPower / 6
-
-        print('EPIdealTotalAAA', EPIdealTotalAAA)
-        #
-        # 存储功率大于零的运行数据
-        DzMarch809 = np.zeros([wind_and_power_df_count, 2], dtype=float)
-        nCounter1 = 0
-        for i in range(wind_and_power_df_count):
-            if wind_and_power_df.loc[i, self.active_power] > 0:
-                DzMarch809[nCounter1, 0] = wind_and_power_df.loc[i, self.wind_velocity]
-                DzMarch809[nCounter1, 1] = wind_and_power_df.loc[i, self.active_power]
-
-                nCounter1 = nCounter1 + 1
-
-        print('nCounter1', nCounter1)
-
-        # 统计各网格落入的散点个数
-        XBoxNumber = np.ones([PNum, VNum], dtype=int)
-        nWhichP = -1
-        nWhichV = -1
-        for i in range(nCounter1):
-            for m in range(PNum):
-                if m * 25 < DzMarch809[i, 1] <= (m + 1) * 25:
-                    nWhichP = m
-                    break
-            for n in range(VNum):
-                if ((n + 1) * 0.25 - 0.125) < DzMarch809[i, 0] <= ((n + 1) * 0.25 + 0.125):
-                    nWhichV = n
-                    break
-
-            if nWhichP > -1 and nWhichV > -1:
-                XBoxNumber[nWhichP, nWhichV] = XBoxNumber[nWhichP, nWhichV] + 1
-
-        for m in range(PNum):
-            for n in range(VNum):
-                XBoxNumber[m, n] = XBoxNumber[m, n] - 1
-
-        print('XBoxNumber', XBoxNumber)
-        # 在功率方向将网格内散点绝对个数转换为相对百分比,备用
-        PBoxPercent = np.zeros([PNum, VNum], dtype=float)
-        PBinSum = np.zeros(PNum, dtype=int)
-
-        for i in range(PNum):
-            for m in range(VNum):
-                PBinSum[i] = PBinSum[i] + XBoxNumber[i, m]
-
-            for m in range(VNum):
-                if PBinSum[i] > 0:
-                    PBoxPercent[i, m] = XBoxNumber[i, m] / PBinSum[i] * 100
-
-        # 在风速方向将网格内散点绝对个数转换为相对百分比,备用
-        VBoxPercent = np.zeros([PNum, VNum], dtype=float)
-        VBinSum = np.zeros(VNum, dtype=int)
-
-        for i in range(VNum):
-            for m in range(PNum):
-                VBinSum[i] = VBinSum[i] + XBoxNumber[m, i]
-
-            for m in range(PNum):
-                if VBinSum[i] > 0:
-                    VBoxPercent[m, i] = XBoxNumber[m, i] / VBinSum[i] * 100
-
-        # 以水平功率带方向为准,分析每个水平功率带中,功率主带中心,即找百分比最大的网格位置。
-        PBoxMaxIndex = np.zeros(PNum, dtype=int)  # 水平功率带最大网格位置索引
-        PBoxMaxP = np.zeros(PNum, dtype=int)  # 水平功率带最大网格百分比
-
-        for m in range(PNum):
-            # 确定每一水平功率带的最大网格位置索引即百分比值
-            PBoxMaxP[m], PBoxMaxIndex[m] = PBoxPercent[m, :].max(), PBoxPercent[m, :].argmax()
-
-        # 以垂直风速方向为准,分析每个垂直风速带中,功率主带中心,即找百分比最大的网格位置。
-        VBoxMaxIndex = np.zeros(VNum, dtype=int)
-        VBoxMaxV = np.zeros(VNum, dtype=int)
-
-        for m in range(VNum):
-            [VBoxMaxV[m], VBoxMaxIndex[m]] = VBoxPercent[:, m].max(), VBoxPercent[:, m].argmax()
-
-        # 切入风速特殊处理,如果切入风速过于偏右,向左拉回
-        if PBoxMaxIndex[0] > 14:
-            PBoxMaxIndex[0] = 9
-
-        # 以水平功率带方向为基准,进行分析
-        DotDense = np.zeros(PNum, dtype=int)  # 每一水平功率带的功率主带包含的网格数
-        DotDenseLeftRight = np.zeros([PNum, 2], dtype=int)  # 存储每一水平功率带的功率主带以最大网格为中心,向向左,向右扩展的网格数
-        DotValve = 90  # 从中心向左右对称扩展网格的散点百分比和的阈值。
-        PDotDenseSum = 0
-
-        iSpreadLeft = 1  # 向左扩展网格计数,初值为1
-        iSpreadRight = 1  # 向右扩展网格技术,初值为1
-        for i in range(PNum - 6):  # 从最下层水平功率带1开始,向上到第PNum-6个水平功率带(额定功率一下水平功率带),逐一分析
-            PDotDenseSum = PBoxMaxP[i]  # 以中心最大水平功率带为基准,向左向右对称扩展网格,累加各网格散点百分比
-            iSpreadRight = 1
-            iSpreadLeft = 1
-            while PDotDenseSum < DotValve:
-
-                if (PBoxMaxIndex[i] + iSpreadRight) < VNum - 1:
-                    PDotDenseSum = PDotDenseSum + PBoxPercent[i, PBoxMaxIndex[i] + iSpreadRight]  # 向右侧扩展
-                    iSpreadRight = iSpreadRight + 1
-
-                if (PBoxMaxIndex[i] + iSpreadRight) > VNum - 1:
-                    break
-
-                if (PBoxMaxIndex[i] - iSpreadLeft) > 0:
-                    PDotDenseSum = PDotDenseSum + PBoxPercent[i, PBoxMaxIndex[i] - iSpreadLeft]  # 向左侧扩展
-                    iSpreadLeft = iSpreadLeft + 1
-
-                if (PBoxMaxIndex[i] - iSpreadLeft) <= 0:
-                    break
-
-            iSpreadRight = iSpreadRight - 1
-
-            iSpreadLeft = iSpreadLeft - 1
-            # 向左右对称扩展完毕
-
-            DotDenseLeftRight[i, 0] = iSpreadLeft
-            DotDenseLeftRight[i, 1] = iSpreadRight
-            DotDense[i] = iSpreadLeft + iSpreadRight + 1
-
-        # 各行功率主带右侧宽度的中位数最具有代表性
-        DotDenseWidthLeft = np.zeros([PNum - 6, 1], dtype=int)
-        for i in range(PNum - 6):
-            DotDenseWidthLeft[i] = DotDenseLeftRight[i, 1]
-
-        MainBandRight = np.median(DotDenseWidthLeft)
-
-        # 散点向右显著延展分布的水平功率带为限功率水平带
-        PowerLimit = np.zeros([PNum, 1], dtype=int)  # 各水平功率带是否为限功率标识,==1:是;==0:不是
-        WidthAverage = 0  # 功率主带平均宽度
-        WidthVar = 0  # 功率主带方差
-        # PowerLimitValve = 6    #限功率主带判别阈值
-        PowerLimitValve = np.ceil(MainBandRight) + 3  # 限功率主带判别阈值
-
-        nCounterLimit = 0
-        nCounter = 0
-
-        for i in range(PNum - 6):
-            if DotDenseLeftRight[i, 1] > PowerLimitValve and PBinSum[i] > 20:  # 如果向右扩展网格数大于阈值,且该水平功率带点总数>20,是
-                PowerLimit[i] = 1
-                nCounterLimit = nCounterLimit + 1
-
-            if DotDenseLeftRight[i, 1] <= PowerLimitValve:
-                WidthAverage = WidthAverage + DotDenseLeftRight[i, 1]  # 统计正常水平功率带右侧宽度
-                nCounter = nCounter + 1
-
-        WidthAverage = WidthAverage / nCounter  # 功率主带平均宽度
-
-        print("WidthAverage", WidthAverage)
-
-        # 各水平功率带的功率主带宽度的方差,反映从下到上宽度是否一致,或是否下宽上窄等异常情况
-        for i in range(PNum - 6):
-            if DotDenseLeftRight[i, 1] <= PowerLimitValve:
-                WidthVar = WidthVar + (DotDenseLeftRight[i, 1] - WidthAverage) * (
-                        DotDenseLeftRight[i, 1] - WidthAverage)
-
-        WidthVar = np.sqrt(WidthVar / nCounter)
-
-        # 各水平功率带,功率主带的风速范围,右侧扩展网格数*2*0.25
-        PowerBandWidth = WidthAverage * 2 * 0.25
-
-        # 对限负荷水平功率带的最大网格较下面相邻层显著偏右,拉回
-        for i in range(1, PNum - 6):
-            if PowerLimit[i] == 1 and abs(PBoxMaxIndex[i] - PBoxMaxIndex[i - 1]) > 5:
-                PBoxMaxIndex[i] = PBoxMaxIndex[i - 1] + 1
-
-        # 输出各层功率主带的左右边界网格索引
-        DotDenseInverse = np.zeros([PNum, 2], dtype=int)
-
-        for i in range(PNum):
-            DotDenseInverse[i, :] = DotDenseLeftRight[PNum - i - 1, :]
-
-        # print('DotDenseInverse', DotDenseInverse)
-
-        # 功率主带的右边界
-        CurveWidthR = int(np.ceil(WidthAverage) + 2)
-
-        # CurveWidthL = 6    #功率主带的左边界
-        CurveWidthL = CurveWidthR
-
-        BBoxLimit = np.zeros([PNum, VNum], dtype=int)  # 网格是否为限功率网格的标识,如果为限功率水平功率带,从功率主带右侧边缘向右的网格为限功率网格
-        for i in range(2, PNum - 6):
-            if PowerLimit[i] == 1:
-                for j in range(PBoxMaxIndex[i] + CurveWidthR, VNum):
-                    BBoxLimit[i, j] = 1
-
-        BBoxRemove = np.zeros([PNum, VNum], dtype=int)  # 数据异常需要剔除的网格标识,标识==1:功率主带右侧的欠发网格;==2:功率主带左侧的超发网格
-        for m in range(PNum - 6):
-            for n in range(PBoxMaxIndex[m] + CurveWidthR - 1, VNum):
-                BBoxRemove[m, n] = 1
-
-            for n in range(PBoxMaxIndex[m] - CurveWidthL - 1, 0, -1):
-                BBoxRemove[m, n] = 2
-
-        # 确定功率主带的左上拐点,即额定风速位置的网格索引
-        CurveTop = np.zeros(2, dtype=int)
-        CurveTopValve = 3  # 网格的百分比阈值
-        BTopFind = 0
-        for m in range(PNum - 4 - 1, 0, -1):
-            for n in range(VNum):
-                if VBoxPercent[m, n] > CurveTopValve and XBoxNumber[m, n] >= 10:  # 如左上角网格的百分比和散点个数大于阈值。
-                    CurveTop[0] = m
-                    CurveTop[1] = n
-                    BTopFind = 1
-                    break
-
-            if BTopFind == 1:
-                break
-
-        IsolateValve = 3
-        for m in range(PNum - 6):
-            for n in range(PBoxMaxIndex[m] + CurveWidthR - 1, VNum):
-                if PBoxPercent[m, n] < IsolateValve:
-                    BBoxRemove[m, n] = 1
-
-        # 功率主带顶部宽度
-        CurveWidthT = 2
-        for m in range(PNum - CurveWidthT - 1, PNum):
-            for n in range(VNum):
-                BBoxRemove[m, n] = 3  # 网格为额定功率以上的超发点
-
-        # 功率主带拐点左侧的欠发网格标识
-        for m in range(PNum - 5 - 1, PNum):
-            for n in range(CurveTop[1] - 2 - 1):
-                BBoxRemove[m, n] = 2
-
-        # 以网格的标识,决定该网格内数据的标识。Dzwind_and_power_dfSel功率非零数据的标识位。散点在哪个网格,此网格的标识即为该点的标识
-        Dzwind_and_power_dfSel = np.zeros(nCounter1, dtype=int)  # is ==1,欠发功率点;==2,超发功率点;==3,额定风速以上的超发功率点 ==4, 限电
-        nWhichP = 0
-        nWhichV = 0
-        nBadA = 0
-
-        for i in range(nCounter1):
-            for m in range(PNum):
-                if DzMarch809[i, 1] > (m - 1) * 25 and DzMarch809[i, 1] <= m * 25:
-                    nWhichP = m
-                    break
-
-            for n in range(VNum):
-                if DzMarch809[i, 0] > (n * 0.25 - 0.125) and DzMarch809[i, 0] <= (n * 0.25 + 0.125):
-                    nWhichV = n
-                    break
-
-            if nWhichP > 0 and nWhichV > 0:
-
-                if BBoxRemove[nWhichP, nWhichV] == 1:
-                    Dzwind_and_power_dfSel[i] = 1
-                    nBadA = nBadA + 1
-
-                if BBoxRemove[nWhichP, nWhichV] == 2:
-                    Dzwind_and_power_dfSel[i] = 2
-
-                if BBoxRemove[nWhichP, nWhichV] == 3:
-                    Dzwind_and_power_dfSel[i] = 0  # 3  # 额定风速以上的超发功率点认为是正常点,不再标识。
-
-                if BBoxLimit[nWhichP, nWhichV] == 1 and nWhichP>16:
-                    Dzwind_and_power_dfSel[i] = 4
-
-        print("nWhichP", nWhichP)
-        print("nWhichV", nWhichV)
-        print("nBadA", nBadA)
-
-        # 限负荷数据标识方法2:把数据切割为若干个窗口。对每一窗口,以第一个点为基准,连续nWindowLength个数据的功率在方差范围内,呈现显著水平分布的点
-        PVLimit = np.zeros([nCounter1, 2], dtype=int)  # 存储限负荷数据
-        nLimitTotal = 0
-        nWindowLength = 3
-        LimitWindow = np.zeros(nWindowLength, dtype=int)
-        UpLimit = 0  # 上限
-        LowLimit = 0  # 下限
-        PowerStd = 15  # 功率波动方差
-        bAllInUpLow = 1  # ==1:窗口内所有数据均在方差上下限之内,限负荷==0,不满足条件
-        bAllInAreas = 1  # ==1:窗口所有数据均在200~PRated-300kW范围内;==0:不满足此条件
-        nWindowNum = int(np.floor(nCounter1 / nWindowLength))
-        PowerLimitUp = PRated - 300
-        PowerLimitLow = 200
-        for i in range(nWindowNum):
-            for j in range(nWindowLength):
-                LimitWindow[j] = DzMarch809[i * nWindowLength + j, 1]
-
-            bAllInAreas = 1
-            for j in range(nWindowLength):
-                if LimitWindow[j] < PowerLimitLow or LimitWindow[j] > PowerLimitUp:
-                    bAllInAreas = 0
-
-            if bAllInAreas == 0:
-                continue
-
-            UpLimit = LimitWindow[0] + PowerStd
-            LowLimit = LimitWindow[0] - PowerStd
-            bAllInUpLow = 1
-            for j in range(1, nWindowLength):
-                if LimitWindow[j] < LowLimit or LimitWindow[j] > UpLimit:
-                    bAllInUpLow = 0
-
-            if bAllInUpLow == 1:
-                for j in range(nWindowLength):
-                    Dzwind_and_power_dfSel[i * nWindowLength + j] = 4  # 标识窗口内的数据为限负荷数据
-
-                for j in range(nWindowLength):
-                    PVLimit[nLimitTotal, :] = DzMarch809[i * nWindowLength + j, :]
-                    nLimitTotal = nLimitTotal + 1
-
-        print("nLimitTotal", nLimitTotal)
-
-        # 相邻水平功率主带的锯齿平滑
-        PVLeftDown = np.zeros(2, dtype=int)
-        PVRightUp = np.zeros(2, dtype=int)
-        nSmooth = 0
-        for i in range(PNum - 6 - 1):
-            PVLeftDown = np.zeros(2, dtype=int)
-            PVRightUp = np.zeros(2, dtype=int)
-
-            if (PBoxMaxIndex[i + 1] - PBoxMaxIndex[i]) >= 1:
-                PVLeftDown[0] = (PBoxMaxIndex[i] + CurveWidthR) * 0.25 - 0.125
-                PVLeftDown[1] = (i - 1) * 25
-
-                PVRightUp[0] = (PBoxMaxIndex[i + 1] + CurveWidthR) * 0.25 - 0.125
-                PVRightUp[1] = (i + 1 - 1) * 25
-
-                for m in range(nCounter1):
-                    if DzMarch809[m, 0] > PVLeftDown[0] and DzMarch809[m, 0] < PVRightUp[0] and PVLeftDown[1] < \
-                            DzMarch809[m, 1] < PVRightUp[1]:  # 在该锯齿中
-                        if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (
-                                PVRightUp[1] - PVLeftDown[1]) / (
-                                PVRightUp[0] - PVLeftDown[0]):  # 斜率大于对角连线,则在锯齿左上三角形中,选中
-                            Dzwind_and_power_dfSel[m] = 0
-                            nSmooth = nSmooth + 1
-
-        print("nSmooth", nSmooth)
-
-        # 存储好点
-        nCounterPV = 0
-        PVDot = np.zeros([nCounter1, 2], dtype=int)
-        for i in range(nCounter1):
-            if Dzwind_and_power_dfSel[i] == 0:
-                PVDot[nCounterPV, :] = DzMarch809[i, :]
-                nCounterPV = nCounterPV + 1
-
-        nCounterVP = nCounterPV
-        print("nCounterVP", nCounterVP)
-
-        # 存储坏点
-        nCounterBad = 0
-        PVBad = np.zeros([nCounter1, 2], dtype=int)
-        for i in range(nCounter1):
-            if Dzwind_and_power_dfSel[i] == 1 or Dzwind_and_power_dfSel[i] == 2 or Dzwind_and_power_dfSel[i] == 3:
-                PVBad[nCounterBad, :] = DzMarch809[i, :]
-                nCounterBad = nCounterBad + 1
-
-        print("nCounterBad", nCounterBad)
-
-        # 用功率主带中的好点绘制实测功率曲
-        XBinNumber = np.ones(50, dtype=int)
-        PCurve = np.zeros([50, 2], dtype=int)
-        PCurve[:, 0] = [i / 2 for i in range(1, 51)]
-        XBinSum = np.zeros([50, 2], dtype=int)
-        nWhichBin = 0
-
-        for i in range(nCounterVP):
-            nWhichBin = 0
-
-            for b in range(50):
-                if PVDot[i, 0] > (b * 0.5 - 0.25) and PVDot[i, 0] <= (b * 0.5 + 0.25):
-                    nWhichBin = b
-                    break
-
-            if nWhichBin > 0:
-                XBinSum[nWhichBin, 0] = XBinSum[nWhichBin, 0] + PVDot[i, 0]  # wind speed
-                XBinSum[nWhichBin, 1] = XBinSum[nWhichBin, 1] + PVDot[i, 1]  # Power
-                XBinNumber[nWhichBin] = XBinNumber[nWhichBin] + 1
-
-        for b in range(50):
-            XBinNumber[b] = XBinNumber[b] - 1
-
-        for b in range(50):
-            if XBinNumber[b] > 0:
-                PCurve[b, 0] = XBinSum[b, 0] / XBinNumber[b]
-                PCurve[b, 1] = XBinSum[b, 1] / XBinNumber[b]
-
-        # 对额定风速以上的功率直接赋额定功率
-        VRatedNum = int(VRated / 0.5)
-        for m in range(VRatedNum, 50):
-            if PCurve[m, 1] == 0:
-                PCurve[m, 1] = PRated
-
-        # print("PCurve", PCurve)
-
-        # 绘制标准正则功率曲线,以0.5m/s标准为间隔
-        # 15m/s以上为额定功率,15m/s以下为计算得到
-        PCurveNorm = np.zeros([50, 2], dtype=int)
-        for i in range(30, 50):
-            PCurveNorm[i, 0] = i * 0.5
-            PCurveNorm[i, 1] = PRated
-
-        # 15m/s一下正则功率曲线
-        CurveData = np.zeros([30, 2], dtype=int)
-        for i in range(30):
-            CurveData[i, :] = PCurve[i, :]
-
-        CurveNorm = np.zeros([30, 2], dtype=int)
-        VSpeed = [i / 2 for i in range(1, 31)]
-
-        WhichBin = 0
-
-        K = 0
-        a = 0
-        for m in range(30):
-            K = 0
-            a = 0
-
-            for n in range(30):
-                if abs(CurveData[n, 0] - VSpeed[m]) < 0.1:
-                    WhichBin = n
-                    break
-
-            if WhichBin > 1:
-                if CurveData[WhichBin, 0] - CurveData[WhichBin - 1, 0] > 0:
-                    K = (CurveData[WhichBin, 1] - CurveData[WhichBin - 1, 1]) / (
-                            CurveData[WhichBin, 0] - CurveData[WhichBin - 1, 0])
-                    a = CurveData[WhichBin, 1] - K * CurveData[WhichBin, 0]
-
-            CurveNorm[m, 0] = VSpeed[m]
-            CurveNorm[m, 1] = a + K * VSpeed[m]
-
-        for i in range(30):
-            PCurveNorm[i, :] = CurveNorm[i, :]
-
-        # 子模块3:损失电量计算及发电性能评价
-        CC = len(PCurve[:, 0])
-        EPIdealTotal = 0
-        # 计算停机损失
-        EPLostStopTotal = 0
-        EPLost = 0
-
-        nWhichBin = 0
-        IdealPower = 0
-        nStopTotal = 0
-        for i in range(wind_and_power_df_count):
-            if wind_and_power_df.loc[i, self.active_power] <= 0:
-                nWhichBin = 0
-                for m in range(base_wind_and_power_count - 1):
-                    if wind_and_power_df.loc[i, self.wind_velocity] > base_wind_and_power_df.loc[
-                        m, self.rated_wind_speed] and wind_and_power_df.loc[i, self.wind_velocity] <= \
-                            base_wind_and_power_df.loc[
-                                m + 1, self.rated_wind_speed]:
-                        nWhichBin = m
-                        break
-
-                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
-                    continue
-
-                IdealPower = (wind_and_power_df.loc[i, self.wind_velocity] - base_wind_and_power_df.loc[
-                    nWhichBin, self.rated_wind_speed]) / (
-                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] -
-                                     base_wind_and_power_df.loc[
-                                         nWhichBin, self.rated_wind_speed]) * (
-                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity]
-                                     - base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) \
-                             + base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
-
-                EPLost = IdealPower / 6
-                EPLostStopTotal = EPLostStopTotal + EPLost
-                nStopTotal = nStopTotal + 1
-
-        print("EPLost", EPLost)
-        print("nStopTotal", nStopTotal)
-        print("EPLostStopTotal", EPLostStopTotal)
-
-        nWhichP = 0
-        nWhichV = 0
-        nWhichBin = 0
-        IdealPower = 0
-
-        # 计算欠发损失,此欠发损失已不包括限电损失,限电点在前面已经从欠发点中去除。
-        EPLostBadTotal = 0
-        EPLost = 0
-
-        nBadTotal = 0
-
-        LostBadPercent = 0
-
-        EPOverTotal = 0
-        EPOver = 0
-        nOverTotal = 0
-
-        for i in range(nCounter1):
-            if Dzwind_and_power_dfSel[i] == 1:
-                nWhichBin = 0
-                for m in range(base_wind_and_power_count - 1):
-                    if DzMarch809[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] \
-                            and DzMarch809[i, 0] <= base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
-                        nWhichBin = m
-                        break
-
-                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
-                    continue
-
-                IdealPower = (DzMarch809[i, 0] - base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) / (
-                        base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] - base_wind_and_power_df.loc[
-                    nWhichBin, self.rated_wind_speed]) * (
-                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
-                                     base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) + \
-                             base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
-                EPLost = abs(IdealPower - DzMarch809[i, 1]) / 6
-                EPLostBadTotal = EPLostBadTotal + EPLost
-                nBadTotal = nBadTotal + 1
-
-            # 额定风速以上超发电量
-            if Dzwind_and_power_dfSel[i] == 3:
-                EPOver = (DzMarch809[i, 1] - PRated) / 6
-                EPOverTotal = EPOverTotal + EPOver
-                nOverTotal = nOverTotal + 1
-
-        print("EPLost", EPLost)
-        print("nBadTotal", nBadTotal)
-        print("EPLostBadTotal", EPLostBadTotal)
-        print("EPOverTotal", EPOverTotal)
-        print("nOverTotal", nOverTotal)
-
-        # 功率曲线未达标损失
-        EPLostPerformTotal = 0
-        nWhichBinI = 0
-        IdealPower = 0
-
-        for i in range(nCounterVP):
-
-            for m in range(base_wind_and_power_count - 1):
-                if PVDot[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] and PVDot[i, 0] <= \
-                        base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
-                    nWhichBinI = m
-                    break
-
-            if nWhichBinI > base_wind_and_power_count - 1 or nWhichBinI == 0:
-                continue
-
-            IdealPower = (PVDot[i, 0] - base_wind_and_power_df.loc[nWhichBinI, self.rated_wind_speed]) / (
-                    base_wind_and_power_df.loc[nWhichBinI + 1, self.rated_wind_speed] - base_wind_and_power_df.loc[
-                nWhichBinI, self.rated_wind_speed]) * \
-                         (base_wind_and_power_df.loc[nWhichBinI + 1, self.rated_capacity] -
-                          base_wind_and_power_df.loc[nWhichBinI, self.rated_capacity]) + \
-                         base_wind_and_power_df.loc[nWhichBinI, self.rated_capacity]
-
-            EPLostPerformTotal = EPLostPerformTotal + (IdealPower - PVDot[i, 1]) / 6
-
-        print("EPLostPerformTotal", EPLostPerformTotal)
-
-        # 限电损失
-        EPLostLimitTotal = 0
-        EPLost = 0
-        nLimitTotal = 0
-
-        PVLimit = np.zeros([nCounter1, 2])
-
-        for i in range(nCounter1):
-            if Dzwind_and_power_dfSel[i] == 4:
-                nWhichBin = 0
-                for m in range(base_wind_and_power_count - 1):
-                    if DzMarch809[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] and DzMarch809[i, 0] <= \
-                            base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
-                        nWhichBin = m
-                        break
-
-                # 插值计算对应设计功率
-                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
-                    continue
-
-                IdealPower = (DzMarch809[i, 0] - base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) / (
-                        base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] -
-                        base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) * (
-                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
-                                     base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) + \
-                             base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
-                EPLost = np.abs(IdealPower - DzMarch809[i, 1]) / 6
-                EPLostLimitTotal = EPLostLimitTotal + EPLost
-
-                PVLimit[nLimitTotal, :] = DzMarch809[i, :]
-                nLimitTotal = nLimitTotal + 1
-
-        nLimitTotal = nLimitTotal - 1
-
-        print("nLimitTotal", nLimitTotal)
-
-        # 欠发和限点损失总和
-        EPLostBadLimitTotal = EPLostBadTotal + EPLostLimitTotal
-
-        # 如果功率曲线未达标损失为正
-        if EPLostPerformTotal >= 0:
-            EPIdealTotal = EPActualTotal + EPLostStopTotal + EPLostLimitTotal + EPLostBadTotal + EPLostPerformTotal
-
-        # 如果功率曲线未达标损失为负
-        if EPLostPerformTotal < 0:
-            EPIdealTotal = EPActualTotal + EPLostStopTotal + EPLostLimitTotal + EPLostBadTotal
-
-        print("EPIdealTotal", EPIdealTotal)
-        # 可以比较求和得到的应发功率EPIdealTotal与理论计算得到的应发功率EPIdealTotalAAA的差别
-        # 需要去除的超发功率:(1)功率主带左侧的超发点;(2)额定风速以上的超发点。
-        RemoveOverEP = 0
-        nType2 = 0
-        for i in range(nCounter1):
-            if Dzwind_and_power_dfSel[i] == 2:  # 功率主带左侧的超发坏点
-                nWhichBin = 0
-                for m in range(base_wind_and_power_count - 1):
-                    if base_wind_and_power_df.loc[m, self.rated_wind_speed] < DzMarch809[i, 0] <= base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
-                        nWhichBin = m
-                        break
-
-                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
-                    continue
-
-                IdealPower = (DzMarch809[i, 0] - base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) / (
-                        base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] - base_wind_and_power_df.loc[
-                    nWhichBin, self.rated_wind_speed]) * (
-                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
-                                     base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) + \
-                             base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
-
-                RemoveOverEP = RemoveOverEP + (DzMarch809[i, 1] - IdealPower) / 6
-                nType2 = nType2 + 1
-
-        print("RemoveOverEP", RemoveOverEP)
-        print("nType2", nType2)
-        # 额定功率以上的超发点
-        nTypeOver = 0
-        for i in range(nCounter1):
-            if DzMarch809[i, 1] > PRated:
-                RemoveOverEP = RemoveOverEP + (DzMarch809[i, 1] - PRated) / 6
-                nTypeOver = nTypeOver + 1
-
-        print("RemoveOverEP", RemoveOverEP)
-        print("nTypeOver", nTypeOver)
-
-    def run(self):
-        # Implement your class identification logic here
-        self.identifier()
-
-
-if __name__ == '__main__':
-    test = ClassIdentifier('test', r"D:\中能智能\matlib计算相关\好点坏点matlib计算\A01.csv", index='时间',
-                           wind_velocity='风速',
-                           active_power='功率')
-
-    test.run()
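
Review note: the deleted ClassIdentifier_1.py_bak builds its power/wind-speed occupancy grid (XBoxNumber) with nested Python loops over every sample. For reference, a vectorized equivalent of that counting step (a sketch; the bin edges follow the 25 kW rows and 0.25 m/s columns used in the deleted code, and ties on bin boundaries may round slightly differently):

    import numpy as np

    def grid_counts(wind, power, p_num, v_num):
        # Row m covers (m*25, (m+1)*25] kW; column n is centered on (n+1)*0.25 m/s.
        p_idx = np.ceil(np.asarray(power) / 25).astype(int) - 1
        v_idx = np.round(np.asarray(wind) / 0.25).astype(int) - 1
        ok = (p_idx >= 0) & (p_idx < p_num) & (v_idx >= 0) & (v_idx < v_num)
        counts = np.zeros((p_num, v_num), dtype=int)
        np.add.at(counts, (p_idx[ok], v_idx[ok]), 1)
        return counts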

+ 0 - 196
tmp_file/baiyushan_20240906.py

@@ -1,196 +0,0 @@
-from multiprocessing import Pool
-from os import *
-
-import chardet
-import pandas as pd
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    df = pd.DataFrame()
-    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-        encoding = detect_file_encoding(file_path)
-        end_with_gz = str(file_path).lower().endswith("gz")
-        if read_cols:
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
-        else:
-
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-    else:
-        xls = pd.ExcelFile(file_path)
-        # 获取所有的sheet名称
-        sheet_names = xls.sheet_names
-        for sheet in sheet_names:
-            if read_cols:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
-            else:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
-
-    return df
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in listdir(path):
-        item_path = path.join(path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-    # 读取所有文件
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    if is_file_path:
-        path = path.dirname(path)
-
-    if not path.exists(path):
-        makedirs(path, exist_ok=True)
-
-
-def read_status(status_path):
-    all_files = read_excel_files(status_path)
-
-    with Pool(20) as pool:
-        dfs = pool.starmap(read_file_to_df, [(file, ['设备名称', '状态码', '开始时间'], 2) for file in all_files])
-
-    df = pd.concat(dfs)
-    df = df[df['状态码'].isin([3, 5])]
-    df['开始时间'] = pd.to_datetime(df['开始时间'])
-
-    df['处理后时间'] = (df['开始时间'] + pd.Timedelta(minutes=10)).apply(
-        lambda x: f"{x.year}-{str(x.month).zfill(2)}-{str(x.day).zfill(2)} {str(x.hour).zfill(2)}:{x.minute // 10}0:00")
-
-    df['处理后时间'] = pd.to_datetime(df['处理后时间'])
-    df = df[(df['处理后时间'] >= '2023-09-01 00:00:00')]
-    df[df['处理后时间'] >= '2024-09-01 00:00:00'] = '2024-09-01 00:00:00'
-    df.sort_values(by=['设备名称', '处理后时间'], inplace=True)
-
-    return df
-
-
-def read_fault_data(fault_path):
-    all_files = read_excel_files(fault_path)
-
-    with Pool(20) as pool:
-        dfs = pool.starmap(read_file_to_df, [(file, ['设备名称', '故障开始时间'], 2) for file in all_files])
-
-    df = pd.concat(dfs)
-    df = df[df['设备名称'].str.startswith("#")]
-    df['故障开始时间'] = pd.to_datetime(df['故障开始时间'])
-
-    df['处理后故障开始时间'] = (df['故障开始时间'] + pd.Timedelta(minutes=10)).apply(
-        lambda x: f"{x.year}-{str(x.month).zfill(2)}-{str(x.day).zfill(2)} {str(x.hour).zfill(2)}:{x.minute // 10}0:00")
-
-    df['处理后故障开始时间'] = pd.to_datetime(df['处理后故障开始时间'])
-    df = df[(df['处理后故障开始时间'] >= '2023-09-01 00:00:00') & (df['处理后故障开始时间'] < '2024-09-01 00:00:00')]
-    df.sort_values(by=['设备名称', '处理后故障开始时间'], inplace=True)
-
-    return df
-
-
-def read_10min_data(data_path):
-    all_files = read_excel_files(data_path)
-
-    with Pool(20) as pool:
-        dfs = pool.starmap(read_file_to_df,
-                           [(file, ['设备名称', '时间', '平均风速(m/s)', '平均网侧有功功率(kW)'], 1) for file in all_files])
-
-    df = pd.concat(dfs)
-    df['时间'] = pd.to_datetime(df['时间'])
-
-    df = df[(df['时间'] >= '2023-09-01 00:00:00') & (df['时间'] < '2024-09-01 00:00:00')]
-    df.sort_values(by=['设备名称', '时间'], inplace=True)
-    return df
-
-
-def select_data_and_save(name, fault_df, origin_df):
-    df = pd.DataFrame()
-    for i in range(fault_df.shape[0]):
-        fault = fault_df.iloc[i]
-        con1 = origin_df['时间'] >= fault['处理后故障开始时间']
-        con2 = origin_df['时间'] <= fault['结束时间']
-        df = pd.concat([df, origin_df[con1 & con2]])
-
-    name = name.replace('#', 'F')
-    df.drop_duplicates(inplace=True)
-    df.to_csv(save_path + sep + name + '.csv', index=False, encoding='utf8')
-
-
-if __name__ == '__main__':
-    base_path = r'/data/download/白玉山/需要整理的数据'
-    save_path = base_path + sep + 'sele_data_202409261135'
-    create_file_path(save_path)
-    status_df = read_status(base_path + sep + '设备状态')
-    fault_df = read_fault_data(base_path + sep + '故障')
-    data_df = read_10min_data(base_path + sep + '十分钟')
-
-    status_df.to_csv(base_path + sep + '设备状态' + '.csv', index=False, encoding='utf8')
-    fault_df.to_csv(base_path + sep + '故障' + '.csv', index=False, encoding='utf8')
-    data_df.to_csv(base_path + sep + '十分钟' + '.csv', index=False, encoding='utf8')
-
-    print(status_df.shape)
-    print(fault_df.shape)
-    print(data_df.shape)
-
-    fault_list = list()
-    for i in range(fault_df.shape[0]):
-        data = fault_df.iloc[i]
-        con1 = status_df['设备名称'] == data['设备名称']
-        con2 = status_df['处理后时间'] >= data['处理后故障开始时间']
-        fault_list.append(status_df[con1 & con2]['处理后时间'].min())
-    fault_df['结束时间'] = fault_list
-
-    status_df.to_csv(base_path + sep + '设备状态' + '.csv', index=False, encoding='utf8')
-    fault_df.to_csv(base_path + sep + '故障' + '.csv', index=False, encoding='utf8')
-    data_df.to_csv(base_path + sep + '十分钟' + '.csv', index=False, encoding='utf8')
-
-    names = set(fault_df['设备名称'])
-    fault_map = dict()
-    data_map = dict()
-    for name in names:
-        fault_map[name] = fault_df[fault_df['设备名称'] == name]
-        data_map[name] = data_df[data_df['设备名称'] == name]
-
-    with Pool(20) as pool:
-        pool.starmap(select_data_and_save, [(name, fault_map[name], data_map[name]) for name in names])
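
Review note: the deleted baiyushan script rounds timestamps up to the next 10-minute boundary by formatting strings (the '处理后时间' columns). pandas can express the same ceiling directly, which avoids the string round-trip (a sketch, equivalent to the zfill/format trick in read_status() and read_fault_data()):

    import pandas as pd

    ts = pd.Series(pd.to_datetime(['2024-09-06 12:03:59', '2024-09-06 12:10:00']))
    # Add 10 minutes, then floor to the 10-minute grid.
    rounded = (ts + pd.Timedelta(minutes=10)).dt.floor('10min')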

+ 0 - 48
tmp_file/changing_hebing_guzhang.py

@@ -1,48 +0,0 @@
-import copy
-import datetime
-
-import pandas as pd
-
-read_path = r'D:\data\长清\故障记录_20230420_20240419.csv'
-
-df = pd.read_csv(read_path, encoding='gb18030')
-
-df['风机名'] = df['风机名'].apply(lambda wind_name: 'A' + wind_name.replace('号风机', '').zfill(2))
-
-df = df[~df['状态码描述'].isin(['高偏航误差穿越', '手动偏航'])]
-
-df['激活时间'] = pd.to_datetime(df['激活时间'].apply(lambda x: x[0:x.rfind(":")]), errors='coerce')
-df['复位时间'] = pd.to_datetime(df['复位时间'].apply(lambda x: x[0:x.rfind(":")]), errors='coerce')
-
-df.dropna(subset=['激活时间', '复位时间'], inplace=True)
-
-
-def generate_next_10_min(dt):
-    minute = dt.minute
-    chazhi = 10 - int(minute % 10)
-    now = dt + datetime.timedelta(minutes=chazhi)
-    now = now.replace(second=0, microsecond=0)
-
-    return now
-
-
-df['begin_time'] = df['激活时间'].apply(generate_next_10_min)
-df['end_time'] = df['复位时间'].apply(generate_next_10_min)
-
-df['chazhi_count'] = ((df['end_time'] - df['begin_time']).dt.seconds) // 600 + 1
-
-result_df = df[df['chazhi_count'] == 1]
-
-datas = [[]]
-for index, row in df[df['chazhi_count'] > 1].iterrows():
-    for i in range(row['chazhi_count']):
-        data = copy.deepcopy(row.values)
-        data[6] = data[6] + datetime.timedelta(minutes=10 * i)
-        datas.append(data)
-
-now_df = pd.DataFrame(datas, columns=df.columns)
-result_df = pd.concat([result_df, now_df])
-
-result_df.reset_index(inplace=True, drop=True)
-result_df.sort_values(by=['风机名', '激活时间', 'begin_time'], inplace=True)
-result_df.to_csv("故障记录.csv", encoding='utf8')

+ 0 - 94
tmp_file/cp_online_data_to_other.py

@@ -1,94 +0,0 @@
-import datetime
-import multiprocessing
-import shutil
-from os import *
-
-not_move_dir = ["乌梅山风电场-江西-大唐",
-                "诺木洪风电场-甘肃-华电",
-                "平陆风电场-山西-中广核",
-                "泗洪协合风电场-安徽-深能南控",
-                "诺木洪风电场-青海-华电",
-                "长清风电场-山东-国电"
-                ]
-
-read_dir = r"/data/download/collection_data"
-# read_dir = r'Z:\collection_data'
-save_base_dir = r"/data/download/datang_shangxian"
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in listdir(path):
-        if item not in not_move_dir:
-            item_path = path.join(path, item)
-            if path.isdir(item_path):
-                __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-            elif path.isfile(item_path):
-                if path not in directory_dict:
-                    directory_dict[path] = []
-
-                if filter_types is None or len(filter_types) == 0:
-                    directory_dict[path].append(item_path)
-                elif str(item_path).split(".")[-1] in filter_types:
-                    if str(item_path).count("~$") == 0:
-                        directory_dict[path].append(item_path)
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    if path.isfile(read_path):
-        return [read_path]
-
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 读取路径下所有的文件
-def read_files(read_path):
-    if path.isfile(read_path):
-        return [read_path]
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    """
-    创建路径
-    :param path:创建文件夹的路径
-    :param is_file_path: 传入的path是否包含具体的文件名
-    """
-    if is_file_path:
-        path = path.dirname(path)
-
-    if not path.exists(path):
-        makedirs(path, exist_ok=True)
-
-
-def copy_to_new(from_path):
-    to_path = from_path.replace(read_dir, save_base_dir)
-    is_file = False
-    if to_path.count('.') > 0:
-        is_file = True
-
-    create_file_path(to_path, is_file_path=is_file)
-
-    shutil.copy(from_path, to_path)
-
-
-print("开始:", datetime.datetime.now())
-begin = datetime.datetime.now()
-read_all_files = [i for i in read_files(read_dir) if i.find("收资数据") > -1]
-print(len(read_all_files))
-print("统计耗时:", datetime.datetime.now() - begin)
-cp_begin = datetime.datetime.now()
-
-with multiprocessing.Pool(40) as pool:
-    pool.starmap(copy_to_new, [(path,) for path in read_all_files])
-
-print(len(read_all_files), "耗时:", datetime.datetime.now() - cp_begin, "总耗时:", datetime.datetime.now() - begin)
-print("结束:", datetime.datetime.now())

+ 0 - 47
tmp_file/curge_read.py

@@ -1,47 +0,0 @@
-import os
-
-import chardet
-import pandas as pd
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding.lower() in ['utf-8', 'ascii', 'utf8', 'utf-8-sig']:
-        return 'utf-8'
-
-    return 'gb18030'
-
-
-def read_file_to_df(file_path, nrows=None):
-    df = pd.DataFrame()
-    try:
-        if str(file_path).lower().endswith("csv"):
-            encoding = detect_file_encoding(file_path)
-            df = pd.read_csv(file_path, encoding=encoding, on_bad_lines='warn', nrows=nrows)
-        else:
-            xls = pd.ExcelFile(file_path)
-            sheet_names = xls.sheet_names
-            for sheet_name in sheet_names:
-                now_df = pd.read_excel(xls, sheet_name=sheet_name, nrows=nrows)
-                now_df['sheet_name'] = sheet_name
-                df = pd.concat([df, now_df])
-            xls.close()
-    except Exception as e:
-        message = '文件:' + os.path.basename(file_path) + ',' + str(e)
-        raise ValueError(message)
-
-    return df
-
-
-if __name__ == '__main__':
-    df = read_file_to_df(r"D:\data\11-12月.xls")
-    print(df)

+ 0 - 40
tmp_file/error_ms_data.py

@@ -1,40 +0,0 @@
-from datetime import datetime
-
-import pandas as pd
-
-
-def convert_date(date_str):
-    cut_index = str(date_str).rfind("_")
-    date = date_str[0:cut_index].replace("_", "-")
-    time = date_str[cut_index + 1:].replace(":", ".")
-
-    return datetime.strptime(f"{date} {time}", '%Y-%m-%d %H.%M.%S.%f')
-
-
-df = pd.read_csv(r"d:/data/b2_240828_2324_Err 1.csv", header=1)
-df.dropna(subset='TimeStamp', inplace=True)
-df.drop_duplicates(subset='TimeStamp', keep="first", inplace=True)
-
-origin_columns = list(df.columns)
-
-df['TimeStamp1'] = df['TimeStamp'].apply(convert_date)
-df.sort_values(by='TimeStamp1', inplace=True)
-
-# df['DateTime'] = pd.to_datetime(df['TimeStamp'], format="%Y-%m-%d %H:%M:%S")
-df['DateTime'] = df['TimeStamp1'].apply(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))
-
-print(df.shape)
-
-dateTime_count = df['DateTime'].value_counts()
-
-dateTime_count_1 = dateTime_count[dateTime_count == 1]
-dateTime_count_gt1 = dateTime_count[dateTime_count > 1]
-
-df1 = df[df['DateTime'].isin(dateTime_count_1.index.values)]
-df2 = df[df['DateTime'].isin(dateTime_count_gt1.index.values)]
-
-print(df1.shape)
-print(df2.shape)
-origin_columns.insert(0, 'DateTime')
-df1.to_csv("1秒数据.csv", encoding='utf-8', index=False, columns=origin_columns, date_format="%Y-%m-%d %H:%M:%S.%f")
-df2.to_csv("毫秒数据.csv", encoding='utf-8', index=False, columns=origin_columns, date_format="%Y-%m-%d %H:%M:%S.%f")

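For reference, `convert_date` above splits on the last underscore, turning the date part's underscores into dashes and the time part's colons into dots before parsing with `%Y-%m-%d %H.%M.%S.%f`. A sketch with an assumed sample stamp (the real `TimeStamp` format is not shown in the diff):

    from datetime import datetime

    def convert_date(date_str):
        cut_index = str(date_str).rfind("_")
        date = date_str[0:cut_index].replace("_", "-")
        time = date_str[cut_index + 1:].replace(":", ".")
        return datetime.strptime(f"{date} {time}", '%Y-%m-%d %H.%M.%S.%f')

    # Assumed sample value for illustration only.
    print(convert_date("2024_08_28_23:24:01.123"))  # -> 2024-08-28 23:24:01.123000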
+ 0 - 57
tmp_file/fengxiang_fengdianchang.py

@@ -1,57 +0,0 @@
-import sys
-from multiprocessing import Pool
-from os import path
-# Use a separate name so "path" stays bound to os.path for the calls below
-project_root = path.dirname(path.dirname(path.abspath(__file__)))
-print(project_root)
-sys.path.insert(0, project_root)
-print(sys.path)
-
-from utils.file.trans_methods import *
-from utils.systeminfo.sysinfo import use_files_get_max_cpu_count
-
-
-def read_and_save_file(filename):
-    try:
-        basename = path.basename(filename)
-        wind_number = basename.split("_")[0]
-        df = read_file_to_df(filename, header=1)
-        df['风机号'] = wind_number
-        df['描述'] = pd.to_datetime(df['描述'], format='%d-%m-%Y %H:%M:%S')
-        df.set_index(keys=['描述', '风机号'], inplace=True)
-        return wind_number, df
-    except Exception as e:
-        print(basename, 'error')
-        raise e
-
-
-if __name__ == '__main__':
-    read_path = r'/data/download/collection_data/1进行中/枫香风电场-贵州-大唐/收资数据/枫香风电场收资表/1.10分钟SCADA数据'
-    save_path = r'/data/download/collection_data/1进行中/枫香风电场-贵州-大唐/清理数据/枫香风电场收资表/1.10分钟SCADA数据'
-    # read_path = r'D:\trans_data\枫香\收资数据\min'
-    # save_path = r'D:\trans_data\枫香\清理数据\min'
-    create_file_path(save_path, False)
-    all_fils = read_excel_files(read_path)
-    process_count = use_files_get_max_cpu_count(all_fils)
-
-    with Pool(process_count) as pool:
-        results = pool.starmap(read_and_save_file, [(i,) for i in all_fils])
-
-    df_dict = dict()
-    for result in results:
-        wind_number, df = result
-        cols = list(df.columns)
-        cols.sort()
-        cols_str = '-'.join(cols)
-        if wind_number in df_dict.keys():
-            if cols_str in df_dict[wind_number].keys():
-                df_dict[wind_number][cols_str] = pd.concat([df_dict[wind_number][cols_str], df], axis=0)
-            else:
-                df_dict[wind_number][cols_str] = df
-        else:
-            df_dict[wind_number] = {cols_str: df}
-
-    for wind_number, cols_dict in df_dict.items():
-        df = pd.concat(cols_dict.values(), axis=1)
-        df.sort_index(inplace=True)
-        df.reset_index(inplace=True)
-        df.to_csv(path.join(save_path, f"{wind_number}.csv"), encoding="utf-8", index=False)

+ 0 - 48
tmp_file/filter_lose_data.py

@@ -1,48 +0,0 @@
-import datetime
-
-import pandas as pd
-
-df = pd.read_csv("D:\data\白玉山后评估数据资料\十分钟.csv", encoding='utf8')
-
-df['时间'] = pd.to_datetime(df['时间'])
-df['plus_10min'] = df['时间'] + pd.Timedelta(minutes=10)
-
-names = set(df['设备名称'])
-
-
-def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
-    """
-    获取俩个时间之间的个数
-    :return: 查询时间间隔
-    """
-    delta = end_time - start_time
-    total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
-
-    return abs(int(total_seconds / time_space))
-
-
-result_dict = dict()
-for name in names:
-    q_df = df[df['设备名称'] == name]
-    q_df['unshift'] = q_df['时间'].shift(-1)
-    q_df.fillna('2024-09-01 00:00:00', inplace=True)
-    result_df = q_df[~(q_df['plus_10min'] == q_df['unshift'])]
-    result_df.reset_index(inplace=True)
-    q_list = list()
-    count = 0
-    result_df.to_csv('test.csv', encoding='utf8')
-    for i in range(result_df.shape[0]):
-        data = result_df.iloc[i]
-        begin = data['时间']
-        end = data['unshift']
-        count = count + get_time_space_count(begin, end, 600) - 1
-        # if end is not None and end != np.nan:
-        #     q_list.append(f"{begin} ~ {end}")
-
-    result_dict[name] = count
-
-with open("缺失_数量.csv", 'w', encoding='utf8') as f:
-    for k, v in result_dict.items():
-        # v.insert(0, k)
-        # f.write(",".join(v) + "\n")
-        f.write(f"{k},{v}\n")

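A quick worked check of the gap arithmetic in the deleted script above: for 10-minute data, `get_time_space_count(begin, end, 600) - 1` is the number of records missing inside a gap (sample values chosen for illustration):

    from datetime import datetime

    begin = datetime(2024, 8, 1, 0, 0, 0)
    end = datetime(2024, 8, 1, 1, 0, 0)               # a one-hour hole in 10-minute data
    steps = int((end - begin).total_seconds() / 600)  # 6 ten-minute steps between the stamps
    print(steps - 1)                                  # 5 records are actually missing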
+ 0 - 205
tmp_file/gradio_web.py

@@ -1,205 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Time    : 2024/6/3
-# @Author  : 魏志亮
-import copy
-
-import gradio as gr
-import yaml
-
-from service.plt_service import get_all_wind_company
-from service.trans_service import get_min_sec_conf_test
-
-
-# from utils.db.trans_mysql import *
-
-
-def test_click(wind_name, wind_full_name, type, is_vertical_table, merge_columns, vertical_read_cols,
-               vertical_index_cols, vertical_col_key, vertical_col_value, resolve_col_prefix, wind_name_exec,
-               wind_turbine_number, time_stamp, active_power, rotor_speed, generator_speed, wind_velocity,
-               pitch_angle_blade_1, pitch_angle_blade_2, pitch_angle_blade_3, cabin_position, true_wind_direction,
-               yaw_error1, set_value_of_active_power, gearbox_oil_temperature, generatordrive_end_bearing_temperature,
-               generatornon_drive_end_bearing_temperature, wind_turbine_status, wind_turbine_status2, cabin_temperature,
-               twisted_cable_angle, front_back_vibration_of_the_cabin, side_to_side_vibration_of_the_cabin,
-               actual_torque, given_torque, clockwise_yaw_count, counterclockwise_yaw_count, unusable,
-               power_curve_available, required_gearbox_speed, inverter_speed_master_control, outside_cabin_temperature,
-               main_bearing_temperature, gearbox_high_speed_shaft_bearing_temperature,
-               gearboxmedium_speed_shaftbearing_temperature, gearbox_low_speed_shaft_bearing_temperature,
-               generator_winding1_temperature, generator_winding2_temperature, generator_winding3_temperature,
-               turbulence_intensity, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10
-               ):
-    params = copy.deepcopy(vars())
-
-    error_message = ""
-    if wind_name is None or wind_name.strip() == '':
-        error_message += "风机名称必选"
-        gr.Warning(error_message)
-        return error_message
-
-    if wind_full_name is None or wind_full_name.strip() == '':
-        error_message += "风机全称必选"
-        gr.Warning(error_message)
-        return error_message
-
-    # save_to_trans_conf(params)
-    return yaml.dump(vars(), allow_unicode=True, sort_keys=False)
-
-
-def fill_data(wind_name, type):
-    select_cols = ['wind_full_name', 'is_vertical_table', 'merge_columns', 'vertical_read_cols',
-                   'vertical_index_cols', 'vertical_col_key', 'vertical_col_value', 'resolve_col_prefix',
-                   'wind_name_exec',
-                   'wind_turbine_number', 'time_stamp', 'active_power', 'rotor_speed', 'generator_speed',
-                   'wind_velocity', 'pitch_angle_blade_1', 'pitch_angle_blade_2', 'pitch_angle_blade_3',
-                   'cabin_position', 'true_wind_direction', 'yaw_error1', 'set_value_of_active_power',
-                   'gearbox_oil_temperature', 'generatordrive_end_bearing_temperature',
-                   'generatornon_drive_end_bearing_temperature', 'wind_turbine_status', 'wind_turbine_status2',
-                   'cabin_temperature', 'twisted_cable_angle', 'front_back_vibration_of_the_cabin',
-                   'side_to_side_vibration_of_the_cabin', 'actual_torque', 'given_torque', 'clockwise_yaw_count',
-                   'counterclockwise_yaw_count', 'unusable', 'power_curve_available', 'required_gearbox_speed',
-                   'inverter_speed_master_control', 'outside_cabin_temperature', 'main_bearing_temperature',
-                   'gearbox_high_speed_shaft_bearing_temperature', 'gearboxmedium_speed_shaftbearing_temperature',
-                   'gearbox_low_speed_shaft_bearing_temperature', 'generator_winding1_temperature',
-                   'generator_winding2_temperature', 'generator_winding3_temperature', 'turbulence_intensity', 'param1',
-                   'param2', 'param3', 'param4', 'param5', 'param6', 'param7', 'param8', 'param9', 'param10']
-    print(wind_name, type)
-    df = get_min_sec_conf_test(wind_name, type)
-    print(df)
-    if len(df.keys()) == 0:
-        return [''] * len(select_cols)
-    # Return the values in select_cols order; equivalent to the original
-    # element-by-element return of result[0] .. result[57]
-    return tuple(df[col] for col in select_cols)
-
-
-with gr.Blocks(css=".container.svelte-1sk0pyu.svelte-1sk0pyu {width: 300px}", title='中能智能') as demo:
-    wind_name = gr.Dropdown(label="电场名称", choices=get_all_wind_company())
-
-    types = {
-        '分钟映射': 'minute', '秒映射': 'second'
-    }
-
-    for name in types.keys():
-        with gr.Tab(label=name):
-            type = gr.Text(label="映射类型", value=types[name], visible=False)
-            wind_full_name = gr.Textbox(label="完整的电场名称")
-            merge_columns = gr.Checkbox(label="是否需合并(多个excel列合并成一个才需要选择)", value=False)
-            is_vertical_table = gr.Checkbox(label="是否是竖表", value=False)
-            vertical_read_cols = gr.Textbox(label="竖表--读取的字段", placeholder="逗号分隔")
-            vertical_index_cols = gr.Textbox(label="竖表--分组的字段", placeholder="逗号分隔,一般都是时间,机组")
-            vertical_col_key = gr.Textbox(label="竖表--数据点字段")
-            vertical_col_value = gr.Textbox(label="竖表--数据点数值")
-            resolve_col_prefix = gr.Textbox(label="处理列名",
-                                            placeholder="比如重庆海装 25_#桨距角,只需要 桨距角 可以用 column[column.find('#')+1:]")
-
-            wind_name_exec = gr.Textbox(label="风机编号代码处理",
-                                        placeholder="比如 昌平001号风机,可以配置 wind_name.replace('昌平','').replace('号风机','')")
-
-            wind_turbine_number = gr.Textbox(label="风机编号(wind_turbine_number)")
-            time_stamp = gr.Textbox(label="时间戳(time_stamp)")
-            active_power = gr.Textbox(label="有功功率(active_power)")
-            rotor_speed = gr.Textbox(label="风轮转速(rotor_speed)")
-            generator_speed = gr.Textbox(label="发电机转速(generator_speed)")
-            wind_velocity = gr.Textbox(label="风速(wind_velocity)")
-            pitch_angle_blade_1 = gr.Textbox(label="桨距角1(pitch_angle_blade_1)")
-            pitch_angle_blade_2 = gr.Textbox(label="桨距角2(pitch_angle_blade_2)")
-            pitch_angle_blade_3 = gr.Textbox(label="桨距角3(pitch_angle_blade_3)")
-            cabin_position = gr.Textbox(label="机舱位置(cabin_position)")
-            true_wind_direction = gr.Textbox(label="绝对风向(true_wind_direction)")
-            yaw_error1 = gr.Textbox(label="对风角度(yaw_error1)")
-            set_value_of_active_power = gr.Textbox(label="有功功率设定值(set_value_of_active_power)")
-            gearbox_oil_temperature = gr.Textbox(label="齿轮箱油温(gearbox_oil_temperature)")
-            generatordrive_end_bearing_temperature = gr.Textbox(
-                label="发电机驱动端轴承温度(generatordrive_end_bearing_temperature)")
-            generatornon_drive_end_bearing_temperature = gr.Textbox(
-                label="发电机非驱动端轴承温度(generatornon_drive_end_bearing_temperature)")
-            wind_turbine_status = gr.Textbox(label="风机状态1(wind_turbine_status)")
-            wind_turbine_status2 = gr.Textbox(label="风机状态2(wind_turbine_status2)")
-            cabin_temperature = gr.Textbox(label="机舱内温度(cabin_temperature)")
-            twisted_cable_angle = gr.Textbox(label="扭缆角度(twisted_cable_angle)")
-            front_back_vibration_of_the_cabin = gr.Textbox(label="机舱前后振动(front_back_vibration_of_the_cabin)")
-            side_to_side_vibration_of_the_cabin = gr.Textbox(label="机舱左右振动(side_to_side_vibration_of_the_cabin)")
-            actual_torque = gr.Textbox(label="实际力矩(actual_torque)")
-            given_torque = gr.Textbox(label="给定力矩(given_torque)")
-            clockwise_yaw_count = gr.Textbox(label="顺时针偏航次数(clockwise_yaw_count)")
-            counterclockwise_yaw_count = gr.Textbox(label="逆时针偏航次数(counterclockwise_yaw_count)")
-            unusable = gr.Textbox(label="不可利用(unusable)")
-            power_curve_available = gr.Textbox(label="功率曲线可用(power_curve_available)")
-            required_gearbox_speed = gr.Textbox(label="齿轮箱转速(required_gearbox_speed)")
-            inverter_speed_master_control = gr.Textbox(label="变频器转速(主控)(inverter_speed_master_control)")
-            outside_cabin_temperature = gr.Textbox(label="环境温度(outside_cabin_temperature)")
-            main_bearing_temperature = gr.Textbox(label="主轴承轴承温度(main_bearing_temperature)")
-            gearbox_high_speed_shaft_bearing_temperature = gr.Textbox(
-                label="齿轮箱高速轴轴承温度(gearbox_high_speed_shaft_bearing_temperature)")
-            gearboxmedium_speed_shaftbearing_temperature = gr.Textbox(
-                label="齿轮箱中速轴轴承温度(gearboxmedium_speed_shaftbearing_temperature)")
-            gearbox_low_speed_shaft_bearing_temperature = gr.Textbox(
-                label="齿轮箱低速轴轴承温度(gearbox_low_speed_shaft_bearing_temperature)")
-            generator_winding1_temperature = gr.Textbox(label="发电机绕组1温度(generator_winding1_temperature)")
-            generator_winding2_temperature = gr.Textbox(label="发电机绕组2温度(generator_winding2_temperature)")
-            generator_winding3_temperature = gr.Textbox(label="发电机绕组3温度(generator_winding3_temperature)")
-            turbulence_intensity = gr.Textbox(label="湍流强度(turbulence_intensity)")
-            param1 = gr.Textbox(label="齿轮箱油压(param1)")
-            param2 = gr.Textbox(label="预留字段2(param2)")
-            param3 = gr.Textbox(label="预留字段3(param3)")
-            param4 = gr.Textbox(label="预留字段4(param4)")
-            param5 = gr.Textbox(label="预留字段5(param5)")
-            param6 = gr.Textbox(label="预留字段6(param6)")
-            param7 = gr.Textbox(label="预留字段7(param7)")
-            param8 = gr.Textbox(label="预留字段8(param8)")
-            param9 = gr.Textbox(label="预留字段9(param9)")
-            param10 = gr.Textbox(label="预留字段10(param10)")
-
-            button = gr.Button(value="提交")
-            result = gr.Textbox(label="结果")
-
-            button.click(fn=test_click,
-                         inputs=[wind_name, wind_full_name, type, is_vertical_table, merge_columns, vertical_read_cols,
-                                 vertical_index_cols, vertical_col_key, vertical_col_value, resolve_col_prefix,
-                                 wind_name_exec, wind_turbine_number, time_stamp, active_power, rotor_speed,
-                                 generator_speed, wind_velocity, pitch_angle_blade_1, pitch_angle_blade_2,
-                                 pitch_angle_blade_3, cabin_position, true_wind_direction, yaw_error1,
-                                 set_value_of_active_power, gearbox_oil_temperature,
-                                 generatordrive_end_bearing_temperature, generatornon_drive_end_bearing_temperature,
-                                 wind_turbine_status, wind_turbine_status2, cabin_temperature, twisted_cable_angle,
-                                 front_back_vibration_of_the_cabin, side_to_side_vibration_of_the_cabin, actual_torque,
-                                 given_torque, clockwise_yaw_count, counterclockwise_yaw_count, unusable,
-                                 power_curve_available, required_gearbox_speed, inverter_speed_master_control,
-                                 outside_cabin_temperature, main_bearing_temperature,
-                                 gearbox_high_speed_shaft_bearing_temperature,
-                                 gearboxmedium_speed_shaftbearing_temperature,
-                                 gearbox_low_speed_shaft_bearing_temperature, generator_winding1_temperature,
-                                 generator_winding2_temperature, generator_winding3_temperature, turbulence_intensity,
-                                 param1, param2, param3, param4, param5, param6, param7, param8, param9, param10
-                                 ], outputs=[result])
-            wind_name.change(fill_data, inputs=[wind_name, type],
-                             outputs=[wind_full_name, is_vertical_table, merge_columns, vertical_read_cols,
-                                      vertical_index_cols, vertical_col_key, vertical_col_value, resolve_col_prefix,
-                                      wind_name_exec, wind_turbine_number, time_stamp, active_power, rotor_speed,
-                                      generator_speed, wind_velocity, pitch_angle_blade_1, pitch_angle_blade_2,
-                                      pitch_angle_blade_3, cabin_position, true_wind_direction, yaw_error1,
-                                      set_value_of_active_power, gearbox_oil_temperature,
-                                      generatordrive_end_bearing_temperature,
-                                      generatornon_drive_end_bearing_temperature,
-                                      wind_turbine_status, wind_turbine_status2, cabin_temperature, twisted_cable_angle,
-                                      front_back_vibration_of_the_cabin, side_to_side_vibration_of_the_cabin,
-                                      actual_torque,
-                                      given_torque, clockwise_yaw_count, counterclockwise_yaw_count, unusable,
-                                      power_curve_available, required_gearbox_speed, inverter_speed_master_control,
-                                      outside_cabin_temperature, main_bearing_temperature,
-                                      gearbox_high_speed_shaft_bearing_temperature,
-                                      gearboxmedium_speed_shaftbearing_temperature,
-                                      gearbox_low_speed_shaft_bearing_temperature, generator_winding1_temperature,
-                                      generator_winding2_temperature, generator_winding3_temperature,
-                                      turbulence_intensity,
-                                      param1, param2, param3, param4, param5, param6, param7, param8, param9, param10])
-
-if __name__ == "__main__":
-    demo.launch(server_name='0.0.0.0', server_port=7860, auth=('znzn', "znzn123"))

+ 0 - 28
tmp_file/hebing_matlib_result.py

@@ -1,28 +0,0 @@
-from os import *
-
-import pandas as pd
-
-read_path = r"D:\data\电量损失及散点图"
-df = pd.DataFrame()
-
-cols = ['风机', '应发电量', '实发电量', '停机损失电量', '坏点+限电损失电量', '性能损失电量', '坏点损失电量', '限电损失电量', '超发电量', '应发电量百分比', '实发电量百分比',
-        '停机损失电量百分比', '坏点+限电损失电量百分比', '性能损失电量百分比', '坏点损失电量百分比', '限电损失电量百分比', '超发电量百分比', '平均风速', '可利用率']
-
-for root, _dirs, files in walk(read_path):
-    if files:
-        base_name = path.basename(root)
-        wind_df = pd.DataFrame()
-        print(root)
-        df1 = pd.read_excel(path.join(root, "EPPer.xls"), usecols=['应发电量百分比', '实发电量百分比',
-                                                                     '停机损失电量百分比', '坏点+限电损失电量百分比', '性能损失电量百分比',
-                                                                     '坏点损失电量百分比',
-                                                                     '限电损失电量百分比', '超发电量百分比', '平均风速', '可利用率'])
-        df2 = pd.read_excel(path.join(root, "EPKW.xls"),
-                            usecols=['应发电量', '实发电量', '停机损失电量', '坏点+限电损失电量', '性能损失电量', '坏点损失电量', '限电损失电量', '超发电量'])
-        wind_df = pd.concat([df1, df2], axis=1)
-        wind_df['风机'] = base_name
-        wind_df.reset_index(inplace=True)
-        print(wind_df.columns)
-        df = pd.concat([df, wind_df], ignore_index=True)
-
-df.to_csv("合并结果.csv", index=False, encoding='utf8', columns=cols)

+ 0 - 77
tmp_file/hebing_muti_batch.py

@@ -1,77 +0,0 @@
-import multiprocessing
-import sys
-from os import *
-
-import pandas as pd
-
-sys.path.insert(0, path.abspath(__file__).split("tmp_file")[0])
-
-
-def hebing_and_save(new_batch_save_path, name, paths):
-    df = pd.DataFrame()
-    # The loop variable must not be named "path": path.join below needs the os.path module
-    for file_path in paths:
-        now_df = read_file_to_df(file_path)
-        df = pd.concat([df, now_df])
-
-    df.sort_values(by=['time_stamp'], inplace=True)
-
-    create_file_path(new_batch_save_path)
-    df.to_csv(path.join(new_batch_save_path, name), index=False, encoding='utf8')
-
-
-if __name__ == '__main__':
-
-    env = 'prod'
-    if len(sys.argv) >= 2:
-        env = sys.argv[1]
-
-    from utils.conf.read_conf import yaml_conf
-
-    conf_path = path.abspath(__file__).split("tmp_file")[0] + f"/conf/etl_config_{env}.yaml"
-    environ['ETL_CONF'] = conf_path
-    yaml_config = yaml_conf(conf_path)
-    environ['env'] = env
-
-    from utils.file.trans_methods import read_file_to_df, create_file_path
-
-    from etl.wind_power.fault_warn.FaultWarnTrans import FaultWarnTrans
-    from etl.wind_power.min_sec.MinSecTrans import MinSecTrans
-    from service.plt_service import get_hebing_data_by_batch_no_and_type
-
-    save_batch = 'WOF085500008-2-3'
-    save_batch_name = '合并'
-    trans_type = 'second'
-    read_batchs = ['WOF085500008-WOB000003', 'WOF085500008-WOB000002']
-    read_paths = list()
-
-    new_batch_save_path = ''
-
-    for read_data in read_batchs:
-        data = get_hebing_data_by_batch_no_and_type(read_data, trans_type)
-        save_db = True
-
-        exec_process = None
-        if data['transfer_type'] in ['second', 'minute']:
-            exec_process = MinSecTrans(data=data, save_db=save_db)
-
-        if data['transfer_type'] in ['fault', 'warn']:
-            exec_process = FaultWarnTrans(data=data, save_db=save_db)
-
-        if exec_process is None:
-            raise Exception("No exec process")
-
-        read_paths.append(exec_process.pathsAndTable.get_save_path())
-        new_batch_save_path = path.join(exec_process.pathsAndTable.save_path, save_batch + "_" + save_batch_name,
-                                           trans_type)
-
-    file_dict = dict()
-
-    for read_path in read_paths:
-        for file in listdir(read_path):
-            if file in file_dict:
-                file_dict[file].append(path.join(read_path, file))
-            else:
-                file_dict[file] = [path.join(read_path, file)]
-
-    with multiprocessing.Pool(len(file_dict.keys())) as pool:
-        pool.starmap(hebing_and_save, [(new_batch_save_path, name, paths) for name, paths in file_dict.items()])

+ 0 - 173
tmp_file/organize_xinhua_files.py

@@ -1,173 +0,0 @@
-import datetime
-import multiprocessing
-import warnings
-from os import *
-
-import numpy as np
-import pandas as pd
-
-warnings.filterwarnings("ignore")
-
-
-def __build_directory_dict(directory_dict, dir_path, filter_types=None):
-    # Walk every entry under dir_path (naming this parameter "path" would shadow os.path from the star import)
-    for item in listdir(dir_path):
-        item_path = path.join(dir_path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if dir_path not in directory_dict:
-                directory_dict[dir_path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[dir_path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[dir_path].append(item_path)
-
-
-# Read all Excel files under the given path
-def read_excel_files(read_path):
-    if path.isfile(read_path):
-        return [read_path]
-
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# Create a directory if it does not exist
-def create_file_path(target_path, is_file_path=False):
-    """
-    Create the directory for target_path (naming this parameter "path" would shadow os.path).
-    :param target_path: the directory to create
-    :param is_file_path: whether target_path ends with a file name
-    """
-    if is_file_path:
-        target_path = path.dirname(target_path)
-
-    if not path.exists(target_path):
-        makedirs(target_path, exist_ok=True)
-
-
-def boolean_is_check_data(df_cols):
-    fault_list = ['快速停机', '故障名称', '故障代码', '故障停机', '人工停机', '风机紧急停机', '工作模式', '风机自身故障停机', '限功率运行状态']
-
-    df_cols = [str(i).split('_')[-1] for i in df_cols]
-    for fault in fault_list:
-        if fault in df_cols:
-            return True
-
-    return False
-
-
-def read_file_to_df(file_path):
-    df = pd.read_excel(file_path)
-    wind_name = [i for i in df.columns if i.find('_') > -1][0].split('_')[0]
-    df.columns = [i.split('_')[-1] for i in df.columns]
-    df['wind_name'] = wind_name
-
-    return boolean_is_check_data(df.columns), wind_name, df
-
-
-def save_to_file(dfs, wind_name, save_path='', param='', is_check=False, all_cols=list(),
-                 result_data_list=multiprocessing.Manager().list()):
-    try:
-        if is_check:
-            df = pd.concat(dfs)
-        else:
-            df = dfs[0]
-            for index, now_df in enumerate(dfs):
-                if index > 0:
-                    df = pd.merge(df, now_df, on=['采样时间', 'wind_name'], how='outer')
-    except Exception as e:
-        print(wind_name, e)
-        raise e
-
-    df.reset_index(inplace=True)
-    df.drop_duplicates(inplace=True, subset=['采样时间', 'wind_name'])
-    if 'index' in df.columns:
-        del df['index']
-    create_file_path(path.join(save_path, param))  # the CSV below is written into the param subdirectory
-    df.sort_values(by='采样时间', inplace=True)
-
-    loss_cols = list([i for i in df.columns if i != 'wind_name'])
-    loss_cols.sort()
-
-    loss_cols.insert(0, wind_name)
-    loss_cols.insert(0, path.basename(save_path) + '-' + param)
-
-    result_data_list.append(loss_cols)
-
-    for col in set(all_cols):
-        if col not in df.columns:
-            df[col] = np.nan
-
-    df.to_csv(path.join(save_path, param, wind_name + '.csv'), encoding='utf8', index=False)
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    # dir1 = r'D:\data\新华水电\测试'
-    # save_path = r'D:\data\新华水电\整理数据'
-    result_datas = [
-        (r'/data/download/collection_data/1进行中/新华水电/风机SCADA数据/8月风机数据',
-         r'/data/download/collection_data/1进行中/新华水电/整理数据/8月'),
-        (r'/data/download/collection_data/1进行中/新华水电/风机SCADA数据/9月风机数据',
-         r'/data/download/collection_data/1进行中/新华水电/整理数据/9月')
-    ]
-
-    result_data_list = multiprocessing.Manager().list()
-
-    for dir1, save_path in result_datas:
-        files = read_excel_files(dir1)
-        with multiprocessing.Pool(30) as pool:
-            datas = pool.starmap(read_file_to_df, [(file,) for file in files])
-        data_wind_name = dict()
-        check_wind_name = dict()
-
-        data_all_cols = list()
-        check_all_cols = list()
-        for data in datas:
-            check_data, wind_name, df = data[0], data[1], data[2]
-
-            if '工作模式' not in df.columns:
-                # df.reset_index(inplace=True)
-                # df.set_index(keys=['采样时间'], inplace=True)
-                if check_data:
-                    check_all_cols.extend(list(df.columns))
-                    if wind_name in check_wind_name.keys():
-                        check_wind_name[wind_name].append(df)
-                    else:
-                        check_wind_name[wind_name] = [df]
-                else:
-                    data_all_cols.extend(list(df.columns))
-                    if wind_name in data_wind_name.keys():
-                        data_wind_name[wind_name].append(df)
-                    else:
-                        data_wind_name[wind_name] = [df]
-
-        # with multiprocessing.Pool(30) as pool:
-        #     pool.starmap(combine_df,
-        #                  [(dfs, wind_name, save_path, "事件数据", True, check_all_cols, result_data_list) for wind_name, dfs
-        #                   in
-        #                   check_wind_name.items()])
-
-        with multiprocessing.Pool(30) as pool:
-            pool.starmap(save_to_file,
-                         [(dfs, wind_name, save_path, "数据", False, data_all_cols, result_data_list) for wind_name, dfs
-                          in
-                          data_wind_name.items()])
-
-        print(datetime.datetime.now() - begin)
-
-    normal_list = list(result_data_list)
-    normal_list.sort(key=lambda x: (x[0], int(x[1][2:])))
-
-    with open('loss_col.csv', 'w', encoding='utf8') as f:
-        for datas in normal_list:
-            f.write(",".join(datas))
-            f.write('\n')
-
-    print(datetime.datetime.now() - begin)

+ 0 - 205
tmp_file/organize_xinhua_files_data.py

@@ -1,205 +0,0 @@
-import datetime
-import multiprocessing
-import warnings
-from os import *
-
-import pandas as pd
-
-warnings.filterwarnings("ignore")
-
-
-def __build_directory_dict(directory_dict, dir_path, filter_types=None):
-    # Walk every entry under dir_path (naming this parameter "path" would shadow os.path from the star import)
-    for item in listdir(dir_path):
-        item_path = path.join(dir_path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if dir_path not in directory_dict:
-                directory_dict[dir_path] = []
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[dir_path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[dir_path].append(item_path)
-
-
-# Read all Excel files under the given path
-def read_excel_files(read_path):
-    if path.isfile(read_path):
-        return [read_path]
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# Create a directory if it does not exist
-def create_file_path(target_path, is_file_path=False):
-    """
-    Create the directory for target_path (naming this parameter "path" would shadow os.path).
-    :param target_path: the directory to create
-    :param is_file_path: whether target_path ends with a file name
-    """
-    if is_file_path:
-        target_path = path.dirname(target_path)
-    if not path.exists(target_path):
-        makedirs(target_path, exist_ok=True)
-
-
-def boolean_is_check_data(df_cols, need_valid=True):
-    if not need_valid:
-        return True
-    fault_list = ['快速停机', '故障名称', '故障代码', '故障停机', '人工停机', '风机紧急停机', '风机自身故障停机', '限功率运行状态']
-    df_cols = [str(i).split('_')[-1] for i in df_cols]
-    for fault in fault_list:
-        if fault in df_cols:
-            return True
-    return False
-
-
-def read_file_to_df(file_path):
-    df = pd.read_excel(file_path)
-    wind_name = [i for i in df.columns if i.find('_') > -1][0].split('_')[0]
-    df.columns = [i.split('_')[-1] for i in df.columns]
-    df['wind_name'] = wind_name
-    df['采样时间'] = pd.to_datetime(df['采样时间'])
-    df['采样时间'] = df['采样时间'].dt.ceil('T')
-    return boolean_is_check_data(df.columns, file_path.find('批次') > -1), wind_name, df
-
-
-def read_guzhangbaojing(file_path):
-    try:
-        df = pd.read_excel(file_path)
-        df.rename(columns={'风机名': 'wind_name'}, inplace=True)
-        df['采样时间'] = pd.to_datetime(df['采样时间'])
-        df['采样时间'] = df['采样时间'].dt.ceil('T')
-        df = df[(df['采样时间'] >= '2024-08-01 00:00:00') & (df['采样时间'] < '2024-10-01 00:00:00')]
-        return df
-    except Exception as e:
-        print(file_path, e)
-        raise e
-
-
-def combine_df(dfs, wind_name, save_path=''):
-    print(wind_name)
-    cols = list()
-    col_map = dict()
-    try:
-        df = dfs[0]
-        cols.extend(df.columns)
-        for index, now_df in enumerate(dfs):
-            if index > 0:
-                for col in now_df.columns:
-                    if col in cols and col not in ['采样时间', 'wind_name']:
-                        if col in col_map.keys():
-                            count = col_map[col]
-                            col_map[col] = count + 1
-                        else:
-                            count = 1
-                            col_map[col] = 1
-                        now_df.rename(columns={col: col + '__' + str(count)}, inplace=True)
-                df = pd.merge(df, now_df, on=['采样时间', 'wind_name'], how='outer')
-                cols.extend(now_df.columns)
-    except Exception as e:
-        print(wind_name, e)
-        raise e
-    df.reset_index(inplace=True)
-    df.drop_duplicates(inplace=True, subset=['采样时间', 'wind_name'])
-    if 'index' in df.columns:
-        del df['index']
-    create_file_path(save_path)
-    df.sort_values(by='采样时间', inplace=True)
-    df.set_index(keys=['采样时间', 'wind_name'], inplace=True)
-    return wind_name, df
-
-
-def save_to_csv(wind_name, df):
-    try:
-        col_tuples = [(col.split('__')[0], col) for col in df.columns if col.find('__') > -1]
-        col_dict = dict()
-        for origin, col in col_tuples:
-            if origin in col_dict.keys():
-                col_dict[origin].add(col)
-            else:
-                col_dict[origin] = {col}
-
-        for origin, cols in col_dict.items():
-            print(wind_name, origin, cols)
-            if pd.api.types.is_numeric_dtype(df[origin]):
-                df[origin] = df[list(cols)].max(axis=1)
-            else:
-                df[origin] = df[list(cols)].apply(lambda x: [i for i in x.values if i][0], axis=1)
-            for col in cols:
-                if col != origin:
-                    del df[col]
-
-        df.to_csv(path.join(save_path, wind_name + '.csv'), encoding='utf8')
-
-    except Exception as e:
-        print(wind_name, df.columns)
-        raise e
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-
-    base_path = r'/data/download/collection_data/1进行中/新华水电/收资数据/风机SCADA数据'
-
-    dir1 = base_path + r'/data'
-    dir2 = base_path + r'/故障报警/汇能机组数据-故障'
-    dir3 = base_path + r'/故障报警/报警'
-    save_path = r'/data/download/collection_data/1进行中/新华水电/清理数据/合并批次1-2故障报警'
-
-    create_file_path(save_path)
-
-    # result_datas = [
-    #     (r'/data/download/collection_data/1进行中/新华水电/风机SCADA数据',
-    #      r'/data/download/collection_data/1进行中/新华水电/整理数据/批次1-2合并'),
-    # ]
-
-    data_wind_name = dict()
-    files = read_excel_files(dir1)
-    with multiprocessing.Pool(30) as pool:
-        datas = pool.starmap(read_file_to_df, [(file,) for file in files])
-    for data in datas:
-        check_data, wind_name, df = data[0], data[1], data[2]
-        if wind_name in data_wind_name.keys():
-            data_wind_name[wind_name].append(df)
-        else:
-            data_wind_name[wind_name] = [df]
-
-    with multiprocessing.Pool(30) as pool:
-        data_dfs = pool.starmap(combine_df,
-                                [(dfs, wind_name, save_path) for wind_name, dfs
-                                 in
-                                 data_wind_name.items()])
-
-    result_data_dict = dict()
-    for wind_name, df in data_dfs:
-        result_data_dict[wind_name] = df
-
-    for dir4 in [dir2, dir3]:
-        guzhang_files = read_excel_files(dir4)
-        with multiprocessing.Pool(30) as pool:
-            guzhang_datas = pool.starmap(read_guzhangbaojing, [(file,) for file in guzhang_files])
-        guzhang_df = pd.DataFrame()
-        for df in guzhang_datas:
-            if not df.empty:
-                guzhang_df = pd.concat([guzhang_df, df])
-        wind_names = set(list(guzhang_df['wind_name'].values))
-        for wind_name in wind_names:
-            now_df = guzhang_df[guzhang_df['wind_name'] == wind_name]
-            if wind_name in result_data_dict.keys():
-                now_df.reset_index(inplace=True)
-                now_df.drop_duplicates(inplace=True, subset=['采样时间', 'wind_name'])
-                if 'index' in now_df.columns:
-                    del now_df['index']
-                now_df.sort_values(by='采样时间', inplace=True)
-                now_df.set_index(keys=['采样时间', 'wind_name'], inplace=True)
-                res_df = result_data_dict[wind_name]
-                result_data_dict[wind_name] = pd.concat([res_df, now_df], axis=1)
-
-    with multiprocessing.Pool(30) as pool:
-        pool.starmap(save_to_csv, [(wind_name, df) for wind_name, df in result_data_dict.items()])
-
-    print(datetime.datetime.now() - begin)

+ 0 - 97
tmp_file/orgranize_hongyang.py

@@ -1,97 +0,0 @@
-import copy
-import multiprocessing
-import warnings
-from os import *
-
-import chardet
-import pandas as pd
-
-warnings.filterwarnings("ignore")
-
-# read_path = r'/home/wzl/test_data/红阳'
-# save_dir = r'/home/wzl/test_data/整理'
-
-read_path = r'D:\data\红阳\红阳秒级分测点\红阳'
-save_dir = r'D:\data\红阳\红阳秒级分测点\整理'
-
-def __build_directory_dict(directory_dict, dir_path, filter_types=None):
-    # Walk every entry under dir_path (naming this parameter "path" would shadow os.path from the star import)
-    for item in listdir(dir_path):
-        item_path = path.join(dir_path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if dir_path not in directory_dict:
-                directory_dict[dir_path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[dir_path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[dir_path].append(item_path)
-
-
-# Read all Excel files under the given path
-def read_excel_files(read_path):
-    if path.isfile(read_path):
-        return [read_path]
-
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-all_files = read_excel_files(read_path)
-
-
-# Detect the file's text encoding
-def detect_file_encoding(filename):
-    # Read the first 1000 bytes (enough for most encoding detection)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding.lower() in ['utf-8', 'ascii', 'utf8']:
-        return 'utf-8'
-
-    return 'gb18030'
-
-
-def read_and_organize(file):
-    df = pd.read_csv(file, encoding=detect_file_encoding(file))
-    return file, df
-
-
-if __name__ == '__main__':
-
-    with multiprocessing.Pool(10) as pool:
-        bak_datas = pool.starmap(read_and_organize, [(i,) for i in all_files])
-
-    datas = copy.deepcopy(bak_datas)
-    wind_name_df = dict()
-    for file, df in datas:
-        all_cols = [i for i in df.columns if i.find('#') > -1]
-        col = all_cols[0]
-        cedian = str(col).split("_")[-1]
-        wind_names = set([str(i).split("#")[0].replace("红阳风电场_", "") for i in all_cols])
-
-        print(file, df.columns)
-        for wind_name in wind_names:
-            cols = [i for i in all_cols if i.find('_' + wind_name) > -1]
-            cols.insert(0, '统计时间')
-            query_df = df[cols]
-            query_df.columns = [str(i).split('_')[-1] for i in query_df.columns]
-            query_df['风机编号'] = wind_name
-            if wind_name in wind_name_df.keys():
-                now_df = wind_name_df[wind_name]
-                wind_name_df[wind_name] = pd.merge(now_df, query_df, on=['统计时间', '风机编号'], how='outer')
-            else:
-                wind_name_df[wind_name] = query_df
-
-    for wind_name, df in wind_name_df.items():
-        df.to_csv(path.join(save_dir, wind_name + '#.csv'), index=False, encoding='utf8')

+ 0 - 91
tmp_file/power_derating.py

@@ -1,91 +0,0 @@
-import multiprocessing
-from os import *
-
-import matplotlib
-
-matplotlib.use('Agg')
-matplotlib.rcParams['font.family'] = 'SimHei'
-matplotlib.rcParams['font.sans-serif'] = ['SimHei']
-
-import numpy as np
-from matplotlib import pyplot as plt
-
-from utils.file.trans_methods import read_file_to_df
-from utils.file.trans_methods import read_excel_files
-import pandas as pd
-
-
-def select_data(file, curve_wv, curve_ap, save_path):
-    name = path.basename(file).split("@")[0]
-    try:
-        df = read_file_to_df(file)
-        df.dropna(subset=['有功功率 kW均值', '风速 m/s均值', '有功功率设定 kW均值'], inplace=True)
-        ap_gt_0_df = df[df['有功功率 kW均值'] > 0]
-        ap_le_0_df = df[df['有功功率 kW均值'] <= 0]
-        ap_le_0_df["marker"] = -1
-
-        ap = ap_gt_0_df['有功功率 kW均值'].values
-        wv = ap_gt_0_df['风速 m/s均值'].values
-        ap_set = ap_gt_0_df['有功功率设定 kW均值'].values
-
-        ap_gt_0_in = [0] * ap_gt_0_df.shape[0]
-
-        for i in range(len(ap_set)):
-            wind_speed = wv[i]
-            active_power = ap[i]
-            active_power_set = ap_set[i]
-
-            if active_power >= 2200 - 200:
-                ap_gt_0_in[i] = 1
-            else:
-                diffs = np.abs(curve_wv - wind_speed)
-                # Find the smallest wind-speed difference and its index
-                minDiff, idx = np.min(diffs), np.argmin(diffs)
-
-                # Use that index to look up the corresponding curve power
-                closestValue = curve_ap[idx]
-                if active_power - closestValue >= -100:
-                    ap_gt_0_in[i] = 1
-
-        ap_gt_0_df['marker'] = ap_gt_0_in
-        df = pd.concat([ap_gt_0_df, ap_le_0_df])
-
-        df.to_csv(path.join(save_path, name + '.csv'), index=False, encoding='utf-8')
-
-        df = df[['时间', '风速 m/s均值', '有功功率 kW均值', '有功功率设定 kW均值', 'marker']]
-
-        df = df[df['marker'] == 1]
-
-        x = df['风速 m/s均值'].values
-        y = df['有功功率 kW均值'].values
-        # Draw a scatter plot of the marked points
-        if not df.empty:
-            plt.scatter(x, y, s=10, c='blue')
-
-            # Add title and axis labels
-            plt.title(name)
-            plt.xlabel('风速均值')
-            plt.ylabel('有功功率均值')
-
-            # Save the figure
-            plt.savefig(path.join(save_path, name + '均值.png'))
-
-    except Exception as e:
-        print(path.basename(file), "出错", str(e))
-        raise e
-
-
-if __name__ == '__main__':
-    wind_power_df = read_file_to_df(r"D:\中能智能\matlib计算相关\标记derating\PV_Curve.csv")
-    curve_wv = wind_power_df["风速"].values
-    curve_ap = wind_power_df["功率"].values
-
-    all_files = read_excel_files(r"Z:\collection_data\1进行中\诺木洪风电场-甘肃-华电\清理数据\min-666")
-    save_path = r"D:\trans_data\诺木洪\清理数据\min-666-derating"
-
-    # save_path = r"Z:\collection_data\1进行中\诺木洪风电场-甘肃-华电\清理数据\min-666-marker"
-
-    # for file in all_files:
-
-    with multiprocessing.Pool(10) as pool:
-        pool.starmap(select_data, [(i, curve_wv, curve_ap, save_path) for i in all_files])

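The per-row loop in the deleted script above does a nearest-neighbour lookup against the contract power curve; a vectorized sketch of the same marking rule using NumPy broadcasting (thresholds copied from the script, function and argument names illustrative):

    import numpy as np

    def mark_derating(wv, ap, curve_wv, curve_ap, rated=2200, band=200, tol=-100):
        # Nearest contract-curve point for every sample, via broadcasting.
        idx = np.abs(curve_wv[None, :] - wv[:, None]).argmin(axis=1)
        expected = curve_ap[idx]
        # Same rule as the loop: near rated power, or no more than 100 kW below the curve.
        return ((ap >= rated - band) | (ap - expected >= tol)).astype(int)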
+ 0 - 90
tmp_file/power_derating_biaozhun.py

@@ -1,90 +0,0 @@
-from os import *
-
-import matplotlib
-import numpy as np
-
-from utils.draw.draw_file import scatter
-
-matplotlib.use('Agg')
-matplotlib.rcParams['font.family'] = 'SimHei'  # or 'Microsoft YaHei'
-matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # or ['Microsoft YaHei']
-
-from utils.file.trans_methods import read_file_to_df
-from utils.file.trans_methods import read_excel_files
-import pandas as pd
-
-
-class ContractPowerCurve(object):
-
-    def __init__(self, df: pd.DataFrame, wind_velocity='风速', active_power='功率'):
-        self.df = df
-        self.wind_velocity = wind_velocity
-        self.active_power = active_power
-
-
-def marker_active_power(contract_power_curve_class: ContractPowerCurve, df: pd.DataFrame, active_power='有功功率 kW均值',
-                        wind_velocity='风速 m/s均值'):
-    """
-    标记有功功率为正的记录
-    :param contract_power_curve_class: 合同功率曲线
-    :param df: 原始数据
-    :return: 标记有功功率为正的原始数据
-    """
-    contract_power_curve_df = contract_power_curve_class.df
-    curve_wv = contract_power_curve_df[contract_power_curve_class.wind_velocity].values
-    curve_ap = contract_power_curve_df[contract_power_curve_class.active_power].values
-
-    df.dropna(subset=[active_power, wind_velocity], inplace=True)
-    ap_gt_0_df = df[df[active_power] > 0]
-    ap_le_0_df = df[df[active_power] <= 0]
-    ap_le_0_df["marker"] = -1
-
-    active_power_values = ap_gt_0_df[active_power].values
-    wind_speed_values = ap_gt_0_df[wind_velocity].values
-    ap_gt_0_in = [0] * ap_gt_0_df.shape[0]
-
-    for i in range(len(ap_gt_0_in)):
-        wind_speed = wind_speed_values[i]
-        active_power = active_power_values[i]
-
-        # if active_power >= 2200 - 200:
-        #     ap_gt_0_in[i] = 1
-        # else:
-        diffs = np.abs(curve_wv - wind_speed)
-        # Find the smallest wind-speed difference and its index
-        minDiff, idx = np.min(diffs), np.argmin(diffs)
-
-        # Use that index to look up the corresponding curve power
-        closestValue = curve_ap[idx]
-        if active_power - closestValue >= -100:
-            ap_gt_0_in[i] = 1
-
-    ap_gt_0_df['marker'] = ap_gt_0_in
-    return pd.concat([ap_gt_0_df, ap_le_0_df])
-
-
-if __name__ == '__main__':
-    wind_power_df = read_file_to_df(r"D:\中能智能\matlib计算相关\标记derating\PV_Curve.csv")
-
-    all_files = read_excel_files(r"Z:\collection_data\1进行中\诺木洪风电场-甘肃-华电\清理数据\min-666")
-    save_path = r"D:\trans_data\诺木洪\清理数据\min-666-derating"
-
-    wind_power_df_class = ContractPowerCurve(wind_power_df)
-
-    for file in all_files:
-        name = path.basename(file).split("@")[0]
-        try:
-            df = read_file_to_df(file)
-            df = marker_active_power(wind_power_df_class, df)
-            df = df[df['marker'] == 1]
-            df.to_csv(path.join(save_path, name + '.csv'), index=False, encoding='utf-8')
-
-            # Draw a scatter plot of the marked points
-            if not df.empty:
-                scatter(name, x_label='风速均值', y_label='有功功率均值', x_values=df['风速 m/s均值'].values,
-                        y_values=df['有功功率 kW均值'].values, color='green',
-                        save_file_path=path.join(save_path, name + '均值.png'))
-
-        except Exception as e:
-            print(path.basename(file), "出错", str(e))
-            raise e

+ 0 - 213
tmp_file/power_derating_for_chunlin.py

@@ -1,213 +0,0 @@
-from os import *
-
-import matplotlib
-import numpy as np
-from matplotlib import pyplot as plt
-
-matplotlib.use('Agg')
-matplotlib.rcParams['font.family'] = 'SimHei'  # or 'Microsoft YaHei'
-matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # or ['Microsoft YaHei']
-
-import pandas as pd
-import chardet
-import warnings
-
-warnings.filterwarnings("ignore")
-
-
-# Detect the file's text encoding
-def detect_file_encoding(filename):
-    # Read the first 1000 bytes (enough for most encoding detection)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-def del_blank(df=pd.DataFrame(), cols=list()):
-    for col in cols:
-        if df[col].dtype == object:
-            df[col] = df[col].str.strip()
-    return df
-
-
-# Split an array into multiple chunks of num elements
-def split_array(array, num):
-    return [array[i:i + num] for i in range(0, len(array), num)]
-
-
-# Read a data file into a DataFrame
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    try:
-        df = pd.DataFrame()
-        if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-            encoding = detect_file_encoding(file_path)
-            end_with_gz = str(file_path).lower().endswith("gz")
-            if read_cols:
-                if end_with_gz:
-                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-                else:
-                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header,
-                                     on_bad_lines='warn')
-            else:
-
-                if end_with_gz:
-                    df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-                else:
-                    df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-        else:
-            xls = pd.ExcelFile(file_path)
-            # Get all sheet names
-            sheet_names = xls.sheet_names
-            for sheet in sheet_names:
-                if read_cols:
-                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)
-                else:
-                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header)
-
-                df = pd.concat([df, now_df])
-
-        print('文件读取成功', file_path, '文件数量', df.shape)
-    except Exception as e:
-        print('读取文件出错', file_path, str(e))
-        message = '文件:' + path.basename(file_path) + ',' + str(e)
-        raise ValueError(message)
-
-    return df
-
-
-def __build_directory_dict(directory_dict, dir_path, filter_types=None):
-    # Walk every entry under dir_path (naming this parameter "path" would shadow os.path from the star import)
-    for item in listdir(dir_path):
-        item_path = path.join(dir_path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if dir_path not in directory_dict:
-                directory_dict[dir_path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[dir_path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[dir_path].append(item_path)
-
-
-
-# Read all Excel files under the given path
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-class ContractPowerCurve(object):
-
-    def __init__(self, df: pd.DataFrame, wind_velocity='风速', active_power='功率'):
-        self.df = df
-        self.wind_velocity = wind_velocity
-        self.active_power = active_power
-
-
-# Create a directory if it does not exist
-def create_file_path(target_path, is_file_path=False):
-    if is_file_path:
-        target_path = path.dirname(target_path)
-
-    if not path.exists(target_path):
-        makedirs(target_path, exist_ok=True)
-
-
-def scatter(title, x_label, y_label, x_values, y_values, color='blue', size=10, save_file_path=''):
-    if save_file_path:
-        create_file_path(save_file_path, True)
-    else:
-        save_file_path = title + '.png'
-
-    plt.figure(figsize=(8, 6))
-    plt.title(title, fontsize=16)
-    plt.xlabel(x_label, fontsize=14)
-    plt.ylabel(y_label, fontsize=14)
-    plt.scatter(x_values, y_values, s=size, c=color)
-    plt.savefig(save_file_path)
-    plt.close()
-
-
-def marker_active_power(contract_power_curve_class: ContractPowerCurve, df: pd.DataFrame, active_power='有功功率 kW均值',
-                        wind_velocity='风速 m/s均值'):
-    """
-    标记有功功率为正的记录
-    :param contract_power_curve_class: 合同功率曲线
-    :param df: 原始数据
-    :return: 标记有功功率为正的原始数据
-    """
-    contract_power_curve_df = contract_power_curve_class.df
-    curve_wv = contract_power_curve_df[contract_power_curve_class.wind_velocity].values
-    curve_ap = contract_power_curve_df[contract_power_curve_class.active_power].values
-
-    df.dropna(subset=[active_power, wind_velocity], inplace=True)
-    ap_gt_0_df = df[df[active_power] > 0]
-    ap_le_0_df = df[df[active_power] <= 0]
-    ap_le_0_df["marker"] = -1
-
-    active_power_values = ap_gt_0_df[active_power].values
-    wind_speed_values = ap_gt_0_df[wind_velocity].values
-    ap_gt_0_in = [0] * ap_gt_0_df.shape[0]
-
-    for i in range(len(ap_gt_0_in)):
-        wind_speed = wind_speed_values[i]
-        active_power = active_power_values[i]
-
-        # if active_power >= 2200 - 200:
-        #     ap_gt_0_in[i] = 1
-        # else:
-        diffs = np.abs(curve_wv - wind_speed)
-        # Find the smallest wind-speed difference and its index
-        minDiff, idx = np.min(diffs), np.argmin(diffs)
-
-        # Use that index to look up the corresponding curve power
-        closestValue = curve_ap[idx]
-        if active_power - closestValue >= -100:
-            ap_gt_0_in[i] = 1
-
-    ap_gt_0_df['marker'] = ap_gt_0_in
-    return pd.concat([ap_gt_0_df, ap_le_0_df])
-
-
-if __name__ == '__main__':
-    wind_power_df = read_file_to_df(r"D:\中能智能\matlib计算相关\标记derating\PV_Curve.csv")
-
-    all_files = read_excel_files(r"Z:\collection_data\1进行中\诺木洪风电场-甘肃-华电\清理数据\min-666")
-    save_path = r"D:\trans_data\诺木洪\清理数据\min-666-derating"
-
-    wind_power_df_class = ContractPowerCurve(wind_power_df)
-
-    for file in all_files:
-        name = path.basename(file).split("@")[0]
-        try:
-            df = read_file_to_df(file)
-            df = marker_active_power(wind_power_df_class, df)
-            df = df[df['marker'] == 1]
-            # Save the filtered data
-            name = name.replace('HD', 'HD2')
-            df.to_csv(path.join(save_path, name + '.csv'), index=False, encoding='utf-8')
-
-            # Draw a scatter plot of the marked points
-            if not df.empty:
-                scatter(name, x_label='风速均值', y_label='有功功率均值', x_values=df['风速 m/s均值'].values,
-                        y_values=df['有功功率 kW均值'].values, color='green',
-                        save_file_path=path.join(save_path, name + '均值.png'))
-
-        except Exception as e:
-            print(path.basename(file), "出错", str(e))
-            raise e

+ 0 - 262
tmp_file/pv_youxiaoxing.py

@@ -1,262 +0,0 @@
-import multiprocessing
-from os import *
-
-import matplotlib
-
-matplotlib.use('Agg')
-matplotlib.rcParams['font.family'] = 'SimHei'  # or 'Microsoft YaHei'
-matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # or ['Microsoft YaHei']
-
-import chardet
-import warnings
-
-warnings.filterwarnings("ignore")
-
-import datetime
-
-import pandas as pd
-
-
-def get_time_space(df, time_str):
-    """
-    :return: 查询时间间隔
-    """
-    begin = datetime.datetime.now()
-    df1 = pd.DataFrame(df[time_str])
-    df1[time_str] = pd.to_datetime(df1[time_str], errors='coerce')
-    df1.sort_values(by=time_str, inplace=True)
-    df1['chazhi'] = df1[time_str].shift(-1) - df1[time_str]
-    result = df1.sample(int(df1.shape[0] / 100))['chazhi'].value_counts().idxmax().seconds
-    del df1
-    print(datetime.datetime.now() - begin)
-    return abs(result)
-
-
-def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
-    """
-    获取俩个时间之间的个数
-    :return: 查询时间间隔
-    """
-    delta = end_time - start_time
-    total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
-
-    return abs(int(total_seconds / time_space)) + 1
-
-
-# Detect the file's text encoding
-def detect_file_encoding(filename):
-    # Read the first 1000 bytes (enough for most encoding detection)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-def del_blank(df=pd.DataFrame(), cols=list()):
-    for col in cols:
-        if df[col].dtype == object:
-            df[col] = df[col].str.strip()
-    return df
-
-
-# 切割数组到多个数组
-def split_array(array, num):
-    return [array[i:i + num] for i in range(0, len(array), num)]
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    try:
-        df = pd.DataFrame()
-        if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-            encoding = detect_file_encoding(file_path)
-            end_with_gz = str(file_path).lower().endswith("gz")
-            if read_cols:
-                if end_with_gz:
-                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-                else:
-                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header,
-                                     on_bad_lines='warn')
-            else:
-
-                if end_with_gz:
-                    df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-                else:
-                    df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-        else:
-            xls = pd.ExcelFile(file_path)
-            # 获取所有的sheet名称
-            sheet_names = xls.sheet_names
-            for sheet in sheet_names:
-                if read_cols:
-                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)
-                else:
-                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header)
-
-                df = pd.concat([df, now_df])
-
-        print('文件读取成功', file_path, '文件数量', df.shape)
-    except Exception as e:
-        print('读取文件出错', file_path, str(e))
-        message = '文件:' + path.basename(file_path) + ',' + str(e)
-        raise ValueError(message)
-
-    return df
-
-
-def __build_directory_dict(directory_dict, dir_path, filter_types=None):
-    # walk every entry under the directory; the parameter is named dir_path so it
-    # does not shadow the os.path module pulled in by `from os import *`
-    for item in listdir(dir_path):
-        item_path = path.join(dir_path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if dir_path not in directory_dict:
-                directory_dict[dir_path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[dir_path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[dir_path].append(item_path)
-
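Review note: this recursive directory helper is duplicated across several of the deleted scripts; os.walk does the same job in a few lines. A minimal sketch under the same suffix filter and the same exclusion of Office lock files ('~$'):

    from os import path, walk

    def list_data_files(read_path, filter_types=('xls', 'xlsx', 'csv', 'gz')):
        result = []
        for root, _, names in walk(read_path):
            for name in names:
                if name.split('.')[-1] in filter_types and '~$' not in name:
                    result.append(path.join(root, name))
        return result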
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# create the directory; the parameter must not be named `path`, which
-# `from os import *` binds to the os.path module
-def create_file_path(target_path, is_file_path=False):
-    if is_file_path:
-        target_path = path.dirname(target_path)
-
-    if not path.exists(target_path):
-        makedirs(target_path, exist_ok=True)
-
-
-def time_biaozhun(df):
-    time_space = get_time_space(df, '时间')
-    query_df = df[['时间']]
-    query_df['时间'] = pd.to_datetime(df['时间'], errors="coerce")
-    query_df = query_df.dropna(subset=['时间'])
-    total = get_time_space_count(query_df['时间'].min(), query_df['时间'].max(), time_space)
-    return total, save_percent(1 - query_df.shape[0] / total), save_percent(1 - df.shape[0] / total)
-
-
-def save_percent(value, save_decimal=7):
-    return round(value, save_decimal) * 100
-
-
-def calc(df, file_name):
-    error_dict = {}
-    lose_dict = {}
-    error_dict['箱变'] = "".join(file_name.split(".")[:-1])
-    lose_dict['箱变'] = "".join(file_name.split(".")[:-1])
-
-    total, lose_time, error_time = time_biaozhun(df)
-    error_dict['时间'] = error_time
-    lose_dict['时间'] = lose_time
-
-    error_df = pd.DataFrame()
-    lose_df = pd.DataFrame()
-
-    try:
-        df.columns = ["".join(["逆变器" + "".join(col.split("逆变器")[1:])]) if col.find("逆变器") > -1 else col for col in
-                      df.columns]
-
-        for col in df.columns:
-            if col == '时间':
-                continue
-            query_df = df[[col]]
-            query_df[col] = pd.to_numeric(query_df[col], errors="coerce")
-            query_df = query_df.dropna(subset=[col])
-            lose_dict[col] = save_percent(1 - query_df.shape[0] / total)
-
-            if col.find('电压') > -1:
-                error_dict[col] = save_percent(query_df[query_df[col] < 0].shape[0] / total)
-
-            if col.find('电流') > -1:
-                error_dict[col] = save_percent(query_df[query_df[col] < -0.1].shape[0] / total)
-
-            if col.find('逆变器效率') > -1:
-                error_dict[col] = save_percent(query_df[(query_df[col] <= 0) | (query_df[col] >= 100)].shape[0] / total)
-
-            if col.find('温度') > -1:
-                error_dict[col] = save_percent(query_df[(query_df[col] < 0) | (query_df[col] > 100)].shape[0] / total)
-
-            if col.find('功率因数') > -1:
-                error_dict[col] = save_percent(query_df[(query_df[col] < 0) | (query_df[col] > 1)].shape[0] / total)
-
-        total, count = 0, 0
-        for k, v in error_dict.items():
-            if k != '箱变':
-                total = total + error_dict[k]
-                count = count + 1
-
-        error_dict['平均异常率'] = save_percent(total / count / 100)
-
-        total, count = 0, 0
-        for k, v in lose_dict.items():
-            if k != '箱变':
-                total = total + lose_dict[k]
-                count = count + 1
-
-        lose_dict['平均缺失率'] = save_percent(total / count / 100)
-
-        error_df = pd.concat([error_df, pd.DataFrame(error_dict, index=[0])])
-        lose_df = pd.concat([lose_df, pd.DataFrame(lose_dict, index=[0])])
-
-        error_df_cols = ['箱变', '平均异常率']
-        for col in error_df.columns:
-            if col not in error_df_cols:
-                error_df_cols.append(col)
-
-        lose_df_cols = ['箱变', '平均缺失率']
-        for col in lose_df.columns:
-            if col not in lose_df_cols:
-                lose_df_cols.append(col)
-
-        error_df = error_df[error_df_cols]
-        lose_df = lose_df[lose_df_cols]
-    except Exception as e:
-        print("异常文件", path.basename(file_name))
-        raise e
-
-    return error_df, lose_df
-
-
-def run(file_path):
-    df = read_file_to_df(file_path)
-    return calc(df, path.basename(file_path))
-
-
-if __name__ == '__main__':
-    # read_path = r'/data/download/大唐玉湖性能分析离线分析/05整理数据/逆变器数据'
-    # save_path = r'/data/download/大唐玉湖性能分析离线分析/06整理数据/逆变器数据'
-
-    read_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\yuanshi'
-    save_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\zhengli'
-    all_files = read_excel_files(read_path)
-
-    with multiprocessing.Pool(2) as pool:
-        df_arrys = pool.starmap(run, [(file,) for file in all_files])
-
-    error_df = pd.concat([df[0] for df in df_arrys])
-    lose_df = pd.concat([df[1] for df in df_arrys])
-    with pd.ExcelWriter(path.join(save_path, "玉湖光伏数据统计.xlsx")) as writer:
-        error_df.to_excel(writer, sheet_name='error_percent', index=False)
-        lose_df.to_excel(writer, sheet_name='lose_percent', index=False)
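Review note: the per-column loop in calc can be collapsed for the missing-rate half of the report; a sketch, assuming `total` is the expected row count derived from the time axis as in time_biaozhun:

    numeric = df.drop(columns=['时间']).apply(pd.to_numeric, errors='coerce')
    lose_rates = (1 - numeric.notna().sum() / total) * 100  # one missing rate per column

The threshold checks (voltage, current, efficiency, temperature, power factor) differ per column family, so the anomaly half keeps its branching.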

+ 0 - 134
tmp_file/qinghai-nuomuhong-guifan.py

@@ -1,134 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Spyder editor
-
-This is a temporary script file.
-"""
-import datetime
-import multiprocessing
-from os import *
-
-import numpy as np
-import pandas as pd
-
-dianjian_str = """
-wind_turbine_number		
-time_stamp		时间
-active_power		有功功率 kW
-rotor_speed		风轮转速 rpm
-generator_speed		发电机转速 rpm
-wind_velocity		风速 m/s
-pitch_angle_blade_1		叶片1角度 °
-pitch_angle_blade_2		叶片2角度 °
-pitch_angle_blade_3		叶片3角度 °
-cabin_position		机舱位置 °
-true_wind_direction		
-yaw_error1		风向 °
-twisted_cable_angle		
-main_bearing_temperature		主轴温度 ℃
-gearbox_oil_temperature		齿轮箱温度 ℃
-gearbox_low_speed_shaft_bearing_temperature		齿轮箱轴承温度 ℃
-gearboxmedium_speed_shaftbearing_temperature		
-gearbox_high_speed_shaft_bearing_temperature		齿轮箱轴承温度2 ℃
-generatordrive_end_bearing_temperature		发电机驱动侧轴承温度 ℃
-generatornon_drive_end_bearing_temperature		发电机非驱动侧轴承温度 ℃
-cabin_temperature		机舱温度 ℃
-outside_cabin_temperature		舱外温度 ℃
-generator_winding1_temperature		
-generator_winding2_temperature		
-generator_winding3_temperature		
-front_back_vibration_of_the_cabin		
-side_to_side_vibration_of_the_cabin		
-required_gearbox_speed		
-inverter_speed_master_control		
-actual_torque		
-given_torque		
-clockwise_yaw_count		
-counterclockwise_yaw_count		
-unusable		
-power_curve_available		
-set_value_of_active_power		有功功率设定 kW
-wind_turbine_status		
-wind_turbine_status2		
-turbulence_intensity		
-"""
-
-datas = [i for i in dianjian_str.split("\n") if i]
-
-dianjian_dict = dict()
-
-for data in datas:
-    ds = data.split("\t")
-
-    if len(ds) == 3:
-        dianjian_dict[ds[0]] = ds[2]
-    else:
-        dianjian_dict[ds[0]] = ''
-
-
-def read_df(file_path):
-    df = pd.read_csv(file_path, header=[0, 1])
-
-    col_nams_map = dict()
-    pre_col = ""
-    for tuple_col in df.columns:
-        col1 = tuple_col[0]
-        col2 = tuple_col[1]
-        if str(col1).startswith("Unnamed"):
-            if pre_col:
-                col1 = pre_col
-                pre_col = ''
-            else:
-                col1 = ''
-        else:
-            pre_col = col1
-
-        if str(col2).startswith("Unnamed"):
-            col2 = ''
-
-        col_nams_map[str(tuple_col)] = ''.join([col1, col2])
-    # print(col_nams_map)
-    # for k, v in col_nams_map.items():
-    #     if str(v).endswith('采样值'):
-    #         col_nams_map[k] = str(v)[:-3]
-
-    df.columns = [str(col) for col in df.columns]
-    df.rename(columns=col_nams_map, inplace=True)
-
-    # for col, name in dianjian_dict.items():
-    #     if name in df.columns:
-    #         df.rename(columns={name: col}, inplace=True)
-
-    # for col in df.columns:
-    #     if col not in dianjian_dict.keys():
-    #         del df[col]
-
-    return df
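Review note: read_df flattens a two-row CSV header in which the first row is only filled at group boundaries (pandas reads the blanks as 'Unnamed: …'). Worth noting that the carry is one step only; a sketch of what the mapping yields:

    df = pd.read_csv(file_path, header=[0, 1])
    # ('风速', '采样值')       -> '风速采样值'
    # ('Unnamed: 3', '统计值') -> '风速统计值'   (top name carried forward one column)
    # a top-level name must therefore span at most two sub-columns for this to hold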
-
-
-def get_wind_name_files(dir_path):
-    files = listdir(dir_path)
-    return files
-
-
-def combine_df(save_path, file):
-    begin = datetime.datetime.now()
-    df = read_df(file)
-    print("读取", file, df.shape)
-    df.replace("-", np.nan,inplace=True)
-    df.to_csv(path.join(save_path, path.basename(file)), encoding='utf-8', index=False)
-
-    print('整理完成', '耗时:', (datetime.datetime.now() - begin).seconds)
-
-
-if __name__ == '__main__':
-    read_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/min-666'
-    save_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/清理数据/min-666'
-
-    # read_path = r'D:\trans_data\诺木洪\收资数据\min-666'
-    # save_path = r'D:\trans_data\诺木洪\清理数据\min-666'
-    if not path.exists(save_path):
-        makedirs(save_path, exist_ok=True)
-
-    with multiprocessing.Pool(20) as pool:
-        pool.starmap(combine_df, [(save_path, read_path + sep + file) for file in listdir(read_path)])

+ 0 - 162
tmp_file/qinghai-nuomuhong.py

@@ -1,162 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Spyder editor
-
-This is a temporary script file.
-"""
-import copy
-import datetime
-import multiprocessing
-from os import *
-
-import numpy as np
-import pandas as pd
-
-dianjian_str = """
-wind_turbine_number		
-time_stamp		时间
-active_power		有功功率 kW
-rotor_speed		风轮转速 rpm
-generator_speed		发电机转速 rpm
-wind_velocity		风速 m/s
-pitch_angle_blade_1		叶片1角度 °
-pitch_angle_blade_2		叶片2角度 °
-pitch_angle_blade_3		叶片3角度 °
-cabin_position		机舱位置 °
-true_wind_direction		
-yaw_error1		风向 °
-twisted_cable_angle		
-main_bearing_temperature		主轴温度 ℃
-gearbox_oil_temperature		齿轮箱温度 ℃
-gearbox_low_speed_shaft_bearing_temperature		齿轮箱轴承温度 ℃
-gearboxmedium_speed_shaftbearing_temperature		
-gearbox_high_speed_shaft_bearing_temperature		齿轮箱轴承温度2 ℃
-generatordrive_end_bearing_temperature		发电机驱动侧轴承温度 ℃
-generatornon_drive_end_bearing_temperature		发电机非驱动侧轴承温度 ℃
-cabin_temperature		机舱温度 ℃
-outside_cabin_temperature		舱外温度 ℃
-generator_winding1_temperature		
-generator_winding2_temperature		
-generator_winding3_temperature		
-front_back_vibration_of_the_cabin		
-side_to_side_vibration_of_the_cabin		
-required_gearbox_speed		
-inverter_speed_master_control		
-actual_torque		
-given_torque		
-clockwise_yaw_count		
-counterclockwise_yaw_count		
-unusable		
-power_curve_available		
-set_value_of_active_power		有功功率设定 kW
-wind_turbine_status		
-wind_turbine_status2		
-turbulence_intensity		
-"""
-
-datas = [i for i in dianjian_str.split("\n") if i]
-
-dianjian_dict = dict()
-
-for data in datas:
-    ds = data.split("\t")
-
-    if len(ds) == 3:
-        dianjian_dict[ds[0]] = ds[2]
-    else:
-        dianjian_dict[ds[0]] = ''
-
-
-def read_df(file_path):
-    df = pd.read_csv(file_path, header=[0, 1])
-
-    col_nams_map = dict()
-    pre_col = ""
-    for tuple_col in df.columns:
-        col1 = tuple_col[0]
-        col2 = tuple_col[1]
-        if str(col1).startswith("Unnamed"):
-            if pre_col:
-                col1 = pre_col
-                pre_col = ''
-            else:
-                col1 = ''
-        else:
-            pre_col = col1
-
-        if str(col2).startswith("Unnamed"):
-            col2 = ''
-
-        col_nams_map[str(tuple_col)] = ''.join([col1, col2])
-    print(col_nams_map)
-    for k, v in col_nams_map.items():
-        if str(v).endswith('采样值'):
-            col_nams_map[k] = str(v)[:-3]
-
-    df.columns = [str(col) for col in df.columns]
-    df.rename(columns=col_nams_map, inplace=True)
-
-    for col, name in dianjian_dict.items():
-        if name in df.columns:
-            df.rename(columns={name: col}, inplace=True)
-
-    for col in df.columns:
-        if col not in dianjian_dict.keys():
-            del df[col]
-
-    return df
-
-
-def get_wind_name_files(dir_path):
-    # dir_path instead of `path`: with `from os import *`, `path` is os.path
-    files = listdir(dir_path)
-
-    wind_files_map = dict()
-    for file in files:
-        full_file = path.join(dir_path, file)
-        file_datas = str(file).split("@")
-        key = file_datas[0].replace("HD", "HD2")
-        if key in wind_files_map.keys():
-            wind_files_map[key].append(full_file)
-        else:
-            wind_files_map[key] = [full_file]
-
-    return wind_files_map
-
-
-def combine_df(save_path, wind_name, files):
-    begin = datetime.datetime.now()
-    df = pd.DataFrame()
-    for file in files:
-        query_df = read_df(file)
-        print("读取", file, query_df.shape)
-        query_df['time_stamp'] = pd.to_datetime(query_df['time_stamp'])
-        query_df.set_index(keys='time_stamp', inplace=True)
-        query_df = query_df[~query_df.index.duplicated(keep='first')]
-        if df.empty:
-            df = copy.deepcopy(query_df)
-        else:
-            df = pd.concat([df, query_df], join='inner')
-    df.reset_index(inplace=True)
-    df['wind_turbine_number'] = wind_name
-    for col, name in dianjian_dict.items():
-        if col not in df.columns:
-            df[col] = np.nan
-
-    df = df[dianjian_dict.keys()]
-    df.to_csv(path.join(save_path, wind_name + ".csv"), encoding='utf-8', index=False)
-
-    print(wind_name, '整理完成', '耗时:', (datetime.datetime.now() - begin).seconds)
-
-
-if __name__ == '__main__':
-    read_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/sec'
-    save_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/sec_采样值'
-
-    # read_path = r'D:\trans_data\诺木洪\收资数据\min'
-    # save_path = r'D:\trans_data\诺木洪\清理数据\min'
-    if not path.exists(save_path):
-        makedirs(save_path, exist_ok=True)
-    wind_files_map = get_wind_name_files(read_path)
-
-    with multiprocessing.Pool(20) as pool:
-        pool.starmap(combine_df, [(save_path, wind_name, files) for wind_name, files in wind_files_map.items()])

+ 0 - 38
tmp_file/queshi_bili.py

@@ -1,38 +0,0 @@
-import datetime
-
-import pandas as pd
-
-
-def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
-    """
-    count the expected number of records between two timestamps
-    :return: number of time_space-second steps between the two times
-    """
-    delta = end_time - start_time
-    total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
-
-    return abs(int(total_seconds / time_space))
-
-
-df = pd.read_csv("D:\data\白玉山后评估数据资料\十分钟.csv", encoding='utf8')
-
-df['时间'] = pd.to_datetime(df['时间'])
-df['plus_10min'] = df['时间'] + pd.Timedelta(minutes=10)
-
-names = list(set(df['设备名称']))
-names.sort()
-
-count = get_time_space_count(datetime.datetime.strptime('2023-09-01 00:00:00', '%Y-%m-%d %H:%M:%S'),
-                             datetime.datetime.strptime('2024-09-01 00:00:00', '%Y-%m-%d %H:%M:%S'), 600)
-
-result_df = pd.DataFrame(df['设备名称'].value_counts())
-result_df.reset_index(inplace=True)
-result_df.columns = ['风机', '数量']
-
-result_df['总数'] = count
-
-result_df['完整度'] = result_df['数量'].apply(lambda x: round(x * 100 / count, 2))
-
-result_df.sort_values(by=['风机'], inplace=True)
-
-print(result_df)
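Review note: the expected record count can also come straight from a date range; a sketch with the same one-year window and 10-minute step (pandas >= 1.4 for the inclusive keyword):

    expected = len(pd.date_range('2023-09-01', '2024-09-01', freq='10min',
                                 inclusive='left'))  # 52704, i.e. 366 days * 144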

+ 0 - 42
tmp_file/read_and_draw_png.py

@@ -1,42 +0,0 @@
-import multiprocessing
-from os import *
-
-from etl.wind_power.min_sec.ClassIdentifier import ClassIdentifier
-from utils.draw.draw_file import scatter
-from utils.file.trans_methods import read_file_to_df
-
-
-def draw(file, fengchang='测试'):
-    name = path.basename(file).split('.')[0]
-    df = read_file_to_df(file)
-    del df['lab']
-    identifier = ClassIdentifier(wind_turbine_number='test', origin_df=df, rated_power=5000, cut_out_speed=20,
-                                 active_power='active_power', wind_velocity='wind_velocity',
-                                 pitch_angle_blade='pitch_angle_blade_1')
-    df = identifier.run()
-
-    df.loc[df['active_power'] <= 0, 'lab'] = -1
-
-    print(df.groupby('lab').count())
-    color_map = {-1: 'red', 0: 'green', 1: 'blue', 2: 'black', 3: 'orange', 4: 'magenta'}
-    c = df['lab'].map(color_map)
-
-    # -1:停机 0:好点  1:欠发功率点;2:超发功率点;3:额定风速以上的超发功率点 4: 限电
-    legend_map = {"停机": 'red', "好点": 'green', "欠发": 'blue', "超发": 'black', "额定风速以上的超发": 'orange',
-                  "限电": 'magenta'}
-    scatter(name, x_label='风速', y_label='有功功率', x_values=df['wind_velocity'].values,
-            y_values=df['active_power'].values, color=c, col_map=legend_map,
-            save_file_path=path.dirname(
-                path.dirname(__file__)) + sep + "tmp" + sep + str(fengchang) + sep + name + '结果.png')
-
-
-if __name__ == '__main__':
-    read_dir = r"D:\data\logs\matlib-test"
-
-    files = [read_dir + sep + i for i in listdir(read_dir)]
-
-    if len(files) == 1:
-        draw(files[0], "和风元宝山4")
-    else:
-        with multiprocessing.Pool(4) as pool:
-            pool.starmap(draw, [(file, "和风元宝山4") for file in files])

+ 0 - 27
tmp_file/select_part_cols.py

@@ -1,27 +0,0 @@
-import datetime
-import multiprocessing
-from os import *
-
-import pandas as pd
-
-read_dir = r'/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF01000010-WOB000002_ZY24年7-9月秒级/second'
-save_dir = r'/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF01000010-WOB000002_ZY24年7-9月秒级/second_select_yaw_error1_20241014'
-
-
-def read_and_select_and_save(file):
-    df = pd.read_csv(read_dir + sep + file,
-                     usecols=['active_power', 'wind_velocity', 'pitch_angle_blade_1', 'yaw_error1', 'lab'])
-    df = df[df['yaw_error1'] <= 360]
-    df['yaw_error1'] = df['yaw_error1'].apply(lambda x: x - 360 if 180 <= x <= 360 else x)
-    condition = (df['active_power'] > 0) & (df['wind_velocity'] > 0)
-    df = df[condition]
-
-    df.to_csv(path.join(save_dir, file), index=False, encoding='utf8')
-    print(f'{file}处理完成')
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    with multiprocessing.Pool(32) as pool:
-        pool.starmap(read_and_select_and_save, [(file,) for file in listdir(read_dir)])
-    print(f'总耗时:{datetime.datetime.now() - begin}')

+ 0 - 19
tmp_file/test_wave.py

@@ -1,19 +0,0 @@
-import sys
-from os import *
-
-sys.path.insert(0, path.abspath(__file__).split("tmp_file")[0])
-
-
-if __name__ == '__main__':
-    env = 'dev'
-    if len(sys.argv) >= 2:
-        env = sys.argv[1]
-
-    conf_path = path.abspath(__file__).split("tmp_file")[0] + f"/conf/etl_config_{env}.yaml"
-    environ['ETL_CONF'] = conf_path
-    environ['env'] = env
-    from etl.wind_power.wave.WaveTrans import WaveTrans
-    test = WaveTrans('SKF001', r'/home/wzl/test_data/sdk_data/sdk_data', r'/home/wzl/test_data/sdk_data')
-    # test = WaveTrans('SKF001', r'D:\data\sdk_data\sdk_data_less', r'/home/wzl/test_data/sdk_data')
-
-    test.run()

+ 0 - 55
tmp_file/zibo_guzhang_select_time.py

@@ -1,55 +0,0 @@
-from utils.file.trans_methods import *
-
-# imported after the wildcard so that `datetime` here is the class and cannot be
-# shadowed by a module-level `import datetime` inside trans_methods
-from datetime import datetime, timedelta
-
-
-def convert_and_calculate_time_range(time_str):
-    # 解析原始字符串
-    date_part = time_str[:6]
-    time_part = time_str[7:]
-
-    # 将短日期格式转换为完整年份
-    year = '20' + date_part[:2]
-    month = date_part[2:4]
-    day = date_part[4:]
-
-    hour = time_part[:2]
-    minute = time_part[2:]
-
-    # 创建 datetime 对象
-    base_time = datetime.strptime(f"{year}-{month}-{day} {hour}:{minute}", "%Y-%m-%d %H:%M")
-
-    # 计算时间区间
-    start_time = base_time.replace(second=0, microsecond=0) - timedelta(minutes=2)
-    end_time = base_time.replace(second=0, microsecond=0) + timedelta(minutes=3)
-
-    return base_time.strftime("%Y-%m-%d %H:%M"), start_time.strftime("%Y-%m-%d %H:%M:%S"), end_time.strftime(
-        "%Y-%m-%d %H:%M:%S")
-
-
-all_df = read_file_to_df(r"D:\data\淄博\故障记录_filtered.csv")
-all_df['激活时间'] = pd.to_datetime(all_df['激活时间'])
-
-all_files = read_excel_files(r"D:\data\淄博\淄博风场buffer文件(1)")
-
-dfs = pd.DataFrame()
-
-for file in all_files:
-    base_name = path.basename(file)
-    if base_name.startswith("b"):
-        try:
-            turbnine_no = int(base_name.split("_")[0].replace("b", ""))
-            base_time, start_time, end_time = convert_and_calculate_time_range(
-                base_name.replace(base_name.split("_")[0] + "_", "")[0:11])
-        except Exception as e:
-            print("error:", file)
-            raise e
-
-        condation1 = (all_df['激活时间'] >= start_time) & (all_df['风机名'] == turbnine_no)
-        condation2 = (all_df['激活时间'] < end_time) & (all_df['风机名'] == turbnine_no)
-        condation = condation1 & condation2
-        dfs = pd.concat([dfs, all_df[condation]])
-
-dfs.drop_duplicates(inplace=True)
-
-dfs.to_csv(r"D:\data\淄博\result.csv", encoding='utf8', index=False)

+ 0 - 98
tmp_file/偏航误差验证.py

@@ -1,98 +0,0 @@
-import os
-import sys
-from concurrent.futures.thread import ThreadPoolExecutor
-
-sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
-
-import datetime
-import multiprocessing
-
-import pandas as pd
-
-from utils.file.trans_methods import read_files, copy_to_new, read_excel_files, read_file_to_df
-from utils.zip.unzip import get_desc_path, unzip
-
-
-def get_real_path(win_path):
-    return win_path.replace(r'Z:', r'/data/download').replace("\\", '/')
-
-
-def unzip_or_remove(file, tmp_dir):
-    if str(file).endswith("zip"):
-        unzip(file, tmp_dir)
-    else:
-        # file.replace(file, tmp_dir) returned tmp_dir itself, so every copy
-        # landed on the same path; copy into tmp_dir under the original name
-        copy_to_new(file, os.path.join(tmp_dir, os.path.basename(file)))
-
-
-def read_file_to_df_and_select(file_path):
-    select_cols = ['Timestamp', 'Los', 'Distance', 'HWS(hub)', 'HWS(hub)status', 'DIR(hub)', 'DIR(hub)status']
-    df = read_file_to_df(file_path, read_cols=select_cols)
-    condition1 = df['HWS(hub)status'] > 0.8
-    condition2 = df['DIR(hub)status'] > 0.8
-    condition3 = df['Distance'].isin([70, 90])
-
-    df = df[condition1 & condition2 & condition3]
-    return df
-
-
-def read_month_data_and_select(month, files, gonglv_df):
-    with ThreadPoolExecutor(max_workers=10) as executor:
-        dfs = list(executor.map(read_file_to_df_and_select, files))
-
-    df = pd.concat(dfs, ignore_index=True)
-
-    df['Time1'] = df['Timestamp'].apply(lambda x: x.split('.')[0])
-    df['Time1'] = pd.to_datetime(df['Time1'], errors='coerce')
-    df['Time1'] = df['Time1'].apply(
-        lambda x: x + datetime.timedelta(seconds=10 - x.second % 10) if x.second % 10 != 0 else x)
-    del gonglv_df['month']
-    result_df = pd.merge(df, gonglv_df, left_on='Time1', right_on='Time1')
-    result_df.sort_values(by='Time1', inplace=True)
-    save_dir = get_real_path('Z:\偏航误差验证数据\整理结果')
-    # save_dir = r'D:\data\pianhang\result'
-    result_df.to_csv(os.path.join(save_dir, f'{month}.csv'), encoding='utf8', index=False)
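Review note: because the fractional seconds were already stripped via split('.'), the rounding lambda above is equivalent to the idiomatic pandas form, as a sketch:

    df['Time1'] = df['Time1'].dt.ceil('10s')  # round up to the next 10-second boundary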
-
-
-if __name__ == '__main__':
-    read_dir = 'Z:\偏航误差验证数据\新华佳县雷达数据'
-    read_dir = get_real_path(read_dir)
-
-    tmp_dir = get_real_path(r'Z:\偏航误差验证数据\tmp_data')
-    gonglv_dir = get_real_path(r'Z:\偏航误差验证数据\陕西建工陕西智华\report\output')
-
-    # read_dir = r'D:\data\pianhang\1'
-    # tmp_dir = r'D:\data\pianhang\tmp'
-    # gonglv_dir = r'D:\data\pianhang\2'
-
-    gonglv_files = read_excel_files(gonglv_dir)
-
-    with multiprocessing.Pool(20) as pool:
-        dfs = pool.starmap(read_file_to_df, [(i, ['collect_time', 'a0216']) for i in gonglv_files])
-
-    gonglv_df = pd.concat(dfs, ignore_index=True)
-    gonglv_df.columns = ['Time1', '功率']
-    gonglv_df['Time1'] = pd.to_datetime(gonglv_df['Time1'], errors='coerce')
-    gonglv_df['month'] = gonglv_df['Time1'].dt.month
-
-    all_files = read_files(tmp_dir)
-
-    all_files = [i for i in all_files if str(os.path.basename(i)).startswith('WindSpeed2024')]
-
-    # with multiprocessing.Pool(20) as pool:
-    #     pool.starmap(unzip_or_remove, [(file, tmp_dir) for file in all_files])
-
-    month_map = dict()
-    for file in all_files:
-        base_name = os.path.basename(file)
-        month = base_name[13:15]
-        if month in month_map.keys():
-            month_map[month].append(file)
-        else:
-            month_map[month] = [file]
-
-    excel_files = read_excel_files(tmp_dir)
-
-    with multiprocessing.Pool(5) as pool:
-        pool.starmap(read_month_data_and_select,
-                     [(month, files, gonglv_df[gonglv_df['month'] == int(month)]) for month, files in
-                      month_map.items()])

+ 0 - 155
tmp_file/光伏箱体.py

@@ -1,155 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jul  9 16:28:48 2024
-
-@author: Administrator
-"""
-
-from datetime import datetime
-from os import *
-
-import chardet
-import pandas as pd
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    df = pd.DataFrame()
-    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-        encoding = detect_file_encoding(file_path)
-        end_with_gz = str(file_path).lower().endswith("gz")
-        if read_cols:
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
-        else:
-
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-    else:
-        xls = pd.ExcelFile(file_path)
-        # 获取所有的sheet名称
-        sheet_names = xls.sheet_names
-        for sheet in sheet_names:
-            if read_cols:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
-            else:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
-
-    return df
-
-
-def __build_directory_dict(directory_dict, dir_path, filter_types=None):
-    # walk every entry under the directory; dir_path instead of `path`, which
-    # `from os import *` binds to the os.path module
-    for item in listdir(dir_path):
-        item_path = path.join(dir_path, item)
-        if path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif path.isfile(item_path):
-            if dir_path not in directory_dict:
-                directory_dict[dir_path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[dir_path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[dir_path].append(item_path)
-
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# create the directory; the parameter must not be named `path`, which
-# `from os import *` binds to the os.path module
-def create_file_path(target_path, is_file_path=False):
-    if is_file_path:
-        target_path = path.dirname(target_path)
-
-    if not path.exists(target_path):
-        makedirs(target_path, exist_ok=True)
-
-
-def read_and_save_csv(file_path):
-    begin = datetime.now()
-    base_name = path.basename(file_path)
-    print('开始', base_name)
-
-    df1 = read_file_to_df(file_path + "箱变(1-8号逆变器)数据1.xls")
-    del df1['Unnamed: 0']
-    df1['时间'] = pd.to_datetime(df1['时间'])
-    df1.set_index(keys='时间', inplace=True)
-
-    df2 = read_file_to_df(file_path + "箱变(9-16号逆变器)数据1.xls")
-    del df2['Unnamed: 0']
-    df2['时间'] = pd.to_datetime(df2['时间'])
-    df2.set_index(keys='时间', inplace=True)
-
-    df3 = read_file_to_df(file_path + "箱变(1-8号逆变器)数据2.xls")
-    del df3['Unnamed: 0']
-    df3['时间'] = pd.to_datetime(df3['时间'])
-    df3.set_index(keys='时间', inplace=True)
-
-    df4 = read_file_to_df(file_path + "箱变(9-16号逆变器)数据2.xls")
-    del df4['Unnamed: 0']
-    df4['时间'] = pd.to_datetime(df4['时间'])
-    df4.set_index(keys='时间', inplace=True)
-
-    df = pd.concat([df1, df2, df3, df4], axis=1)
-    df.reset_index(inplace=True)
-    columns = list(df.columns)
-    columns.sort()
-
-    print(df.columns)
-
-    df = df[columns]
-    df.sort_values(by='时间', inplace=True)
-
-    df.to_csv(path.join(r'D:\trans_data\大唐玉湖性能分析离线分析', '05整理数据', base_name + '_箱变.csv'), encoding='utf-8',
-              index=False)
-    print('结束', base_name, '耗时:' + str(datetime.now() - begin))
-
-
-if __name__ == '__main__':
-
-    # use read_path, not `path`: rebinding `path` to a string would break the
-    # path.basename/path.dirname calls below (path is the os.path module here)
-    read_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test'
-    all_files = read_excel_files(read_path)
-
-    all_paths = set()
-    for file in all_files:
-        base_name = path.basename(file).split("箱变")[0]
-        base_path = path.dirname(file)
-        if base_name not in all_paths:
-            all_paths.add(path.join(base_path, base_name))
-
-    all_datas = list(all_paths)
-    all_datas.sort()
-
-    print(all_datas)
-    # with Pool(1) as pool:
-    #     pool.starmap(read_and_save_csv, [(i,) for i in all_datas])
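Review note: the four-frame merge in read_and_save_csv relies on pd.concat(axis=1) aligning on the time index, which raises if any input carries duplicate timestamps; a minimal guard, as a sketch:

    frames = [df1, df2, df3, df4]
    frames = [f[~f.index.duplicated(keep='first')] for f in frames]  # drop repeated stamps
    df = pd.concat(frames, axis=1)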

+ 0 - 22
tmp_file/列名包含数据处理.py

@@ -1,22 +0,0 @@
-import pandas as pd
-
-path = r'd://data//11.csv'
-
-df = pd.read_csv(path, encoding='gb18030')
-df.reset_index(inplace=True)
-print(df.columns)
-df.columns = [i.replace('()', '') for i in df.columns]
-wind_names = set([i.split('#-')[0] for i in df.columns if i.find('#-') > -1])
-print(df.columns)
-print(wind_names)
-for wind_name in wind_names:
-    select_cols = [i for i in df.columns if str(i).startswith(wind_name)]
-    print(select_cols)
-    select_cols.insert(0, '时间')
-    print(select_cols)
-    df_temp = df[select_cols]
-    df_temp.columns = [i.split('#-')[-1] for i in df_temp.columns]
-
-    df_temp.sort_values(by='时间', inplace=True)
-
-    df_temp.to_csv("d://data//najiade//"+str(wind_name) + '.csv', encoding='utf8', index=False)

+ 0 - 40
tmp_file/吉山批次处理并重新存数据库.py

@@ -1,40 +0,0 @@
-import datetime
-import multiprocessing
-import sys
-from os import *
-
-sys.path.insert(0, path.abspath(__file__).split("tmp_file")[0])
-
-from service.trans_service import save_df_to_db, drop_table, creat_min_sec_table
-from utils.file.trans_methods import read_file_to_df, read_files
-
-
-def read_and_exec(file_path):
-    begin = datetime.datetime.now()
-    print("开始执行:", path.basename(file_path))
-    df = read_file_to_df(file_path)
-    df['yaw_error1'] = df['true_wind_direction'] - 180
-    df.to_csv(file_path, index=False, encoding='utf8')
-    creat_min_sec_table()
-    save_df_to_db('WOF079200018-WOB000012_second', df)
-    print("结束执行:", path.basename(file_path), ",耗时:", datetime.datetime.now() - begin)
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    env = 'prod'
-    if len(sys.argv) >= 2:
-        env = sys.argv[1]
-
-    conf_path = path.abspath(f"./conf/etl_config_{env}.yaml")
-    environ['ETL_CONF'] = conf_path
-    environ['env'] = env
-
-    drop_table("WOF079200018-WOB000012_second")
-
-    read_dir = r'/data/download/collection_data/2完成/吉山风电场-江西-大唐/清理数据/WOF079200018-WOB000012_JS一期1秒24.8-10/second'
-
-    all_files = read_files(read_dir)
-    with multiprocessing.Pool(24) as pool:
-        pool.map(read_and_exec, all_files)
-    print("总耗时:", datetime.datetime.now() - begin)

+ 0 - 97
tmp_file/对比文件夹列名差值.py

@@ -1,97 +0,0 @@
-from utils.file.trans_methods import *
-
-
-def boolean_is_check_data(df_cols):
-    fault_list = ['快速停机', '故障名称', '故障代码', '故障停机', '人工停机', '风机紧急停机', '工作模式']
-
-    df_cols = [str(i).split('_')[-1] for i in df_cols]
-    for fault in fault_list:
-        if fault in df_cols:
-            return True
-
-    return False
-
-
-def compareTwoFolders(list1, other_dfs):
-    for is_falut in [True]:
-        result_df = pd.DataFrame()
-        # for df1 in df1s:
-        #     tmp_list = [str(i).split('_')[-1] for i in list(df1.columns) if i != 'sheet_name']
-        #     if is_falut:
-        #         if boolean_is_check_data(df1.columns):
-        #             list1.extend(tmp_list)
-        #     else:
-        #         if not boolean_is_check_data(df1.columns):
-        #             list1.extend(tmp_list)
-
-        set1 = set(list1)
-
-        list1 = list(set1)
-        list1.sort()
-
-        result_df['完整列名'] = list1
-
-        for wind_name, dfs in other_dfs.items():
-
-            list2 = list()
-            for df in dfs:
-                tmp_list = [str(i).split('_')[-1] for i in list(df.columns) if i != 'sheet_name']
-                if is_falut:
-                    if boolean_is_check_data(df.columns):
-                        list2.extend(tmp_list)
-                else:
-                    if not boolean_is_check_data(df.columns):
-                        list2.extend(tmp_list)
-
-            set2 = set(list2)
-            list2 = list(set2)
-            list2.sort()
-
-            list3 = list(set1 - set2)
-            list3.sort()
-
-            # list4 = list(set2 - set1)
-            # list4.sort()
-            # print(list3)
-            # print(list4)
-
-            max_count = len(list1)
-            list1.extend([''] * (max_count - len(list1)))
-            list2.extend([''] * (max_count - len(list2)))
-            list3.extend([''] * (max_count - len(list3)))
-            # list4.extend([''] * (max_count - len(list4)))
-
-            result_df[str(wind_name) + '字段'] = list2
-            result_df[str(wind_name) + '比完整列名少字段'] = list3
-            # result_df['风机' + str(wind_name) + '_比风机1多字段'] = list4
-
-        file_name = 'col_compare.csv' if not is_falut else 'col_compare_fault.csv'
-
-        result_df.to_csv(file_name, encoding='utf-8', index=False)
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    dir2 = r'D:\data\新华水电\风机SCADA数据'
-    files2 = read_excel_files(dir2)
-
-    other_dfs = dict()
-    list1 = list()
-    for file in files2:
-        month = path.basename(path.dirname(path.dirname(file)))[0:2]
-        wind_name = month + path.basename(path.dirname(file)).split('#')[0] + '号风机'
-        df = read_file_to_df(file, nrows=1)
-        if boolean_is_check_data(df.columns):
-            list1.extend([str(i).split('_')[-1] for i in list(df.columns) if i != 'sheet_name'])
-        if wind_name in other_dfs.keys():
-            other_dfs[wind_name].append(df)
-        else:
-            other_dfs[wind_name] = [df]
-
-    # with multiprocessing.Pool(10) as pool:
-    #     df2s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files2])
-    #
-    list1 = [i for i in list(set(list1)) if i != 'sheet_name']
-    compareTwoFolders(list1, other_dfs)
-
-    print(datetime.datetime.now() - begin)

+ 0 - 96
tmp_file/年度汇总平均缺失率.py

@@ -1,96 +0,0 @@
-import calendar
-import datetime
-import math
-import multiprocessing
-
-import pandas as pd
-
-from utils.file.trans_methods import read_excel_files, read_file_to_df
-
-
-def get_year_days(year):
-    now_year = datetime.datetime.now().year
-
-    if now_year == year:
-        today = datetime.date.today()
-        # 获取昨天的日期
-        yesterday = today - datetime.timedelta(days=1)
-        # 获取今年的第一天
-        start_of_year = datetime.date(yesterday.year, 1, 1)
-        # 计算从年初到昨天的天数
-        return (yesterday - start_of_year).days + 1
-
-    if calendar.isleap(year):
-        return 366
-    else:
-        return 365
-
-
-def save_percent(value, save_decimal=7):
-    return round(value, save_decimal) * 100
-
-
-if __name__ == '__main__':
-
-    read_dir = r'D:\data\综合报表22-24年'
-
-    all_fils = read_excel_files(read_dir)
-
-    with multiprocessing.Pool(6) as pool:
-        dfs = pool.map(read_file_to_df, all_fils)
-
-    df = pd.concat(dfs, ignore_index=True)
-    del_cols = ['Unnamed: 0', '序号', 'times']
-
-    for col in del_cols:
-        del df[col]
-
-    df = df.query("风机 != '完整'")
-
-    numic_cols = ['数据有效性', '历史总有功发电量', '历史总有功耗电量',
-                  '查询区间有功发电量', '查询区间有功耗电量', '历史总无功发电量', '历史总无功耗电量',
-                  '查询区间无功发电量',
-                  '查询区间无功耗电量', '时间可利用率', '最大风速', '最小风速', '平均风速', '空气密度', '最大有功功率',
-                  '最小有功功率', '平均有功功率', '平均无功功率', '电网停机次数', '累计运行时间', '有效风时数',
-                  '满发时间',
-                  '启动时间', '启动次数', '并网发电时间', '等效发电时间', '正常发电时间', '调度限功率发电时间',
-                  '风机限功率发电时间',
-                  '停机时间', '维护停机时间', '故障停机时间', '调度停机时间', '气象停机时间', '电网停机时间',
-                  '远程停机时间',
-                  '待机时间', '户外平均温度', '机舱最高温度', '维护停机次数', '气象停机次数', '故障停机次数',
-                  '报警发电时间',
-                  '报警发电次数', '偏航时长', '偏航次数', '通讯中断时间', '通讯故障次数', '调度限功率发电损失电量',
-                  '风机限功率发电损失电量', '气象停机损失电量', '调度限功率停机损失电量', '远程停机损失电量',
-                  '维护停机损失电量',
-                  '风机故障停机损失电量', '电网停机损失电量']
-
-    for numic_col in numic_cols:
-        df[numic_col] = pd.to_numeric(df[numic_col], errors='coerce')
-
-    cols = df.columns
-    df['year'] = pd.to_datetime(df['时间'], errors='coerce').dt.year
-
-    group_df = df.groupby(by=['year', '风机']).count()
-    group_df.reset_index(inplace=True)
-    count_df = pd.DataFrame(group_df)
-
-    # now_df.to_csv('聚合后.csv', encoding='utf-8', index=False)
-
-    years = count_df['year'].unique()
-    wind_names = count_df['风机'].unique()
-    numic_cols.insert(0, '时间')
-
-    result_df = pd.DataFrame()
-    for year in years:
-        year_days = get_year_days(year)
-        for wind_name in wind_names:
-            count = count_df[(count_df['year'] == year) & (count_df['风机'] == wind_name)][numic_cols].values[0].sum()
-            print(year, wind_name, count, len(numic_cols) * year_days)
-            now_df = pd.DataFrame()
-            now_df['时间'] = [int(year)]
-            now_df['风机'] = [wind_name]
-            now_df['缺失均值'] = [save_percent(count / (len(numic_cols) * year_days))]
-
-            result_df = pd.concat([result_df, now_df])
-
-    result_df.to_csv('年度平均缺失率.csv', encoding='utf-8', index=False)

+ 0 - 31
tmp_file/张崾先26故障.py

@@ -1,31 +0,0 @@
-import pandas as pd
-
-df = pd.read_csv(r'C:\Users\Administrator\Documents\WeChat Files\anmox-\FileStorage\File\2024-12\26故障.csv',
-                 encoding='gbk')
-df['开始时间'] = pd.to_datetime(df['开始时间'], errors='coerce')
-df['结束时间'] = pd.to_datetime(df['结束时间'], errors='coerce')
-time_df = pd.DataFrame(df.groupby(['开始时间'])['结束时间'].max())
-time_df.reset_index(inplace=True)
-time_df.sort_values(by='开始时间', inplace=True)
-
-datas = set()
-max_row = None
-for index, row in time_df.iterrows():
-    if index == 0:
-        datas.add((row['开始时间'], row['结束时间']))
-        max_row = row
-        continue
-
-    if row['结束时间'] > max_row['结束时间']:
-        datas.add((row['开始时间'], row['结束时间']))
-        max_row = row
-
-result_df = pd.DataFrame()
-for begin, end in datas:
-    print(begin, end)
-    now_df = df[(df['开始时间'] == begin) & (df['结束时间'] == end)]
-    now_df = now_df.tail(1)
-    result_df = pd.concat([result_df, now_df])
-
-result_df.sort_values(by='开始时间', inplace=True)
-result_df.to_csv(r'd:\data\26故障_new.csv', encoding='utf8', index=False)
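Review note: the iterrows sweep above keeps each interval whose end time extends past every earlier end; the same selection can be vectorized. A sketch, assuming time_df is sorted by 开始时间 as it is here:

    prev_max = time_df['结束时间'].cummax().shift()
    keep = prev_max.isna() | (time_df['结束时间'] > prev_max)  # first row, then strict extensions
    selected = time_df[keep]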

+ 0 - 46
tmp_file/张崾先筛选20241210.py

@@ -1,46 +0,0 @@
-import datetime
-import multiprocessing
-import os
-import sys
-
-import pandas as pd
-
-sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
-
-from utils.file.trans_methods import read_file_to_df, read_excel_files
-
-# read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/点检表以外测点儿-20241209'
-
-# save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/变桨-20241210'
-
-# user_cols = ['Time', '机舱外温度', '桨叶角度A', '桨叶角度B', '桨叶角度C',
-#              '轴1电机电流', '轴2电机电流', '轴3电机电流',
-#              '轴1电机温度', '轴2电机温度', '轴3电机温度']
-
-
-read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/点检表以外测点儿-20241210'
-
-save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/偏航-20241210'
-
-user_cols = ['Time', '实际风速', '偏航误差', '电缆扭角', '激活偏航解缆阀','激活顺时针偏航','激活逆时针偏航']
-
-os.makedirs(save_dir, exist_ok=True)
-
-
-def read_and_save(file_path, read_dir, save_dir):
-    begin = datetime.datetime.now()
-    df = read_file_to_df(file_path, read_cols=user_cols)
-    df['Time'] = pd.to_datetime(df['Time'], errors='coerce')
-    df.sort_values(by=['Time'], inplace=True)
-    df.to_csv(os.path.join(save_dir, os.path.basename(file_path)), index=False, encoding='utf8')
-    print(os.path.basename(file_path), '耗时:', (datetime.datetime.now() - begin))
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    all_files = read_excel_files(read_dir)
-
-    with multiprocessing.Pool(16) as pool:
-        pool.starmap(read_and_save, [(file, read_dir, save_dir) for file in all_files])
-
-    print('总耗时:', (datetime.datetime.now() - begin))

+ 0 - 67
tmp_file/张崾先统计-分钟.py

@@ -1,67 +0,0 @@
-import multiprocessing
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
-
-import pandas as pd
-
-from utils.file.trans_methods import read_file_to_df
-
-
-def save_percent(value, save_decimal=7):
-    return round(value, save_decimal) * 100
-
-
-def read_and_select(file_path, read_cols):
-    result_df = pd.DataFrame()
-    df = read_file_to_df(file_path, read_cols=read_cols)
-    wind_name = os.path.basename(file_path).split('.')[0]
-    df['风机号'] = wind_name
-    df = df.query("(startTime>='2023-10-01 00:00:00') & (startTime<'2024-10-01 00:00:00')")
-    count = 366 * 24 * 6  # 十分钟数据  2024年366天
-    repeat_time_count = df.shape[0] - len(df['startTime'].unique())
-    print(wind_name, count, repeat_time_count)
-    result_df['风机号'] = [wind_name]
-    result_df['重复率'] = [save_percent(repeat_time_count / count)]
-    result_df['重复次数'] = [repeat_time_count]
-    result_df['总记录数'] = [count]
-
-    for read_col in read_cols:
-
-        if read_col != 'startTime':
-            df[read_col] = pd.to_numeric(df[read_col], errors='coerce')
-        else:
-            df[read_col] = pd.to_datetime(df[read_col], errors='coerce')
-
-    group_df = df.groupby(by=['风机号']).count()
-    group_df.reset_index(inplace=True)
-    count_df = pd.DataFrame(group_df)
-    total_count = count_df[read_cols].values[0].sum()
-    print(wind_name, total_count, count * len(read_cols))
-    result_df['平均缺失率,单位%'] = [save_percent(1 - total_count / (count * len(read_cols)))]
-    result_df['缺失数值'] = ['-'.join([str(count - i) for i in count_df[read_cols].values[0]])]
-    del group_df
-
-    error_fengsu_count = df.query("(风速10min < 0) | (风速10min > 80)").shape[0]
-    error_yougong_gonglv = df.query("(有功功率 < -200) | (有功功率 > 4800)").shape[0]
-
-    result_df['平均异常率'] = [save_percent((error_fengsu_count + error_yougong_gonglv) / (2 * count))]
-
-    return result_df
-
-
-if __name__ == '__main__':
-    read_cols_str = 'startTime,有功功率,叶轮转速,发电机转速,风速10min,桨叶1角度,桨叶2角度,桨叶3角度,机舱位置,偏航误差,发电机轴承温度,机舱内温度,环境温度,发电机U相温度,发电机V相温度,发电机W相温度'
-    read_cols = [i for i in read_cols_str.split(",") if i]
-    read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/收资数据/导出数据2'
-
-    files = os.listdir(read_dir)
-
-    with multiprocessing.Pool(16) as pool:
-        dfs = pool.starmap(read_and_select, [(os.path.join(read_dir, i), read_cols) for i in files])
-
-    df = pd.concat(dfs, ignore_index=True)
-    df.sort_values(by=['风机号'], inplace=True)
-
-    df.to_csv("张崾先统计-分钟.csv", encoding='utf8', index=False)

+ 0 - 92
tmp_file/张崾先统计-秒.py

@@ -1,92 +0,0 @@
-import multiprocessing
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
-
-import pandas as pd
-
-from utils.file.trans_methods import read_file_to_df
-
-
-def save_percent(value, save_decimal=7):
-    return round(value, save_decimal) * 100
-
-
-def read_and_select(file_path, read_cols):
-    result_df = pd.DataFrame()
-    df = read_file_to_df(file_path, read_cols=read_cols)
-    wind_name = os.path.basename(file_path).split('.')[0]
-    df['风机号'] = wind_name
-    df = df.query("(Time>='2024-06-01 00:00:00') & (Time<'2024-12-01 00:00:00')")
-    count = 15811200  # 1秒数据  半年
-    repeat_time_count = df.shape[0] - len(df['Time'].unique())
-    print(wind_name, count, repeat_time_count)
-    result_df['风机号'] = [wind_name]
-    result_df['重复率'] = [save_percent(repeat_time_count / count)]
-    result_df['重复次数'] = [repeat_time_count]
-    result_df['总记录数'] = [count]
-
-    for read_col in read_cols:
-
-        if read_col != 'Time':
-            df[read_col] = pd.to_numeric(df[read_col], errors='coerce')
-        else:
-            df[read_col] = pd.to_datetime(df[read_col], errors='coerce')
-
-    group_df = df.groupby(by=['风机号']).count()
-    group_df.reset_index(inplace=True)
-    count_df = pd.DataFrame(group_df)
-    total_count = count_df[read_cols].values[0].sum()
-    print(wind_name, total_count, count * len(read_cols))
-    result_df['平均缺失率,单位%'] = [save_percent(1 - total_count / (count * len(read_cols)))]
-    result_df['缺失数值'] = ['-'.join([str(count - i) for i in count_df[read_cols].values[0]])]
-    del group_df
-
-    fengsu_count = 0
-    fengsu_cols = [i for i in read_cols if '风速' in i]
-    fengsu_str = ''
-    for col in fengsu_cols:
-        now_count = df.query("(" + col + " < 0) | (" + col + " > 80)").shape[0]
-        fengsu_count = fengsu_count + now_count
-        # record the per-column count, not the running total
-        fengsu_str = fengsu_str + ',' + col + ':' + str(now_count)
-    result_df['风速异常'] = [fengsu_str]
-
-    gonglv_cols = ['有功功率', '瞬时功率', '当前理论可发最大功率']
-    gonglv_count = 0
-    gonglv_str = ''
-    for col in gonglv_cols:
-        now_count = df.query("(" + col + " < -200) | (" + col + " > 4800)").shape[0]
-        gonglv_count = gonglv_count + now_count
-        # record the per-column count, not the running total
-        gonglv_str = gonglv_str + ',' + col + ':' + str(now_count)
-    result_df['功率异常'] = [gonglv_str]
-
-    result_df['平均异常率'] = [
-        # was (fengsu_count + fengsu_count): the power anomalies were never counted
-        save_percent((fengsu_count + gonglv_count) / ((len(fengsu_cols) + len(gonglv_cols)) * count))]
-
-    return result_df
-
-
-if __name__ == '__main__':
-    read_cols = ['Time', '设备主要状态', '功率曲线风速', '湍流强度', '实际风速', '有功功率', '桨叶角度A', '桨叶角度B',
-                 '桨叶角度C', '机舱内温度', '机舱外温度', '绝对风向', '机舱绝对位置', '叶轮转速', '发电机转速',
-                 '瞬时风速',
-                 '有功设定反馈', '当前理论可发最大功率', '空气密度', '偏航误差', '发电机扭矩', '瞬时功率', '风向1s',
-                 '偏航压力', '桨叶1速度', '桨叶2速度', '桨叶3速度', '桨叶1角度给定', '桨叶2角度给定', '桨叶3角度给定',
-                 '轴1电机电流', '轴2电机电流', '轴3电机电流', '轴1电机温度', '轴2电机温度', '轴3电机温度', '待机',
-                 '启动',
-                 '偏航', '并网', '限功率', '正常发电', '故障', '计入功率曲线', '运行发电机冷却风扇1',
-                 '运行发电机冷却风扇2',
-                 '激活偏航解缆阀', '激活偏航刹车阀', '激活风轮刹车阀', '激活顺时针偏航', '激活逆时针偏航', '电缆扭角']
-
-    read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/点检表以外测点儿-20241210'
-
-    files = os.listdir(read_dir)
-
-    with multiprocessing.Pool(4) as pool:
-        dfs = pool.starmap(read_and_select, [(os.path.join(read_dir, i), read_cols) for i in files])
-
-    df = pd.concat(dfs, ignore_index=True)
-    df.sort_values(by=['风机号'], inplace=True)
-
-    df.to_csv("张崾先统计-秒.csv", encoding='utf8', index=False)

+ 0 - 90
tmp_file/张崾先震动_参数获取.py

@@ -1,90 +0,0 @@
-import datetime
-import multiprocessing
-import os.path
-
-import pandas as pd
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in os.listdir(path):
-        item_path = os.path.join(path, item)
-        if os.path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif os.path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path, filter_types=None):
-    if filter_types is None:
-        filter_types = ['xls', 'xlsx', 'csv', 'gz']
-    if os.path.isfile(read_path):
-        return [read_path]
-
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=filter_types)
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 读取路径下所有的文件
-def read_files(read_path, filter_types=None):
-    if filter_types is None:
-        filter_types = ['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar']
-    if os.path.isfile(read_path):
-        return [read_path]
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=filter_types)
-
-    return [path1 for paths in directory_dict.values() for path1 in paths if path1]
-
-
-all_files = read_files(r'G:\CMS', ['txt'])
-
-
-def get_line_count(file_path):
-    with open(file_path, 'r', encoding='utf-8') as file:
-        return sum(1 for _ in file)
-
-
-def read_file_and_read_count(index, file_path, datas):
-    if index % 10000 == 0:
-        print(datetime.datetime.now(), index)
-    base_name = os.path.basename(file_path).split('.')[0]
-    cols = base_name.split('_')
-
-    cols.append(get_line_count(file_path))
-    datas.append(cols)
-
-
-def get_name(x):
-    result_str = ''
-    if x['col3'] != '无':
-        result_str += x['col3']
-    result_str += x['col2']
-    if x['col4'] != '无':
-        result_str += x['col4']
-    result_str += x['col7']
-    return result_str
-
-
-if __name__ == '__main__':
-    datas = multiprocessing.Manager().list()
-
-    with multiprocessing.Pool(20) as pool:
-        pool.starmap(read_file_and_read_count, [(i, file_path, datas) for i, file_path in enumerate(all_files)])
-
-    df = pd.DataFrame(datas, columns=[f'col{i}' for i in range(10)])
-
-    df['col8'] = pd.to_datetime(df['col8'], format='%Y%m%d%H%M%S', errors='coerce')
-    df.sort_values(by=['col1', 'col8'], inplace=True)
-    df['测点完整名称'] = df.apply(get_name, axis=1)
-    df.to_csv('d://cms_data.csv', index=False, encoding='utf8')

+ 0 - 32
tmp_file/张崾先风电场-故障整理.py

@@ -1,32 +0,0 @@
-import multiprocessing
-import os
-
-import pandas as pd
-
-read_dir = 'D:\data\张崾先风电场\故障事件数据'
-save_dir = 'D:\data\崾先风电场\故障事件数据整理'
-
-print(os.listdir(read_dir))
-
-
-def read_solve_data(file_dir):
-    base_dir = os.path.basename(file_dir)
-    df = pd.DataFrame()
-    for file in os.listdir(file_dir):
-        df = pd.concat([df, pd.read_csv(file_dir + '/' + file, encoding='gbk')])
-
-    df['开始时间'] = pd.to_datetime(df['开始时间'], errors='coerce')
-    df = df.query("(开始时间 >= '2024-01-01 00:00:00') & (开始时间 < '2024-12-01 00:00:00')")
-    df['month'] = df['开始时间'].dt.month
-    months = df['month'].unique()
-    for month in months:
-        df_month = df[df['month'] == month]
-        os.makedirs(save_dir + os.sep + base_dir, exist_ok=True)
-        df_month.to_csv(save_dir + os.sep + base_dir + os.sep + str(month) + '.csv', index=False)
-
-
-if __name__ == '__main__':
-    dirs = os.listdir(read_dir)
-
-    with multiprocessing.Pool(4) as pool:
-        pool.map(read_solve_data, [read_dir + os.sep + i for i in dirs])

+ 0 - 108
tmp_file/张崾先风电场-非点检字段获取.py

@@ -1,108 +0,0 @@
-import datetime
-import multiprocessing
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
-
-from utils.file.trans_methods import read_excel_files, copy_to_new, read_file_to_df
-from utils.zip.unzip import unzip, get_desc_path, unrar
-import pandas as pd
-
-read_cols = ['Time', '设备主要状态', '功率曲线风速', '湍流强度', '实际风速', '有功功率', '桨叶角度A', '桨叶角度B',
-             '桨叶角度C', '机舱内温度', '机舱外温度', '绝对风向', '机舱绝对位置', '叶轮转速', '发电机转速', '瞬时风速',
-             '有功设定反馈', '当前理论可发最大功率', '空气密度', '偏航误差', '发电机扭矩', '瞬时功率', '风向1s',
-             '偏航压力', '桨叶1速度', '桨叶2速度', '桨叶3速度', '桨叶1角度给定', '桨叶2角度给定', '桨叶3角度给定',
-             '轴1电机电流', '轴2电机电流', '轴3电机电流', '轴1电机温度', '轴2电机温度', '轴3电机温度', '待机', '启动',
-             '偏航', '并网', '限功率', '正常发电', '故障', '计入功率曲线', '运行发电机冷却风扇1', '运行发电机冷却风扇2',
-             '激活偏航解缆阀', '激活偏航刹车阀', '激活风轮刹车阀', '激活顺时针偏航', '激活逆时针偏航', '电缆扭角']
-
-read_path = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/收资数据/sec'
-save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/点检表以外测点儿-20241210'
-tmp_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/tmp/second/excel_tmp'
-
-# read_path = r'D:\data\张崾先风电场\6'
-# save_dir = r'D:\data\张崾先风电场\点检表以外测点儿-20241209'
-# tmp_dir = r'D:\data\张崾先风电场\tmp'
-
-os.makedirs(tmp_dir, exist_ok=True)
-os.makedirs(save_dir, exist_ok=True)
-
-
-def get_and_remove(file):
-    to_path = tmp_dir
-    if str(file).endswith("zip"):
-        if str(file).endswith("csv.zip"):
-            copy_to_new(file, file.replace(read_path, to_path).replace("csv.zip", 'csv.gz'))
-        else:
-            desc_path = file.replace(read_path, to_path)
-            is_success, e = unzip(file, get_desc_path(desc_path))
-            if not is_success:
-                # raise e
-                pass
-    elif str(file).endswith("rar"):
-        desc_path = file.replace(read_path, to_path)
-        unrar(file, get_desc_path(desc_path))
-    else:
-        copy_to_new(file, file.replace(read_path, to_path))
-
-
-def get_resolve(file_path, exist_wind_names, map_lock):
-    begin = datetime.datetime.now()
-    df = read_file_to_df(file_path, read_cols=read_cols)
-    wind_name = str(os.path.basename(file_path)[0:2])
-    date = os.path.basename(file_path)[14:24]
-    df['Time'] = df['Time'].apply(lambda x: date + ' ' + x)
-    df = df[read_cols]
-    with map_lock[str(wind_name)]:
-        if wind_name in exist_wind_names:
-            df.to_csv(save_dir + '/' + wind_name + '.csv', mode='a', index=False, header=False, encoding='utf8')
-        else:
-            df.to_csv(save_dir + '/' + wind_name + '.csv', index=False, encoding='utf8')
-            exist_wind_names.append(wind_name)
-
-    print(os.path.basename(file_path), '执行完成,耗时:', get_haoshi(begin))
-
-
-def sort_data(file_path):
-    df = pd.read_csv(file_path, encoding='utf8')
-    df['Time'] = pd.to_datetime(df['Time'], errors='coerce')  # keyword is errors, not error
-    df.sort_values(by=['Time'], inplace=True)
-    df.to_csv(file_path, index=False, encoding='utf8')
-
-
-def get_haoshi(begin):
-    return datetime.datetime.now() - begin
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    # all_files = read_files(read_path)
-    # split_count = get_available_cpu_count_with_percent(1 / 2)
-    # all_arrays = split_array(all_files, split_count)
-    #
-    # for index, arr in enumerate(all_arrays):
-    #     with multiprocessing.Pool(10) as pool:
-    #         pool.starmap(get_and_remove, [(i,) for i in arr])
-    #
-    # print("移动完成,耗时:", get_haoshi(begin))
-
-    # exist_wind_names = multiprocessing.Manager().list()
-    #
-    # map_lock = dict()
-    # for i in range(26, 42):
-    #     map_lock[str(i)] = multiprocessing.Manager().Lock()
-    #
-    # all_files = read_excel_files(tmp_dir)
-    # with multiprocessing.Pool(16) as pool:
-    #     pool.starmap(get_resolve, [(i, exist_wind_names, map_lock) for i in all_files])
-    #
-    # print("整理完成,耗时:", get_haoshi(begin))
-
-    all_files = read_excel_files(save_dir)
-    with multiprocessing.Pool(4) as pool:
-        pool.map(sort_data, all_files)
-    print("排序完成,耗时:", get_haoshi(begin))
-
-    # shutil.rmtree(tmp_dir)
-    # print("移除临时文件完成,耗时:", get_haoshi(begin))

+ 0 - 158
tmp_file/玉湖光伏-标准化.py

@@ -1,158 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jul  9 16:28:48 2024
-
-@author: Administrator
-"""
-import multiprocessing
-from datetime import datetime
-import os
-
-import chardet
-import pandas as pd
-
-pd.options.mode.copy_on_write = True
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    df = pd.DataFrame()
-    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-        encoding = detect_file_encoding(file_path)
-        end_with_gz = str(file_path).lower().endswith("gz")
-        if read_cols:
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
-        else:
-
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-    else:
-        xls = pd.ExcelFile(file_path)
-        # 获取所有的sheet名称
-        sheet_names = xls.sheet_names
-        for sheet in sheet_names:
-            if read_cols:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
-            else:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
-
-    return df
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in os.listdir(path):
-        item_path = os.path.join(path, item)
-        if os.path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif os.path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-    # 读取所有文件
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    if is_file_path:
-        path = os.path.dirname(path)
-
-    if not os.path.exists(path):
-        os.makedirs(path, exist_ok=True)
-
-
-def generate_df(pv_df, col):
-    if col != '时间':
-        xiangbian = col.split("逆变器")[0].replace("#", "")
-        nibianqi = col.split("-")[0].split('逆变器')[1]
-        pv_index = col.split("-")[1].replace("PV", "")
-        now_df = pv_df[['时间', col + '输入电流()', col + '输入电压()']]
-        now_df.loc[:, '箱变'] = xiangbian
-        now_df.loc[:, '逆变器'] = nibianqi
-        now_df.loc[:, 'PV'] = pv_index
-        now_df.columns = [df_col.replace(col, "").replace("()", "") for df_col in now_df.columns]
-        now_df['输入电流'] = now_df['输入电流'].astype(float)
-        now_df['输入电压'] = now_df['输入电压'].astype(float)
-
-        print(xiangbian, nibianqi, pv_index, now_df.shape)
-        return now_df
-    return pd.DataFrame()
-
-
-def read_and_save_csv(file_path, save_path):
-    begin = datetime.now()
-    base_name = os.path.basename(file_path)
-    print('开始', base_name)
-
-    df = read_file_to_df(file_path)
-    df['时间'] = pd.to_datetime(df['时间'])
-    # df.set_index(keys='时间', inplace=True)
-
-    pv_df_cols = [col for col in df.columns if col.find('输入电') > -1]
-    pv_df_cols.append('时间')
-    pv_df = df[pv_df_cols]
-    shuru_cols = set([col.split("输入电")[0] for col in pv_df.columns])
-
-    with multiprocessing.Pool(6) as pool:
-        dfs = pool.starmap(generate_df, [(pv_df, col) for col in shuru_cols])
-
-    saved_pv_df = pd.concat(dfs)
-    saved_pv_df.sort_values(by=['箱变', '逆变器', 'PV', '时间'], inplace=True)
-    save_file = os.path.join(save_path, os.path.basename(file_path).split(".")[0], 'PV.csv')
-    create_file_path(save_file, True)
-
-    saved_pv_df.to_csv(save_file, encoding='utf-8', index=False)
-
-    print('结束', base_name, '耗时:' + str(datetime.now() - begin))
-
-
-if __name__ == '__main__':
-    read_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\yuanshi'
-    save_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\zhengli'
-    all_files = read_excel_files(read_path)
-
-    all_datas = list(all_files)
-    all_datas.sort()
-    print(all_datas)
-
-    for file in all_datas:
-        read_and_save_csv(file, save_path)
-
-    # with Pool(1) as pool:
-    #     pool.starmap(read_and_save_csv, [(i, save_path) for i in all_datas])
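Each of the deleted 玉湖 scripts carries its own copy of the chardet-based encoding sniff plus CSV reader. A condensed sketch of that shared pattern, keeping the original 1000-byte sample and gb18030 fallback (function names are illustrative):

import chardet
import pandas as pd

def sniff_encoding(filename, sample_size=1000):
    # 读取文件开头若干字节做编码检测;检测失败或常见误报时退回 gb18030
    with open(filename, 'rb') as f:
        raw = f.read(sample_size)
    encoding = chardet.detect(raw)['encoding']
    if encoding is None or encoding.lower() == 'gb2312' or encoding.lower().startswith('windows'):
        return 'gb18030'
    return encoding

def read_csv_any(file_path):
    return pd.read_csv(file_path, encoding=sniff_encoding(file_path), on_bad_lines='warn')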

+ 0 - 209
tmp_file/玉湖光伏-标准化_1.py

@@ -1,209 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jul  9 16:28:48 2024
-
-@author: Administrator
-"""
-import multiprocessing
-from datetime import datetime
-import os
-
-import chardet
-import pandas as pd
-
-pd.options.mode.copy_on_write = True
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    df = pd.DataFrame()
-    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-        encoding = detect_file_encoding(file_path)
-        end_with_gz = str(file_path).lower().endswith("gz")
-        if read_cols:
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
-        else:
-
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-    else:
-        xls = pd.ExcelFile(file_path)
-        # 获取所有的sheet名称
-        sheet_names = xls.sheet_names
-        for sheet in sheet_names:
-            if read_cols:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
-            else:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
-
-    return df
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in os.listdir(path):
-        item_path = os.path.join(path, item)
-        if os.path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif os.path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-    # 读取所有文件
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    if is_file_path:
-        path = os.path.dirname(path)
-
-    if not os.path.exists(path):
-        os.makedirs(path, exist_ok=True)
-
-
-def generate_df(pv_df, col):
-    if col != '时间':
-        xiangbian = col.split("逆变器")[0].replace("#", "")
-        nibianqi = col.split("-")[0].split('逆变器')[1]
-        pv_index = col.split("-")[1].replace("PV", "")
-        now_df = pv_df[['时间', col + '输入电流()', col + '输入电压()']]
-        now_df.loc[:, '箱变'] = xiangbian
-        now_df.loc[:, '逆变器'] = nibianqi
-        now_df.loc[:, 'PV'] = pv_index
-        now_df.columns = [df_col.replace(col, "").replace("()", "") for df_col in now_df.columns]
-        now_df['输入电流'] = now_df['输入电流'].astype(float)
-        now_df['输入电压'] = now_df['输入电压'].astype(float)
-
-        print(xiangbian, nibianqi, pv_index, now_df.shape)
-        return now_df
-    return pd.DataFrame()
-
-
-def split_index(split_data: str, split_str: str):
-    count = split_data.find(split_str)
-    if count > -1:
-        return split_data[count + len(split_str):]
-    else:
-        return split_str
-
-
-def replece_col_to_biaozhun(col):
-    for k, v in dianjian_dict.items():
-        if col.find(k) > -1:
-            col = col.replace(k, v)
-            return col
-
-    return col
-
-
-def read_and_save_csv(file_path, save_path):
-    begin = datetime.now()
-    base_name = os.path.basename(file_path)
-    print('开始', base_name)
-
-    df = read_file_to_df(file_path)
-
-    for col in df.columns:
-        for del_col in del_cols:
-            if col.find(del_col) > -1:
-                del df[col]
-
-    df['时间'] = pd.to_datetime(df['时间'])
-    xiangbian = [col for col in df.columns
-                 if str(col).startswith('#') and str(col).find('逆变器') > -1][0].split("逆变器")[0].replace("#", "")
-    df.columns = [xiangbian + "_" + split_index(df_col, "逆变器").replace('PV', "").replace("()", "").replace("-", "_")
-                  if df_col.startswith("#") else df_col
-                  for df_col in df.columns]
-
-    df.columns = [col.replace("输入", "_输入") for col in df.columns]
-
-    df.columns = [replece_col_to_biaozhun(col) for col in df.columns]
-
-    # saved_pv_df = pd.concat(dfs)
-    df.sort_values(by=['时间'], inplace=True)
-    save_file = os.path.join(save_path, os.path.basename(file_path))
-    create_file_path(save_file, True)
-
-    df.to_csv(save_file, encoding='utf-8', index=False)
-
-    print('结束', base_name, '耗时:' + str(datetime.now() - begin))
-
-
-dianjian_data_str = """
-输入电压	支路输出电压
-输入电流	支路输出电流
-功率因数	
-总发电量	逆变器总发电量
-无功功率	
-有功功率	逆变器输出有功功率
-机内温度	逆变器温度
-电网AB线电压	交流输出电压
-电网A相电流	逆变器输出电流A相
-电网BC线电压	
-电网B相电流	逆变器输出电流B相
-电网CA线电压	
-电网C相电流	逆变器输出电流C相
-逆变器效率	逆变器转换效率
-"""
-
-dianjian_dict = {}
-del_cols = []
-for data in dianjian_data_str.split("\n"):
-    if data:
-        datas = data.split("\t")
-        if len(datas) == 2 and datas[1]:
-            dianjian_dict[datas[0]] = datas[1]
-        else:
-            del_cols.append(datas[0])
-
-if __name__ == '__main__':
-    read_path = r'/data/download/大唐玉湖性能分析离线分析/05整理数据/逆变器数据'
-    save_path = r'/data/download/大唐玉湖性能分析离线分析/06整理数据/逆变器数据'
-    all_files = read_excel_files(read_path)
-
-    all_datas = list(all_files)
-    all_datas.sort()
-    print(all_datas)
-    #
-    # for file in all_datas:
-    #     read_and_save_csv(file, save_path)
-
-    with multiprocessing.Pool(20) as pool:
-        pool.starmap(read_and_save_csv, [(i, save_path) for i in all_datas])
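标准化_1.py derives its rename map from the tab-separated 点检 table: rows with two fields become rename entries, rows with one field mark columns to drop. A compact sketch of applying such a map with the script's substring-replacement semantics; the mapping below is an excerpt, not the full table:

import pandas as pd

# 节选:左侧为原始测点名片段,右侧为标准名;无标准名的片段对应整列删除
mapping = {'输入电压': '支路输出电压', '输入电流': '支路输出电流', '机内温度': '逆变器温度'}
drop_keys = ['功率因数', '无功功率']

def standardize_columns(df):
    # 先删掉点检表中无标准名的测点列
    keep = [c for c in df.columns if not any(k in c for k in drop_keys)]
    out = df[keep].copy()
    # 再按映射做子串替换,保留列名中的箱变/逆变器前缀
    out.columns = [next((c.replace(k, v) for k, v in mapping.items() if k in c), c)
                   for c in out.columns]
    return out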

+ 0 - 283
tmp_file/玉湖光伏-标准化_2.py

@@ -1,283 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jul  9 16:28:48 2024
-
-@author: Administrator
-"""
-import multiprocessing
-from datetime import datetime
-import os
-
-import chardet
-import numpy as np
-import pandas as pd
-
-pd.options.mode.copy_on_write = True
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    df = pd.DataFrame()
-    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-        encoding = detect_file_encoding(file_path)
-        end_with_gz = str(file_path).lower().endswith("gz")
-        if read_cols:
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
-        else:
-
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-    else:
-        xls = pd.ExcelFile(file_path)
-        # 获取所有的sheet名称
-        sheet_names = xls.sheet_names
-        for sheet in sheet_names:
-            if read_cols:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
-            else:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
-
-    return df
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in os.listdir(path):
-        item_path = os.path.join(path, item)
-        if os.path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif os.path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-    # 读取所有文件
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    if is_file_path:
-        path = os.path.dirname(path)
-
-    if not os.path.exists(path):
-        os.makedirs(path, exist_ok=True)
-
-
-def split_index(split_data: str, split_str: str):
-    count = split_data.find(split_str)
-    if count > -1:
-        return split_data[count + len(split_str):]
-    else:
-        return split_str
-
-
-def replece_col_to_biaozhun(col):
-    for k, v in dianjian_dict.items():
-        if col.find(k) > -1:
-            col = col.replace(k, v)
-            return col
-
-    return col
-
-
-def row_to_datas(row, pv_dict, inverter_cols, df_cols):
-    row_datas = list(list())
-    for xiangbian in pv_dict.keys():
-        for nibianqi in pv_dict[xiangbian].keys():
-            for pv in pv_dict[xiangbian][nibianqi]:
-                datas = [np.nan] * 14
-                datas[0] = row['时间']
-                datas[1] = xiangbian
-                datas[2] = nibianqi
-                datas[3] = pv
-                datas_4_col = "_".join([str(xiangbian), str(nibianqi), str(pv), '支路输出电压'])
-                if datas_4_col in df_cols:
-                    datas[4] = row[datas_4_col]
-                else:
-                    datas[4] = np.nan
-
-                datas_5_col = "_".join([str(xiangbian), str(nibianqi), str(pv), '支路输出电流'])
-                if datas_5_col in df_cols:
-                    datas[5] = row[datas_5_col]
-                else:
-                    datas[5] = np.nan
-
-                row_datas.append(datas)
-
-    for xiangbian in pv_dict.keys():
-        for nibianqi in pv_dict[xiangbian].keys():
-            datas = [np.nan] * 14
-            datas[0] = row['时间']
-            datas[1] = xiangbian
-            datas[2] = nibianqi
-            datas[3] = 0
-            for index, col_name in enumerate(inverter_cols):
-                col = '_'.join([str(xiangbian), str(nibianqi), col_name])
-                if col in df_cols:
-                    datas[index + 6] = row[col]
-                else:
-                    datas[index + 6] = np.nan
-
-            row_datas.append(datas)
-
-    return row_datas
-
-
-def df_to_biaozhun(df):
-    pv_cols = ['支路输出电压', '支路输出电流']
-    inverter_cols = ['逆变器总发电量', '逆变器输出有功功率', '逆变器温度', '交流输出电压', '逆变器输出电流A相', '逆变器输出电流B相', '逆变器输出电流C相', '逆变器转换效率']
-    # 从列名获取箱变->逆变器->PV等的字典
-    pv_dict = dict(dict())
-    for col in df.columns:
-        for pv_col in pv_cols:
-            if str(col).endswith(pv_col):
-                datas = col.split("_")
-                xiangbian = datas[0]
-                nibiangqi = datas[1]
-                pv = datas[2]
-
-                if xiangbian in pv_dict.keys():
-                    if nibiangqi in pv_dict[xiangbian]:
-                        pv_dict[xiangbian][nibiangqi].add(pv)
-                    else:
-                        pv_dict[xiangbian][nibiangqi] = set([pv])
-                else:
-                    pv_dict[xiangbian] = {nibiangqi: set([pv])}
-
-    results = df.apply(row_to_datas, args=(pv_dict, inverter_cols, df.columns), axis=1)
-
-    df_datas = results.to_list()
-    df_datas = [da for data in df_datas for da in data]
-    df_cols = ["时间", "箱变", "逆变器", "支路"]
-    df_cols.extend(pv_cols)
-    df_cols.extend(inverter_cols)
-    df = pd.DataFrame(df_datas, columns=df_cols)
-
-    type_conver_list = []
-    type_conver_list.extend(pv_cols)
-    type_conver_list.extend(inverter_cols)
-    for type_conver in type_conver_list:
-        df[type_conver] = pd.to_numeric(df[type_conver], errors='coerce')
-
-    return df
-
-
-def read_and_save_csv(file_path, save_path):
-    begin = datetime.now()
-    base_name = os.path.basename(file_path)
-    print('开始', base_name)
-
-    df = read_file_to_df(file_path)
-
-    for col in df.columns:
-        for del_col in del_cols:
-            if col.find(del_col) > -1:
-                del df[col]
-
-    df['时间'] = pd.to_datetime(df['时间'])
-    xiangbian = [col for col in df.columns
-                 if str(col).startswith('#') and str(col).find('逆变器') > -1][0].split("逆变器")[0].replace("#", "")
-    df.columns = [xiangbian + "_" + split_index(df_col, "逆变器").replace('PV', "").replace("()", "").replace("-", "_")
-                  if df_col.startswith("#") else df_col
-                  for df_col in df.columns]
-
-    df.columns = [col.replace("输入", "_输入") for col in df.columns]
-    df.columns = [replece_col_to_biaozhun(col) for col in df.columns]
-
-    df = df_to_biaozhun(df)
-
-    # df.sort_values(by=['时间', "箱变", "逆变器", "支路"], inplace=True)
-    # save_file = path.join(save_path, path.basename(file_path))
-    # create_file_path(save_file, True)
-
-    # df.to_csv(save_file, encoding='utf-8', index=False)
-
-    print('结束', base_name, '耗时:' + str(datetime.now() - begin))
-    return df
-
-dianjian_data_str = """
-输入电压	支路输出电压
-输入电流	支路输出电流
-功率因数	
-总发电量	逆变器总发电量
-无功功率	
-有功功率	逆变器输出有功功率
-机内温度	逆变器温度
-电网AB线电压	交流输出电压
-电网A相电流	逆变器输出电流A相
-电网BC线电压	
-电网B相电流	逆变器输出电流B相
-电网CA线电压	
-电网C相电流	逆变器输出电流C相
-逆变器效率	逆变器转换效率
-"""
-
-dianjian_dict = {}
-del_cols = []
-for data in dianjian_data_str.split("\n"):
-    if data:
-        datas = data.split("\t")
-        if len(datas) == 2 and datas[1]:
-            dianjian_dict[datas[0]] = datas[1]
-        else:
-            del_cols.append(datas[0])
-
-if __name__ == '__main__':
-    read_path = r'/data/download/大唐玉湖性能分析离线分析/05整理数据/逆变器数据'
-    save_path = r'/data/download/大唐玉湖性能分析离线分析/06整理数据/逆变器数据'
-    # read_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\yuanshi'
-    # save_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\zhengli'
-    all_files = read_excel_files(read_path)
-
-    all_datas = list(all_files)
-    all_datas.sort()
-    print(all_datas)
-
-    # for file in all_datas:
-    #     read_and_save_csv(file, save_path)
-
-    with multiprocessing.Pool(40) as pool:
-        dfs = pool.starmap(read_and_save_csv, [(i, save_path) for i in all_datas])
-
-    saved_pv_df = pd.concat(dfs)
-    saved_pv_df.sort_values(by=['时间', "箱变", "逆变器", "支路"], inplace=True)
-    save_file = path.join(save_path, "合并.csv")
-    create_file_path(save_file, True)
-    saved_pv_df.sort_values(by=['时间', "箱变", "逆变器", "支路"], inplace=True)
-    saved_pv_df.to_csv(save_file, encoding='utf-8', index=False)
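标准化_2.py goes from the wide column layout to long rows with a row-wise apply, which costs a Python-level call per row. If the standardized names reliably split into 箱变_逆变器_支路_指标, a vectorized melt/pivot_table reaches a similar long layout; a sketch under that naming assumption (it does not cover the inverter-level columns whose names have fewer parts):

import pandas as pd

def wide_to_long(df):
    # 假设:除"时间"外的列名都恰好形如 "箱变_逆变器_支路_指标"
    long_df = df.melt(id_vars=['时间'], var_name='测点', value_name='数值')
    long_df[['箱变', '逆变器', '支路', '指标']] = long_df['测点'].str.split('_', expand=True)
    # 行转列:每个指标一列,与原脚本的长表布局一致
    return (long_df.pivot_table(index=['时间', '箱变', '逆变器', '支路'],
                                columns='指标', values='数值', aggfunc='first')
            .reset_index())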

+ 0 - 122
tmp_file/玉湖光伏-气象标准化.py

@@ -1,122 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jul  9 16:28:48 2024
-
-@author: Administrator
-"""
-import os
-
-import chardet
-import pandas as pd
-
-pd.options.mode.copy_on_write = True
-
-
-# 获取文件编码
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
-        encoding = 'gb18030'
-    return encoding
-
-
-# 读取数据到df
-def read_file_to_df(file_path, read_cols=list(), header=0):
-    df = pd.DataFrame()
-    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
-        encoding = detect_file_encoding(file_path)
-        end_with_gz = str(file_path).lower().endswith("gz")
-        if read_cols:
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
-        else:
-
-            if end_with_gz:
-                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
-            else:
-                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
-
-    else:
-        xls = pd.ExcelFile(file_path)
-        # 获取所有的sheet名称
-        sheet_names = xls.sheet_names
-        for sheet in sheet_names:
-            if read_cols:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
-            else:
-                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
-
-    return df
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in os.listdir(path):
-        item_path = os.path.join(path, item)
-        if os.path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif os.path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-    # 读取所有文件
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path):
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-# 创建路径
-def create_file_path(path, is_file_path=False):
-    if is_file_path:
-        path = os.path.dirname(path)
-
-    if not os.path.exists(path):
-        os.makedirs(path, exist_ok=True)
-
-
-if __name__ == '__main__':
-    # read_dir = r'/data/download/大唐玉湖性能分析离线分析/05整理数据/气象站数据'
-    # save_path = r'/data/download/大唐玉湖性能分析离线分析/06整理数据/气象站数据'
-    read_dir = r'Z:\大唐玉湖性能分析离线分析\05整理数据\气象站数据'
-    save_path = r'Z:\大唐玉湖性能分析离线分析\06整理数据\气象站数据'
-
-    fengsu_df = read_file_to_df(os.path.join(read_dir, '风速.csv'), read_cols=['当前时间', '实际风速'])
-    fengxiang_df = read_file_to_df(os.path.join(read_dir, '风向.csv'), read_cols=['当前时间', '实际风向'])
-    fuzhaodu_df = read_file_to_df(os.path.join(read_dir, '辐照度.csv'), read_cols=['时间', '水平总辐照度', '倾斜总辐照度', '散射辐照度'])
-    shidu_df = read_file_to_df(os.path.join(read_dir, '湿度.csv'), read_cols=['时间', '实际湿度'])
-    wendu_df = read_file_to_df(os.path.join(read_dir, '温度.csv'), read_cols=['时间', '实际温度'])
-    yali_df = read_file_to_df(os.path.join(read_dir, '压力.csv'), read_cols=['时间', '实际气压'])
-
-    fengsu_df.rename(columns={'当前时间': '时间'}, inplace=True)
-    fengxiang_df.rename(columns={'当前时间': '时间'}, inplace=True)
-
-    dfs = [fengxiang_df, fengsu_df, fuzhaodu_df, shidu_df, wendu_df, yali_df]
-
-    for df in dfs:
-        df['时间'] = pd.to_datetime(df['时间'])
-        df.set_index(keys='时间', inplace=True)
-
-    df = pd.concat(dfs, axis=1)
-    create_file_path(save_path, is_file_path=False)
-    df.to_csv(os.path.join(save_path, '气象合并.csv'), encoding='utf-8')
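The weather script merges the six series by indexing each frame on 时间 and concatenating column-wise; pd.concat(axis=1) outer-joins on the index, so a timestamp missing from any one file leaves NaN in that file's columns. The pattern in isolation (helper name illustrative):

import pandas as pd

def merge_on_time(frames, time_col='时间'):
    # 各表以时间为索引后按列拼接;pd.concat(axis=1) 对索引做外连接
    indexed = []
    for df in frames:
        df = df.copy()
        df[time_col] = pd.to_datetime(df[time_col])
        indexed.append(df.set_index(time_col))
    return pd.concat(indexed, axis=1).sort_index()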

+ 0 - 90
tmp_file/王博提取数据完整风机数据.py

@@ -1,90 +0,0 @@
-import datetime
-import multiprocessing
-import os
-
-import chardet
-import pandas as pd
-
-
-def detect_file_encoding(filename):
-    # 读取文件的前1000个字节(足够用于大多数编码检测)
-    with open(filename, 'rb') as f:
-        rawdata = f.read(1000)
-    result = chardet.detect(rawdata)
-    encoding = result['encoding']
-
-    print("文件类型:", filename, encoding)
-
-    if encoding is None:
-        encoding = 'gb18030'
-
-    if encoding.lower() in ['utf-8', 'ascii', 'utf8', 'utf-8-sig']:
-        return 'utf-8'
-
-    return 'gb18030'
-
-
-def __build_directory_dict(directory_dict, path, filter_types=None):
-    # 遍历目录下的所有项
-    for item in os.listdir(path):
-        item_path = os.path.join(path, item)
-        if os.path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
-        elif os.path.isfile(item_path):
-            if path not in directory_dict:
-                directory_dict[path] = []
-
-            if filter_types is None or len(filter_types) == 0:
-                directory_dict[path].append(item_path)
-            elif str(item_path).split(".")[-1] in filter_types:
-                if str(item_path).count("~$") == 0:
-                    directory_dict[path].append(item_path)
-
-
-# 读取路径下所有的excel文件
-def read_excel_files(read_path, filter_types=None):
-    if filter_types is None:
-        filter_types = ['xls', 'xlsx', 'csv', 'gz']
-    if os.path.isfile(read_path):
-        return [read_path]
-
-    directory_dict = {}
-    __build_directory_dict(directory_dict, read_path, filter_types=filter_types)
-
-    return [path for paths in directory_dict.values() for path in paths if path]
-
-
-def read_file_to_df(file_path):
-    df = pd.read_csv(file_path, encoding=detect_file_encoding(file_path))
-    date = os.path.basename(file_path)[14:24]
-    df['Time'] = df['Time'].apply(lambda x: date + ' ' + x)
-    return df
-
-
-def read_files_and_save_csv(file_dir, month, save_dir):
-    begin = datetime.datetime.now()
-    base_dir = os.path.basename(file_dir)
-    print(f"{datetime.datetime.now()}: 开始执行{base_dir}-{month}")
-    all_files = read_excel_files(os.path.join(file_dir, month))
-    df = pd.concat([read_file_to_df(file) for file in all_files], ignore_index=True)
-    save_path = os.path.join(save_dir, base_dir, f'{month}.csv')
-    os.makedirs(os.path.dirname(save_path), exist_ok=True)
-    df.sort_values(by=['Time'], inplace=True)
-    df.to_csv(save_path, encoding='utf8', index=False)
-    print(f"{datetime.datetime.now()}: 执行{base_dir}-{month}结束,耗时{datetime.datetime.now() - begin}")
-
-
-if __name__ == '__main__':
-    begin = datetime.datetime.now()
-    read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/tmp/second/excel_tmp/'
-    save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/20241217完整字段'
-    read_dirs = list()
-    for i in range(26, 42):
-        read_dirs.append(os.path.join(read_dir, str(i)))
-
-    for read_dir in read_dirs:
-        # 注意:这里不再重置 begin,否则最后打印的总耗时只覆盖最后一个目录
-        with multiprocessing.Pool(6) as pool:
-            pool.starmap(read_files_and_save_csv, [(read_dir, i, save_dir) for i in os.listdir(read_dir)])
-
-    print(f"{datetime.datetime.now()}: 执行结束,总耗时{datetime.datetime.now() - begin}")

+ 0 - 35
tmp_file/白玉山每月限电损失.py

@@ -1,35 +0,0 @@
-import os
-
-import pandas as pd
-
-read_path = r'D:\data\白玉山后评估数据资料\需要整理的数据\每月发电量和限电量、限电率'
-
-all_paths = list()
-for root, dirs, files in os.walk(read_path):
-    if files:
-        for file in files:
-            year_mont = int(file.split("(")[1].split("_")[0])
-            if year_mont >= 20230901 and year_mont < 20240901:
-                all_paths.append(os.path.join(root, file))
-
-df = pd.DataFrame()
-
-for file_path in all_paths:
-    now_df = pd.read_excel(file_path, usecols=['设备名称', '统计时间', '限电损失电量(kWh)'], header=2)
-    now_df = now_df[now_df['设备名称'].str.startswith("#")]
-    df = pd.concat([df, now_df])
-
-## 人工验证 看一看
-print(df[df['设备名称'] == '#34'])
-
-df = df[['设备名称', '限电损失电量(kWh)']]
-group_df = df.groupby('设备名称').sum()
-
-result_df = pd.DataFrame(group_df)
-result_df.reset_index(inplace=True)
-result_df.columns = ['设备名称', '总限电损失电量(kWh)']
-result_df.sort_values(by=['设备名称'], inplace=True)
-
-print(result_df)
-
-result_df.to_csv("设备总限电损失.csv", encoding='utf-8', index=False)

+ 0 - 29
tmp_file/筛选字段.py

@@ -1,29 +0,0 @@
-import multiprocessing
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
-
-from utils.file.trans_methods import read_file_to_df, create_file_path
-
-
-def read_and_save(file_path, select_cols, save_path):
-    base_name = os.path.basename(file_path).split('.')[0]
-    df = read_file_to_df(file_path, read_cols=select_cols)
-
-    save_path = os.path.join(save_path, base_name + '.csv')
-    create_file_path(save_path, True)
-    df.to_csv(save_path, index=False, encoding='utf-8')
-
-
-if __name__ == '__main__':
-    select_cols_str = 'Time,瞬时风速,风机号,瞬时功率,扭矩给定,扭矩反馈,高风切出,风机允许功率管理,功率管理使能反馈,不可利用,功率曲线可用,主控初始化完成,待机,启动,偏航,并网,限功率,正常发电,故障,紧急停机,快速停机,正常停机,告警,停机完成,允许功率管理,处于功率管理,检修,维护'
-
-    select_cols = [i for i in select_cols_str.split(',') if i]
-
-    read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/20241213(26,38)完整字段/26'
-
-    save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/20241213(26,38)完整字段/20241216113130'
-
-    with multiprocessing.Pool(6) as pool:
-        pool.starmap(read_and_save, [(os.path.join(read_dir, i), select_cols, save_dir) for i in os.listdir(read_dir)])
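read_file_to_df with a fixed read_cols list raises if a requested column is absent from a file. pandas also accepts a callable for usecols, which simply keeps whichever wanted columns are present; a tolerant variant (helper name illustrative):

import pandas as pd

def read_selected(file_path, wanted):
    # usecols 传入可调用对象:逐列判断是否保留,文件缺少某列时不会报错
    wanted_set = set(wanted)
    return pd.read_csv(file_path, usecols=lambda c: c in wanted_set, encoding='utf-8')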

+ 3 - 0
utils/common.py

@@ -0,0 +1,3 @@
+excel_types = ['xls', 'xlsx', 'xlsm', 'xlsb', 'odf', 'ods', 'csv', 'csv.gz']
+
+zip_types = ['rar', 'zip']
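A caveat on the new 'csv.gz' entry: the directory filters in the deleted helpers above compare str(item_path).split(".")[-1] against the type list, and that expression yields "gz" for a name ending in .csv.gz, so a 'csv.gz' filter may never match under that test; whether trans_methods shares this behavior is not visible in this hunk. A suffix check that handles compound extensions (sketch):

def matches_type(file_name, type_list):
    # 用 endswith 匹配,兼容 "csv.gz" 这类复合扩展名
    name = str(file_name).lower()
    return any(name.endswith('.' + t) for t in type_list)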

+ 2 - 2
utils/db/ConnectMysql.py

@@ -13,10 +13,10 @@ from utils.log.trans_log import trans_print
 class ConnectMysql:
 
     def __init__(self, connet_name):
-        config_path = path.abspath(__file__).split("utils")[0] + 'conf' + sep + 'etl_config_prod.yaml'
-        self.yaml_data = yaml_conf(environ.get('ETL_CONF', config_path))
+        self.yaml_data = yaml_conf(environ.get('ETL_CONF'))
         self.connet_name = connet_name
         self.config = self.yaml_data[self.connet_name]
+        self.database = self.config['database']
 
 
     # 从连接池中获取一个连接
     def get_conn(self):
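With the hardcoded default removed, ConnectMysql now requires the ETL_CONF environment variable to point at the YAML config; if it is unset, environ.get returns None and the subsequent load fails. A defensive sketch of the lookup (message text illustrative):

from os import environ

def require_etl_conf():
    # ETL_CONF 必须指向有效的 yaml 配置,否则尽早失败并给出明确提示
    config_path = environ.get('ETL_CONF')
    if not config_path:
        raise RuntimeError('环境变量 ETL_CONF 未设置,请指向 etl_config_*.yaml')
    return config_path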

+ 6 - 1
utils/file/trans_methods.py

@@ -10,6 +10,7 @@ import warnings
 import chardet
 import pandas as pd
 
 
+from utils.common import excel_types, zip_types
 from utils.log.trans_log import trans_print
 
 
 warnings.filterwarnings("ignore")
 warnings.filterwarnings("ignore")
@@ -162,6 +163,9 @@ def __build_directory_dict(directory_dict, path, filter_types=None):
 
 
 # 读取路径下所有的excel文件
 def read_excel_files(read_path, filter_types=None):
+    if not os.path.exists(read_path):
+        return []
+
     if filter_types is None:
         filter_types = ['xls', 'xlsx', 'csv', 'gz']
         filter_types = ['xls', 'xlsx', 'csv', 'gz']
     if os.path.isfile(read_path):
     if os.path.isfile(read_path):
@@ -176,7 +180,8 @@ def read_excel_files(read_path, filter_types=None):
 # 读取路径下所有的文件
 # 读取路径下所有的文件
 def read_files(read_path, filter_types=None):
 def read_files(read_path, filter_types=None):
     if filter_types is None:
     if filter_types is None:
-        filter_types = ['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar']
+        filter_types = [i for i in excel_types]
+        filter_types.extend(zip_types)
     if os.path.isfile(read_path):
     if os.path.isfile(read_path):
         return [read_path]
         return [read_path]
     directory_dict = {}
     directory_dict = {}