Forráskód Böngészése

修改获取激光数据代码
添加年度汇总缺失率临时文件

wzl 5 hónapja
szülő
commit
2931b09a11
2 módosított fájl, 135 hozzáadás és 16 törlés
  1. 39 16
      etl/wind_power/laser/LaserTrans.py
  2. 96 0
      tmp_file/年度汇总平均缺失率.py

+ 39 - 16
etl/wind_power/laser/LaserTrans.py

@@ -1,13 +1,15 @@
 import datetime
+import json
 import multiprocessing
 import os.path
 
+import numpy as np
 import pandas as pd
 
 from service.plt_service import get_all_wind
 from service.trans_service import save_df_to_db
 from utils.file.trans_methods import read_files, read_file_to_df
-from utils.log.trans_log import set_trance_id
+from utils.log.trans_log import set_trance_id, trans_print
 
 
 class LaserTrans():
@@ -15,10 +17,9 @@ class LaserTrans():
     激光测距仪转化
     """
 
-    def __init__(self, field_code, read_path, save_path: str):
+    def __init__(self, field_code, read_path):
         self.field_code = field_code
         self.read_path = read_path
-        self.save_path = save_path
         self.begin = datetime.datetime.now()
         self.wind_col_trans, _ = get_all_wind(self.field_code, need_rated_param=False)
 
@@ -26,29 +27,51 @@ class LaserTrans():
         file_name = os.path.basename(file_path)
         wind_farm, wind_turbine_number, acquisition_time, sampling_frequency = file_name.split("_")
         result_df = pd.DataFrame()
-        result_df['wind_turbine_number'] = wind_turbine_number
-        result_df['acquisition_time'] = pd.to_datetime(acquisition_time, format='%Y%m%d%H%M%S')
-        result_df['sampling_frequency'] = sampling_frequency
+        result_df['wind_turbine_number'] = [wind_turbine_number]
+        result_df['acquisition_time'] = [pd.to_datetime(acquisition_time, format='%Y%m%d%H%M%S')]
+        result_df['sampling_frequency'] = [sampling_frequency]
         result_df['wind_turbine_number'] = result_df['wind_turbine_number'].map(self.wind_col_trans).fillna(
             result_df['wind_turbine_number'])
         # 获取数据
         df = read_file_to_df(file_path)
-        result_df['pk_no'] = df['PkNo'].values[0]
-        result_df['echo_type'] = df['EchoType'].values[0]
-        result_df['echo1_dist'] = df['Echo1Dist'].values
-        result_df['echo1_grey'] = df['Echo1Grey'].values
-        result_df['echo2_dist'] = df['Echo2Dist'].values
-        result_df['echo2_grey'] = df['Echo2Grey'].values
-        result_df['echo3_dist'] = df['Echo3Dist'].values
-        result_df['echo3_grey'] = df['Echo3Grey'].values
+        if not df.empty:
+            result_df['pk_no'] = [df['PkNo'].values[0]]
+            result_df['echo_type'] = [df['EchoType'].values[0]]
+            result_df['echo1_dist'] = [json.dumps([float(i) for i in df['Echo1Dist'].values if not np.isnan(i)])]
+            result_df['echo1_grey'] = [json.dumps([int(i) for i in df['Echo1Grey'].values if not np.isnan(i)])]
+            result_df['echo2_dist'] = [json.dumps([float(i) for i in df['Echo2Dist'].values if not np.isnan(i)])]
+            result_df['echo2_grey'] = [json.dumps([int(i) for i in df['Echo2Grey'].values if not np.isnan(i)])]
+            result_df['echo3_dist'] = [json.dumps([float(i) for i in df['Echo3Dist'].values if not np.isnan(i)])]
+            result_df['echo3_grey'] = [json.dumps([int(i) for i in df['Echo3Grey'].values if not np.isnan(i)])]
+        else:
+            return pd.DataFrame()
 
-        save_df_to_db(self.field_code + "_laser", result_df)
+        return result_df
 
     def run(self):
         trance_id = '-'.join([self.field_code, 'laser'])
         set_trance_id(trance_id)
         all_files = read_files(self.read_path, ['csv'])
+        trans_print(self.field_code, '获取文件总数为:', len(all_files))
         pool_count = 8 if len(all_files) > 8 else len(all_files)
 
         with multiprocessing.Pool(pool_count) as pool:
-            pool.map(self.get_file_data, all_files)
+            dfs = pool.map(self.get_file_data, all_files)
+        df = pd.concat(dfs, ignore_index=True)
+        save_df_to_db(self.field_code + "_laser", df)
+        df.sort_values(by=['acquisition_time'], inplace=True)
+        trans_print(self.field_code, '执行结束,总耗时:', (datetime.datetime.now() - self.begin))
+
+
if __name__ == '__main__':
    # Ad-hoc entry point: an optional CLI argument selects the config env.
    import sys
    from os import path, environ

    env = sys.argv[1] if len(sys.argv) >= 2 else 'dev'

    # Point ETL_CONF at the environment-specific YAML under the repo root.
    repo_root = path.abspath(__file__).split("energy-data-trans")[0]
    conf_path = repo_root + f"/energy-data-trans/conf/etl_config_{env}.yaml"
    environ['ETL_CONF'] = conf_path
    environ['env'] = env
    LaserTrans('JGCS001', r'D:\data\激光\测试').run()

+ 96 - 0
tmp_file/年度汇总平均缺失率.py

@@ -0,0 +1,96 @@
+import calendar
+import datetime
+import math
+import multiprocessing
+
+import pandas as pd
+
+from utils.file.trans_methods import read_excel_files, read_file_to_df
+
+
def get_year_days(year):
    """Return the day count used as the denominator for *year*.

    For a past (or future) year this is the calendar length (365 or 366).
    For the current year it is the number of days elapsed from January 1st
    through yesterday, so partial-year rates stay meaningful.
    """
    if year == datetime.datetime.now().year:
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        # NOTE(review): on Jan 1st `yesterday.year` is the previous year,
        # so this returns that full year's length — confirm intended.
        first_day = datetime.date(yesterday.year, 1, 1)
        return (yesterday - first_day).days + 1
    return 366 if calendar.isleap(year) else 365
+
+
def save_percent(value, save_decimal=7):
    """Convert a ratio to a percentage.

    The ratio is rounded to *save_decimal* decimal places first, then
    scaled by 100.
    """
    rounded = round(value, save_decimal)
    return rounded * 100
+
+
if __name__ == '__main__':
    # One-off report: per-turbine yearly average missing rate, written to CSV.

    read_dir = r'D:\data\综合报表22-24年'

    # Read every report workbook in parallel and stack them into one frame.
    all_files = read_excel_files(read_dir)

    with multiprocessing.Pool(6) as pool:
        dfs = pool.map(read_file_to_df, all_files)

    df = pd.concat(dfs, ignore_index=True)

    # Export helper/index columns that are not metrics. drop() raises
    # KeyError if one is missing, matching the original per-column `del`.
    del_cols = ['Unnamed: 0', '序号', 'times']
    df.drop(columns=del_cols, inplace=True)

    # Discard per-sheet summary rows.
    df = df.query("风机 != '完整'")

    numic_cols = ['数据有效性', '历史总有功发电量', '历史总有功耗电量',
                  '查询区间有功发电量', '查询区间有功耗电量', '历史总无功发电量', '历史总无功耗电量',
                  '查询区间无功发电量',
                  '查询区间无功耗电量', '时间可利用率', '最大风速', '最小风速', '平均风速', '空气密度', '最大有功功率',
                  '最小有功功率', '平均有功功率', '平均无功功率', '电网停机次数', '累计运行时间', '有效风时数',
                  '满发时间',
                  '启动时间', '启动次数', '并网发电时间', '等效发电时间', '正常发电时间', '调度限功率发电时间',
                  '风机限功率发电时间',
                  '停机时间', '维护停机时间', '故障停机时间', '调度停机时间', '气象停机时间', '电网停机时间',
                  '远程停机时间',
                  '待机时间', '户外平均温度', '机舱最高温度', '维护停机次数', '气象停机次数', '故障停机次数',
                  '报警发电时间',
                  '报警发电次数', '偏航时长', '偏航次数', '通讯中断时间', '通讯故障次数', '调度限功率发电损失电量',
                  '风机限功率发电损失电量', '气象停机损失电量', '调度限功率停机损失电量', '远程停机损失电量',
                  '维护停机损失电量',
                  '风机故障停机损失电量', '电网停机损失电量']

    # Coerce metric columns to numbers; unparseable cells become NaN so the
    # later count() reflects how many valid values each turbine reported.
    for numic_col in numic_cols:
        df[numic_col] = pd.to_numeric(df[numic_col], errors='coerce')

    df['year'] = pd.to_datetime(df['时间'], errors='coerce').dt.year

    # count() ignores NaN, giving the number of present values per column.
    count_df = df.groupby(by=['year', '风机']).count().reset_index()

    years = count_df['year'].unique()
    wind_names = count_df['风机'].unique()
    numic_cols.insert(0, '时间')

    # Accumulate rows in a list and concatenate once — concatenating inside
    # the loop (as before) is quadratic.
    rows = []
    for year in years:
        year_days = get_year_days(year)
        for wind_name in wind_names:
            selected = count_df[(count_df['year'] == year) & (count_df['风机'] == wind_name)]
            if selected.empty:
                # Not every turbine appears in every year; the original
                # crashed with IndexError on .values[0] here.
                continue
            count = selected[numic_cols].values[0].sum()
            print(year, wind_name, count, len(numic_cols) * year_days)
            rows.append(pd.DataFrame({
                '时间': [int(year)],
                '风机': [wind_name],
                '缺失均值': [save_percent(count / (len(numic_cols) * year_days))],
            }))

    result_df = pd.concat(rows, ignore_index=True)
    result_df.to_csv('年度平均缺失率.csv', encoding='utf-8', index=False)