Forráskód Böngészése

修改获取激光数据代码
添加年度汇总缺失率临时文件

wzl 5 hónapja
szülő
commit
2931b09a11
2 módosított fájl, 135 hozzáadás és 16 törlés
  1. 39 16
      etl/wind_power/laser/LaserTrans.py
  2. 96 0
      tmp_file/年度汇总平均缺失率.py

+ 39 - 16
etl/wind_power/laser/LaserTrans.py

@@ -1,13 +1,15 @@
 import datetime
+import json
 import multiprocessing
 import os.path
 
+import numpy as np
 import pandas as pd
 
 from service.plt_service import get_all_wind
 from service.trans_service import save_df_to_db
 from utils.file.trans_methods import read_files, read_file_to_df
-from utils.log.trans_log import set_trance_id
+from utils.log.trans_log import set_trance_id, trans_print
 
 
 class LaserTrans():
@@ -15,10 +17,9 @@ class LaserTrans():
     激光测距仪转化
     """
 
-    def __init__(self, field_code, read_path, save_path: str):
+    def __init__(self, field_code, read_path):
         self.field_code = field_code
         self.read_path = read_path
-        self.save_path = save_path
         self.begin = datetime.datetime.now()
         self.wind_col_trans, _ = get_all_wind(self.field_code, need_rated_param=False)
 
@@ -26,29 +27,51 @@ class LaserTrans():
         file_name = os.path.basename(file_path)
         wind_farm, wind_turbine_number, acquisition_time, sampling_frequency = file_name.split("_")
         result_df = pd.DataFrame()
-        result_df['wind_turbine_number'] = wind_turbine_number
-        result_df['acquisition_time'] = pd.to_datetime(acquisition_time, format='%Y%m%d%H%M%S')
-        result_df['sampling_frequency'] = sampling_frequency
+        result_df['wind_turbine_number'] = [wind_turbine_number]
+        result_df['acquisition_time'] = [pd.to_datetime(acquisition_time, format='%Y%m%d%H%M%S')]
+        result_df['sampling_frequency'] = [sampling_frequency]
         result_df['wind_turbine_number'] = result_df['wind_turbine_number'].map(self.wind_col_trans).fillna(
             result_df['wind_turbine_number'])
         # 获取数据
         df = read_file_to_df(file_path)
-        result_df['pk_no'] = df['PkNo'].values[0]
-        result_df['echo_type'] = df['EchoType'].values[0]
-        result_df['echo1_dist'] = df['Echo1Dist'].values
-        result_df['echo1_grey'] = df['Echo1Grey'].values
-        result_df['echo2_dist'] = df['Echo2Dist'].values
-        result_df['echo2_grey'] = df['Echo2Grey'].values
-        result_df['echo3_dist'] = df['Echo3Dist'].values
-        result_df['echo3_grey'] = df['Echo3Grey'].values
+        if not df.empty:
+            result_df['pk_no'] = [df['PkNo'].values[0]]
+            result_df['echo_type'] = [df['EchoType'].values[0]]
+            result_df['echo1_dist'] = [json.dumps([float(i) for i in df['Echo1Dist'].values if not np.isnan(i)])]
+            result_df['echo1_grey'] = [json.dumps([int(i) for i in df['Echo1Grey'].values if not np.isnan(i)])]
+            result_df['echo2_dist'] = [json.dumps([float(i) for i in df['Echo2Dist'].values if not np.isnan(i)])]
+            result_df['echo2_grey'] = [json.dumps([int(i) for i in df['Echo2Grey'].values if not np.isnan(i)])]
+            result_df['echo3_dist'] = [json.dumps([float(i) for i in df['Echo3Dist'].values if not np.isnan(i)])]
+            result_df['echo3_grey'] = [json.dumps([int(i) for i in df['Echo3Grey'].values if not np.isnan(i)])]
+        else:
+            return pd.DataFrame()
 
-        save_df_to_db(self.field_code + "_laser", result_df)
+        return result_df
 
     def run(self):
         trance_id = '-'.join([self.field_code, 'laser'])
         set_trance_id(trance_id)
         all_files = read_files(self.read_path, ['csv'])
+        trans_print(self.field_code, '获取文件总数为:', len(all_files))
         pool_count = 8 if len(all_files) > 8 else len(all_files)
 
         with multiprocessing.Pool(pool_count) as pool:
-            pool.map(self.get_file_data, all_files)
+            dfs = pool.map(self.get_file_data, all_files)
+        df = pd.concat(dfs, ignore_index=True)
+        save_df_to_db(self.field_code + "_laser", df)
+        df.sort_values(by=['acquisition_time'], inplace=True)
+        trans_print(self.field_code, '执行结束,总耗时:', (datetime.datetime.now() - self.begin))
+
+
if __name__ == '__main__':
    # Ad-hoc entry point: an optional CLI argument selects the config env.
    import sys
    from os import path, environ

    env = sys.argv[1] if len(sys.argv) >= 2 else 'dev'

    # Point ETL_CONF at the environment-specific YAML under the repo root.
    repo_root = path.abspath(__file__).split("energy-data-trans")[0]
    conf_path = repo_root + f"/energy-data-trans/conf/etl_config_{env}.yaml"
    environ['ETL_CONF'] = conf_path
    environ['env'] = env
    LaserTrans('JGCS001', r'D:\data\激光\测试').run()

+ 96 - 0
tmp_file/年度汇总平均缺失率.py

@@ -0,0 +1,96 @@
+import calendar
+import datetime
+import math
+import multiprocessing
+
+import pandas as pd
+
+from utils.file.trans_methods import read_excel_files, read_file_to_df
+
+
def get_year_days(year):
    """Return the day count used as the denominator for *year*.

    For a past (or future) year this is the calendar length (365 or 366).
    For the current year it is the number of days elapsed from January 1st
    through yesterday, so partial-year rates stay meaningful.
    """
    if year == datetime.datetime.now().year:
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        # NOTE(review): on Jan 1st `yesterday.year` is the previous year,
        # so this returns that full year's length — confirm intended.
        first_day = datetime.date(yesterday.year, 1, 1)
        return (yesterday - first_day).days + 1
    return 366 if calendar.isleap(year) else 365
+
+
def save_percent(value, save_decimal=7):
    """Convert a ratio to a percentage.

    The ratio is rounded to *save_decimal* decimal places first, then
    scaled by 100.
    """
    rounded = round(value, save_decimal)
    return rounded * 100
+
+
if __name__ == '__main__':
    # One-off report: per-turbine yearly average missing rate, written to CSV.

    read_dir = r'D:\data\综合报表22-24年'

    # Read every report workbook in parallel and stack them into one frame.
    all_files = read_excel_files(read_dir)

    with multiprocessing.Pool(6) as pool:
        dfs = pool.map(read_file_to_df, all_files)

    df = pd.concat(dfs, ignore_index=True)

    # Export helper/index columns that are not metrics. drop() raises
    # KeyError if one is missing, matching the original per-column `del`.
    del_cols = ['Unnamed: 0', '序号', 'times']
    df.drop(columns=del_cols, inplace=True)

    # Discard per-sheet summary rows.
    df = df.query("风机 != '完整'")

    numic_cols = ['数据有效性', '历史总有功发电量', '历史总有功耗电量',
                  '查询区间有功发电量', '查询区间有功耗电量', '历史总无功发电量', '历史总无功耗电量',
                  '查询区间无功发电量',
                  '查询区间无功耗电量', '时间可利用率', '最大风速', '最小风速', '平均风速', '空气密度', '最大有功功率',
                  '最小有功功率', '平均有功功率', '平均无功功率', '电网停机次数', '累计运行时间', '有效风时数',
                  '满发时间',
                  '启动时间', '启动次数', '并网发电时间', '等效发电时间', '正常发电时间', '调度限功率发电时间',
                  '风机限功率发电时间',
                  '停机时间', '维护停机时间', '故障停机时间', '调度停机时间', '气象停机时间', '电网停机时间',
                  '远程停机时间',
                  '待机时间', '户外平均温度', '机舱最高温度', '维护停机次数', '气象停机次数', '故障停机次数',
                  '报警发电时间',
                  '报警发电次数', '偏航时长', '偏航次数', '通讯中断时间', '通讯故障次数', '调度限功率发电损失电量',
                  '风机限功率发电损失电量', '气象停机损失电量', '调度限功率停机损失电量', '远程停机损失电量',
                  '维护停机损失电量',
                  '风机故障停机损失电量', '电网停机损失电量']

    # Coerce metric columns to numbers; unparseable cells become NaN so the
    # later count() reflects how many valid values each turbine reported.
    for numic_col in numic_cols:
        df[numic_col] = pd.to_numeric(df[numic_col], errors='coerce')

    df['year'] = pd.to_datetime(df['时间'], errors='coerce').dt.year

    # count() ignores NaN, giving the number of present values per column.
    count_df = df.groupby(by=['year', '风机']).count().reset_index()

    years = count_df['year'].unique()
    wind_names = count_df['风机'].unique()
    numic_cols.insert(0, '时间')

    # Accumulate rows in a list and concatenate once — concatenating inside
    # the loop (as before) is quadratic.
    rows = []
    for year in years:
        year_days = get_year_days(year)
        for wind_name in wind_names:
            selected = count_df[(count_df['year'] == year) & (count_df['风机'] == wind_name)]
            if selected.empty:
                # Not every turbine appears in every year; the original
                # crashed with IndexError on .values[0] here.
                continue
            count = selected[numic_cols].values[0].sum()
            print(year, wind_name, count, len(numic_cols) * year_days)
            rows.append(pd.DataFrame({
                '时间': [int(year)],
                '风机': [wind_name],
                '缺失均值': [save_percent(count / (len(numic_cols) * year_days))],
            }))

    result_df = pd.concat(rows, ignore_index=True)
    result_df.to_csv('年度平均缺失率.csv', encoding='utf-8', index=False)