|
@@ -0,0 +1,92 @@
|
|
|
+import multiprocessing
|
|
|
+import os
|
|
|
+import sys
|
|
|
+
|
|
|
+sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
|
|
|
+
|
|
|
+import pandas as pd
|
|
|
+
|
|
|
+from utils.file.trans_methods import read_file_to_df
|
|
|
+
|
|
|
+
|
|
|
def save_percent(value, save_decimal=7):
    """Convert a fraction to a percentage value.

    The fraction is first rounded to ``save_decimal`` decimal places and then
    scaled by 100, so the result carries ``save_decimal - 2`` decimals.

    Args:
        value: Fraction to convert (e.g. ``0.25`` for 25 %).
        save_decimal: Number of decimals kept on the fraction before scaling.

    Returns:
        The percentage as a number (``0.25`` -> ``25.0``).
    """
    percent = round(value, save_decimal) * 100
    # Multiplying by 100 reintroduces binary floating-point noise
    # (0.1234567 * 100 == 12.345669999999998); rounding again at the
    # precision that survives the scaling strips it without changing the
    # rounding decision already made above.
    return round(percent, save_decimal - 2)
|
|
|
+
|
|
|
+
|
|
|
def _count_out_of_range(df, cols, low, high):
    """Count values strictly below ``low`` or strictly above ``high`` per column.

    Returns:
        A ``(total, summary)`` tuple: ``total`` is the number of out-of-range
        values summed over ``cols``; ``summary`` is a ``",col:count"`` string
        with one entry per column, each showing that column's own count.
    """
    total = 0
    parts = []
    for col in cols:
        # Boolean masks instead of a string-built ``query`` expression, so
        # column names need not be valid Python identifiers. NaN compares
        # False on both sides and is therefore never counted.
        col_count = int(((df[col] < low) | (df[col] > high)).sum())
        total += col_count
        parts.append(',' + col + ':' + str(col_count))
    return total, ''.join(parts)


def read_and_select(file_path, read_cols):
    """Build a one-row data-quality summary for a single data file.

    The file is loaded through ``read_file_to_df``, restricted to
    2024-06-01 <= Time < 2024-12-01, and summarised into: duplicated
    timestamps, missing values per column, and out-of-range wind-speed and
    power readings.

    Args:
        file_path: Path of the file to analyse; the base name up to the first
            ``.`` is used as the turbine identifier.
        read_cols: Column names to load. Must contain ``'Time'``; every other
            column is coerced to numeric.

    Returns:
        A single-row ``pandas.DataFrame`` with the statistics for this file.
    """
    wind_name = os.path.basename(file_path).split('.')[0]
    df = read_file_to_df(file_path, read_cols=read_cols)

    # NOTE(review): the filter runs before Time is converted to datetime, so
    # it relies on how read_file_to_df delivers that column - confirm.
    # .copy() makes the filtered frame independent, so the column assignments
    # below do not write into a slice of the loaded frame.
    df = df.query("(Time>='2024-06-01 00:00:00') & (Time<'2024-12-01 00:00:00')").copy()

    # Expected number of 1-second records in the window: 183 days * 86400 s.
    count = 15811200

    # Rows whose Time value already appeared earlier in the frame.
    repeat_time_count = int(df['Time'].duplicated().sum())
    print(wind_name, count, repeat_time_count)

    for read_col in read_cols:
        if read_col != 'Time':
            df[read_col] = pd.to_numeric(df[read_col], errors='coerce')
        else:
            df[read_col] = pd.to_datetime(df[read_col], errors='coerce')

    # Non-null count per requested column. Counting directly (rather than via
    # groupby(...).values[0]) also works when the filter left no rows.
    non_null_counts = df[read_cols].count()
    total_count = non_null_counts.sum()
    print(wind_name, total_count, count * len(read_cols))

    # Wind-speed columns: values outside [0, 80] are treated as anomalies.
    fengsu_cols = [i for i in read_cols if '风速' in i]
    fengsu_count, fengsu_str = _count_out_of_range(df, fengsu_cols, 0, 80)

    # Power columns: values outside [-200, 4800] are treated as anomalies.
    # Only columns that were actually requested can be checked.
    gonglv_cols = [i for i in ('有功功率', '瞬时功率', '当前理论可发最大功率') if i in read_cols]
    gonglv_count, gonglv_str = _count_out_of_range(df, gonglv_cols, -200, 4800)

    # Average anomaly rate over every checked column: wind-speed anomalies
    # plus power anomalies, relative to the expected number of values.
    checked_col_count = len(fengsu_cols) + len(gonglv_cols)
    if checked_col_count:
        abnormal_rate = save_percent((fengsu_count + gonglv_count) / (checked_col_count * count))
    else:
        abnormal_rate = 0

    return pd.DataFrame([{
        '风机号': wind_name,
        '重复率': save_percent(repeat_time_count / count),
        '重复次数': repeat_time_count,
        '总记录数': count,
        '平均缺失率,单位%': save_percent(1 - total_count / (count * len(read_cols))),
        '缺失数值': '-'.join(str(count - i) for i in non_null_counts.values),
        '风速异常': fengsu_str,
        '功率异常': gonglv_str,
        '平均异常率': abnormal_rate,
    }])
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Columns loaded from every input file.
    read_cols = [
        'Time', '设备主要状态', '功率曲线风速', '湍流强度', '实际风速', '有功功率',
        '桨叶角度A', '桨叶角度B', '桨叶角度C',
        '机舱内温度', '机舱外温度', '绝对风向', '机舱绝对位置', '叶轮转速', '发电机转速',
        '瞬时风速', '有功设定反馈', '当前理论可发最大功率', '空气密度', '偏航误差',
        '发电机扭矩', '瞬时功率', '风向1s', '偏航压力',
        '桨叶1速度', '桨叶2速度', '桨叶3速度',
        '桨叶1角度给定', '桨叶2角度给定', '桨叶3角度给定',
        '轴1电机电流', '轴2电机电流', '轴3电机电流',
        '轴1电机温度', '轴2电机温度', '轴3电机温度',
        '待机', '启动', '偏航', '并网', '限功率', '正常发电', '故障', '计入功率曲线',
        '运行发电机冷却风扇1', '运行发电机冷却风扇2',
        '激活偏航解缆阀', '激活偏航刹车阀', '激活风轮刹车阀',
        '激活顺时针偏航', '激活逆时针偏航', '电缆扭角',
    ]

    # Directory holding the input files; every entry in it is processed.
    read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/点检表以外测点儿-20241210'

    # One (path, columns) argument tuple per directory entry.
    tasks = [(os.path.join(read_dir, file_name), read_cols) for file_name in os.listdir(read_dir)]

    # Analyse the files in parallel with four worker processes.
    with multiprocessing.Pool(4) as pool:
        per_file_summaries = pool.starmap(read_and_select, tasks)

    # Stack the one-row summaries, order them by turbine id and export.
    summary = pd.concat(per_file_summaries, ignore_index=True)
    summary = summary.sort_values(by=['风机号'])

    summary.to_csv("张崾先统计-秒.csv", encoding='utf8', index=False)
|