|
@@ -0,0 +1,87 @@
|
|
|
+# coding=utf-8
|
|
|
+
|
|
|
+import datetime
|
|
|
+import multiprocessing
|
|
|
+import os
|
|
|
+import sys
|
|
|
+
|
|
|
+sys.path.insert(0, os.path.abspath(__file__).split("utils")[0])
|
|
|
+
|
|
|
+import pandas as pd
|
|
|
+
|
|
|
+from utils.file.trans_methods import read_file_to_df, read_excel_files
|
|
|
+
|
|
|
+
|
|
|
def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
    """
    Count the sampling slots spanned by two timestamps.

    The whole-second difference between ``end_time`` and ``start_time``
    (sub-second parts are ignored) is divided by ``time_space``, truncated
    toward zero, made non-negative, and incremented by one so that both
    end points are counted.

    :param start_time: first timestamp of the range
    :param end_time: last timestamp of the range
    :param time_space: slot width in seconds
    :return: number of slots, always at least 1
    """
    elapsed = end_time - start_time
    # days/seconds only: microseconds are deliberately left out of the total.
    whole_seconds = elapsed.days * 24 * 60 * 60 + elapsed.seconds
    slots_between = int(whole_seconds / time_space)
    return abs(slots_between) + 1
|
|
|
+
|
|
|
+
|
|
|
def save_percent(value, save_decimal=7):
    """
    Convert a ratio to a percentage.

    The result keeps ``save_decimal`` decimal places of the *ratio*, which is
    ``save_decimal - 2`` decimal places of the percentage.

    Scaling happens before rounding: multiplying an already rounded float by
    100 reintroduces binary floating-point noise (``0.07 * 100`` is
    ``7.000000000000001``), which would otherwise leak into the output.

    :param value: ratio to convert (e.g. ``0.25`` for 25 %)
    :param save_decimal: decimal places of the ratio to keep
    :return: the percentage, rounded
    """
    return round(value * 100, save_decimal - 2)
|
|
|
+
|
|
|
+
|
|
|
def read_and_select(file_path, read_cols_bak):
    """
    Build a one-row data-quality summary for a single data file.

    The file is loaded through ``read_file_to_df`` restricted to the
    requested columns. The summary reports, for the name found in the first
    row of the '名称' column:

    * the expected record count for a 600-second sampling interval between
      the earliest and latest '时间' values,
    * how many rows carry a repeated timestamp, and that number as a
      percentage of the expected count,
    * the average share of missing cells across all columns except '名称',
      plus a per-column ``<column>_<missing>`` listing joined with '-',
    * the share of rows whose '风速' or '发电机有功功率' fall outside the
      hard-coded plausibility limits.

    :param file_path: path of the file to analyse
    :param read_cols_bak: column names to read; must include '名称' and
        '时间'. The sequence itself is left unmodified.
    :return: single-row ``pandas.DataFrame`` holding the summary
    :raises Exception: any failure is re-raised after printing ``file_path``
    """
    try:
        # Private copy: '名称' is removed further down and the caller's
        # sequence must stay intact.
        read_cols = read_cols_bak[:]
        df = read_file_to_df(file_path, read_cols=read_cols)
        wind_name = df['名称'].values[0]

        df['时间'] = pd.to_datetime(df['时间'])
        timestamps = df['时间']
        count = get_time_space_count(timestamps.min(), timestamps.max(), 600)
        repeat_time_count = df.shape[0] - len(timestamps.unique())
        print(wind_name, count, repeat_time_count)

        summary = {
            '风机号': [wind_name],
            '重复率': [save_percent(repeat_time_count / count)],
            '重复次数': [repeat_time_count],
            '总记录数': [count],
        }

        # Every remaining column is coerced so that unparsable cells become
        # NaN/NaT and are therefore excluded from the non-null counts below.
        read_cols.remove('名称')
        for col_name in read_cols:
            if col_name == '时间':
                df[col_name] = pd.to_datetime(df[col_name], errors='coerce')
                continue
            df[col_name] = pd.to_numeric(df[col_name], errors='coerce')

        # Non-null cells per column, taken from the first '名称' group.
        count_df = df.groupby(by=['名称']).count().reset_index()
        present_per_col = count_df[read_cols].values[0]
        total_count = present_per_col.sum()
        expected_cells = count * len(read_cols)
        print(wind_name, total_count, expected_cells)

        summary['平均缺失率,单位%'] = [save_percent(1 - total_count / expected_cells)]
        missing_parts = [
            f'{col_name}_{str(count - present)}'
            for col_name, present in zip(read_cols, present_per_col)
        ]
        summary['缺失数值'] = ['-'.join(missing_parts)]

        # Rows outside the plausibility limits for the two checked columns.
        error_fengsu_count = len(df.query("(风速 < 0) | (风速 > 80)"))
        error_yougong_gonglv = len(df.query("(发电机有功功率 < -200) | (发电机有功功率 > 2500)"))
        abnormal_ratio = (error_fengsu_count + error_yougong_gonglv) / (2 * count)
        summary['平均异常率'] = [save_percent(abnormal_ratio)]

        result_df = pd.DataFrame(summary)
    except Exception as exc:
        # Print which file failed before propagating the error.
        print(file_path)
        raise exc

    return result_df
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Columns requested from every input file; '名称' and '时间' are required
    # by read_and_select, the rest are the measured channels.
    read_cols_str = '名称,时间,发电机有功功率,发电机转速,发电机驱动端轴承温度,发电机非驱动端轴承温度,发电机定子U相线圈温度,发电机定子V相线圈温度,发电机定子W相线圈温度,实际扭矩,设定扭矩,仪表盘风速,舱内温度,控制柜内温度,舱外温度,风向,风速,机舱风向夹角,1#桨叶片角度,1#桨设定角度,2#桨叶片角度,2#桨设定角度,3#桨叶片角度,3#桨设定角度,1#桨电机温度,2#桨电机温度,3#桨电机温度,轮毂内温度,齿轮箱油泵吸油口油压,齿轮箱分配器位置油压,偏航液压刹车系统蓄能罐压力,主轴转速,齿轮箱油路入口温度,齿轮箱中间轴驱动端轴承温度,齿轮箱中间轴非驱动端轴承温度,齿轮箱油池温度,主轴承外圈温度,可利用率,机舱位置,总扭缆角度'
    # Drop empty entries that a stray comma would produce.
    read_cols = list(filter(None, read_cols_str.split(",")))
    read_dir = r'D:\data\tmp_data\10分'

    files = read_excel_files(read_dir)
    # One (path, columns) argument tuple per file for starmap.
    tasks = [(os.path.join(read_dir, file_name), read_cols) for file_name in files]

    # Analyse the files in parallel with four worker processes.
    with multiprocessing.Pool(4) as pool:
        dfs = pool.starmap(read_and_select, tasks)

    # Merge the per-file summaries, order them by '风机号', and export.
    df = pd.concat(dfs, ignore_index=True).sort_values(by=['风机号'])

    df.to_csv("神木风电场-10分钟.csv", encoding='utf8', index=False)
|