|
@@ -0,0 +1,96 @@
|
|
|
+import calendar
|
|
|
+import datetime
|
|
|
+import math
|
|
|
+import multiprocessing
|
|
|
+
|
|
|
+import pandas as pd
|
|
|
+
|
|
|
+from utils.file.trans_methods import read_excel_files, read_file_to_df
|
|
|
+
|
|
|
+
|
|
|
+def get_year_days(year):
|
|
|
+ now_year = datetime.datetime.now().year
|
|
|
+
|
|
|
+ if now_year == year:
|
|
|
+ today = datetime.date.today()
|
|
|
+ # 获取昨天的日期
|
|
|
+ yesterday = today - datetime.timedelta(days=1)
|
|
|
+ # 获取今年的第一天
|
|
|
+ start_of_year = datetime.date(yesterday.year, 1, 1)
|
|
|
+ # 计算从年初到昨天的天数
|
|
|
+ return (yesterday - start_of_year).days + 1
|
|
|
+
|
|
|
+ if calendar.isleap(year):
|
|
|
+ return 366
|
|
|
+ else:
|
|
|
+ return 365
|
|
|
+
|
|
|
+
|
|
|
+def save_percent(value, save_decimal=7):
|
|
|
+ return round(value, save_decimal) * 100
|
|
|
+
|
|
|
+
|
|
|
+if __name__ == '__main__':
|
|
|
+
|
|
|
+ read_dir = r'D:\data\综合报表22-24年'
|
|
|
+
|
|
|
+ all_fils = read_excel_files(read_dir)
|
|
|
+
|
|
|
+ with multiprocessing.Pool(6) as pool:
|
|
|
+ dfs = pool.map(read_file_to_df, all_fils)
|
|
|
+
|
|
|
+ df = pd.concat(dfs, ignore_index=True)
|
|
|
+ del_cols = ['Unnamed: 0', '序号', 'times']
|
|
|
+
|
|
|
+ for col in del_cols:
|
|
|
+ del df[col]
|
|
|
+
|
|
|
+ df = df.query("风机 != '完整'")
|
|
|
+
|
|
|
+ numic_cols = ['数据有效性', '历史总有功发电量', '历史总有功耗电量',
|
|
|
+ '查询区间有功发电量', '查询区间有功耗电量', '历史总无功发电量', '历史总无功耗电量',
|
|
|
+ '查询区间无功发电量',
|
|
|
+ '查询区间无功耗电量', '时间可利用率', '最大风速', '最小风速', '平均风速', '空气密度', '最大有功功率',
|
|
|
+ '最小有功功率', '平均有功功率', '平均无功功率', '电网停机次数', '累计运行时间', '有效风时数',
|
|
|
+ '满发时间',
|
|
|
+ '启动时间', '启动次数', '并网发电时间', '等效发电时间', '正常发电时间', '调度限功率发电时间',
|
|
|
+ '风机限功率发电时间',
|
|
|
+ '停机时间', '维护停机时间', '故障停机时间', '调度停机时间', '气象停机时间', '电网停机时间',
|
|
|
+ '远程停机时间',
|
|
|
+ '待机时间', '户外平均温度', '机舱最高温度', '维护停机次数', '气象停机次数', '故障停机次数',
|
|
|
+ '报警发电时间',
|
|
|
+ '报警发电次数', '偏航时长', '偏航次数', '通讯中断时间', '通讯故障次数', '调度限功率发电损失电量',
|
|
|
+ '风机限功率发电损失电量', '气象停机损失电量', '调度限功率停机损失电量', '远程停机损失电量',
|
|
|
+ '维护停机损失电量',
|
|
|
+ '风机故障停机损失电量', '电网停机损失电量']
|
|
|
+
|
|
|
+ for numic_col in numic_cols:
|
|
|
+ df[numic_col] = pd.to_numeric(df[numic_col], errors='coerce')
|
|
|
+
|
|
|
+ cols = df.columns
|
|
|
+ df['year'] = pd.to_datetime(df['时间'], errors='coerce').dt.year
|
|
|
+
|
|
|
+ group_df = df.groupby(by=['year', '风机']).count()
|
|
|
+ group_df.reset_index(inplace=True)
|
|
|
+ count_df = pd.DataFrame(group_df)
|
|
|
+
|
|
|
+ # now_df.to_csv('聚合后.csv', encoding='utf-8', index=False)
|
|
|
+
|
|
|
+ years = count_df['year'].unique()
|
|
|
+ wind_names = count_df['风机'].unique()
|
|
|
+ numic_cols.insert(0, '时间')
|
|
|
+
|
|
|
+ result_df = pd.DataFrame()
|
|
|
+ for year in years:
|
|
|
+ year_days = get_year_days(year)
|
|
|
+ for wind_name in wind_names:
|
|
|
+ count = count_df[(count_df['year'] == year) & (count_df['风机'] == wind_name)][numic_cols].values[0].sum()
|
|
|
+ print(year, wind_name, count, len(numic_cols) * year_days)
|
|
|
+ now_df = pd.DataFrame()
|
|
|
+ now_df['时间'] = [int(year)]
|
|
|
+ now_df['风机'] = [wind_name]
|
|
|
+ now_df['缺失均值'] = [save_percent(count / (len(numic_cols) * year_days))]
|
|
|
+
|
|
|
+ result_df = pd.concat([result_df, now_df])
|
|
|
+
|
|
|
+ result_df.to_csv('年度平均缺失率.csv', encoding='utf-8', index=False)
|