import calendar import datetime import math import multiprocessing import pandas as pd from utils.file.trans_methods import read_excel_files, read_file_to_df def get_year_days(year): now_year = datetime.datetime.now().year if now_year == year: today = datetime.date.today() # 获取昨天的日期 yesterday = today - datetime.timedelta(days=1) # 获取今年的第一天 start_of_year = datetime.date(yesterday.year, 1, 1) # 计算从年初到昨天的天数 return (yesterday - start_of_year).days + 1 if calendar.isleap(year): return 366 else: return 365 def save_percent(value, save_decimal=7): return round(value, save_decimal) * 100 if __name__ == '__main__': read_dir = r'D:\data\综合报表22-24年' all_fils = read_excel_files(read_dir) with multiprocessing.Pool(6) as pool: dfs = pool.map(read_file_to_df, all_fils) df = pd.concat(dfs, ignore_index=True) del_cols = ['Unnamed: 0', '序号', 'times'] for col in del_cols: del df[col] df = df.query("风机 != '完整'") numic_cols = ['数据有效性', '历史总有功发电量', '历史总有功耗电量', '查询区间有功发电量', '查询区间有功耗电量', '历史总无功发电量', '历史总无功耗电量', '查询区间无功发电量', '查询区间无功耗电量', '时间可利用率', '最大风速', '最小风速', '平均风速', '空气密度', '最大有功功率', '最小有功功率', '平均有功功率', '平均无功功率', '电网停机次数', '累计运行时间', '有效风时数', '满发时间', '启动时间', '启动次数', '并网发电时间', '等效发电时间', '正常发电时间', '调度限功率发电时间', '风机限功率发电时间', '停机时间', '维护停机时间', '故障停机时间', '调度停机时间', '气象停机时间', '电网停机时间', '远程停机时间', '待机时间', '户外平均温度', '机舱最高温度', '维护停机次数', '气象停机次数', '故障停机次数', '报警发电时间', '报警发电次数', '偏航时长', '偏航次数', '通讯中断时间', '通讯故障次数', '调度限功率发电损失电量', '风机限功率发电损失电量', '气象停机损失电量', '调度限功率停机损失电量', '远程停机损失电量', '维护停机损失电量', '风机故障停机损失电量', '电网停机损失电量'] for numic_col in numic_cols: df[numic_col] = pd.to_numeric(df[numic_col], errors='coerce') cols = df.columns df['year'] = pd.to_datetime(df['时间'], errors='coerce').dt.year group_df = df.groupby(by=['year', '风机']).count() group_df.reset_index(inplace=True) count_df = pd.DataFrame(group_df) # now_df.to_csv('聚合后.csv', encoding='utf-8', index=False) years = count_df['year'].unique() wind_names = count_df['风机'].unique() numic_cols.insert(0, '时间') result_df = pd.DataFrame() for year in years: year_days = get_year_days(year) for wind_name in wind_names: count = count_df[(count_df['year'] == year) & (count_df['风机'] == wind_name)][numic_cols].values[0].sum() print(year, wind_name, count, len(numic_cols) * year_days) now_df = pd.DataFrame() now_df['时间'] = [int(year)] now_df['风机'] = [wind_name] now_df['缺失均值'] = [save_percent(count / (len(numic_cols) * year_days))] result_df = pd.concat([result_df, now_df]) result_df.to_csv('年度平均缺失率.csv', encoding='utf-8', index=False)