123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596 |
- import calendar
- import datetime
- import math
- import multiprocessing
- import pandas as pd
- from utils.file.trans_methods import read_excel_files, read_file_to_df
- def get_year_days(year):
- now_year = datetime.datetime.now().year
- if now_year == year:
- today = datetime.date.today()
- # 获取昨天的日期
- yesterday = today - datetime.timedelta(days=1)
- # 获取今年的第一天
- start_of_year = datetime.date(yesterday.year, 1, 1)
- # 计算从年初到昨天的天数
- return (yesterday - start_of_year).days + 1
- if calendar.isleap(year):
- return 366
- else:
- return 365
- def save_percent(value, save_decimal=7):
- return round(value, save_decimal) * 100
- if __name__ == '__main__':
- read_dir = r'D:\data\综合报表22-24年'
- all_fils = read_excel_files(read_dir)
- with multiprocessing.Pool(6) as pool:
- dfs = pool.map(read_file_to_df, all_fils)
- df = pd.concat(dfs, ignore_index=True)
- del_cols = ['Unnamed: 0', '序号', 'times']
- for col in del_cols:
- del df[col]
- df = df.query("风机 != '完整'")
- numic_cols = ['数据有效性', '历史总有功发电量', '历史总有功耗电量',
- '查询区间有功发电量', '查询区间有功耗电量', '历史总无功发电量', '历史总无功耗电量',
- '查询区间无功发电量',
- '查询区间无功耗电量', '时间可利用率', '最大风速', '最小风速', '平均风速', '空气密度', '最大有功功率',
- '最小有功功率', '平均有功功率', '平均无功功率', '电网停机次数', '累计运行时间', '有效风时数',
- '满发时间',
- '启动时间', '启动次数', '并网发电时间', '等效发电时间', '正常发电时间', '调度限功率发电时间',
- '风机限功率发电时间',
- '停机时间', '维护停机时间', '故障停机时间', '调度停机时间', '气象停机时间', '电网停机时间',
- '远程停机时间',
- '待机时间', '户外平均温度', '机舱最高温度', '维护停机次数', '气象停机次数', '故障停机次数',
- '报警发电时间',
- '报警发电次数', '偏航时长', '偏航次数', '通讯中断时间', '通讯故障次数', '调度限功率发电损失电量',
- '风机限功率发电损失电量', '气象停机损失电量', '调度限功率停机损失电量', '远程停机损失电量',
- '维护停机损失电量',
- '风机故障停机损失电量', '电网停机损失电量']
- for numic_col in numic_cols:
- df[numic_col] = pd.to_numeric(df[numic_col], errors='coerce')
- cols = df.columns
- df['year'] = pd.to_datetime(df['时间'], errors='coerce').dt.year
- group_df = df.groupby(by=['year', '风机']).count()
- group_df.reset_index(inplace=True)
- count_df = pd.DataFrame(group_df)
- # now_df.to_csv('聚合后.csv', encoding='utf-8', index=False)
- years = count_df['year'].unique()
- wind_names = count_df['风机'].unique()
- numic_cols.insert(0, '时间')
- result_df = pd.DataFrame()
- for year in years:
- year_days = get_year_days(year)
- for wind_name in wind_names:
- count = count_df[(count_df['year'] == year) & (count_df['风机'] == wind_name)][numic_cols].values[0].sum()
- print(year, wind_name, count, len(numic_cols) * year_days)
- now_df = pd.DataFrame()
- now_df['时间'] = [int(year)]
- now_df['风机'] = [wind_name]
- now_df['缺失均值'] = [save_percent(count / (len(numic_cols) * year_days))]
- result_df = pd.concat([result_df, now_df])
- result_df.to_csv('年度平均缺失率.csv', encoding='utf-8', index=False)
|