年度平均缺失率.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. import calendar
  2. import datetime
  3. import math
  4. import multiprocessing
  5. import pandas as pd
  6. from utils.file.trans_methods import read_excel_files, read_file_to_df
  7. def get_year_days(year):
  8. now_year = datetime.datetime.now().year
  9. if now_year == year:
  10. today = datetime.date.today()
  11. # 获取昨天的日期
  12. yesterday = today - datetime.timedelta(days=1)
  13. # 获取今年的第一天
  14. start_of_year = datetime.date(yesterday.year, 1, 1)
  15. # 计算从年初到昨天的天数
  16. return (yesterday - start_of_year).days + 1
  17. if calendar.isleap(year):
  18. return 366
  19. else:
  20. return 365
  21. def save_percent(value, save_decimal=7):
  22. return round(value, save_decimal) * 100
  23. if __name__ == '__main__':
  24. read_dir = r'D:\data\综合报表22-24年'
  25. all_fils = read_excel_files(read_dir)
  26. with multiprocessing.Pool(6) as pool:
  27. dfs = pool.map(read_file_to_df, all_fils)
  28. df = pd.concat(dfs, ignore_index=True)
  29. del_cols = ['Unnamed: 0', '序号', 'times']
  30. for col in del_cols:
  31. del df[col]
  32. df = df.query("风机 != '完整'")
  33. numic_cols = ['数据有效性', '历史总有功发电量', '历史总有功耗电量',
  34. '查询区间有功发电量', '查询区间有功耗电量', '历史总无功发电量', '历史总无功耗电量',
  35. '查询区间无功发电量',
  36. '查询区间无功耗电量', '时间可利用率', '最大风速', '最小风速', '平均风速', '空气密度', '最大有功功率',
  37. '最小有功功率', '平均有功功率', '平均无功功率', '电网停机次数', '累计运行时间', '有效风时数',
  38. '满发时间',
  39. '启动时间', '启动次数', '并网发电时间', '等效发电时间', '正常发电时间', '调度限功率发电时间',
  40. '风机限功率发电时间',
  41. '停机时间', '维护停机时间', '故障停机时间', '调度停机时间', '气象停机时间', '电网停机时间',
  42. '远程停机时间',
  43. '待机时间', '户外平均温度', '机舱最高温度', '维护停机次数', '气象停机次数', '故障停机次数',
  44. '报警发电时间',
  45. '报警发电次数', '偏航时长', '偏航次数', '通讯中断时间', '通讯故障次数', '调度限功率发电损失电量',
  46. '风机限功率发电损失电量', '气象停机损失电量', '调度限功率停机损失电量', '远程停机损失电量',
  47. '维护停机损失电量',
  48. '风机故障停机损失电量', '电网停机损失电量']
  49. for numic_col in numic_cols:
  50. df[numic_col] = pd.to_numeric(df[numic_col], errors='coerce')
  51. cols = df.columns
  52. df['year'] = pd.to_datetime(df['时间'], errors='coerce').dt.year
  53. group_df = df.groupby(by=['year', '风机']).count()
  54. group_df.reset_index(inplace=True)
  55. count_df = pd.DataFrame(group_df)
  56. # now_df.to_csv('聚合后.csv', encoding='utf-8', index=False)
  57. years = count_df['year'].unique()
  58. wind_names = count_df['风机'].unique()
  59. numic_cols.insert(0, '时间')
  60. result_df = pd.DataFrame()
  61. for year in years:
  62. year_days = get_year_days(year)
  63. for wind_name in wind_names:
  64. count = count_df[(count_df['year'] == year) & (count_df['风机'] == wind_name)][numic_cols].values[0].sum()
  65. print(year, wind_name, count, len(numic_cols) * year_days)
  66. now_df = pd.DataFrame()
  67. now_df['时间'] = [int(year)]
  68. now_df['风机'] = [wind_name]
  69. now_df['缺失均值'] = [save_percent(count / (len(numic_cols) * year_days))]
  70. result_df = pd.concat([result_df, now_df])
  71. result_df.to_csv('年度平均缺失率.csv', encoding='utf-8', index=False)