qitaihe_biaozhunhua_minute.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. import datetime
  2. import multiprocessing
  3. import os
  4. import sys
  5. sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
  6. import pandas as pd
  7. from utils.file.trans_methods import read_file_to_df, read_excel_files
  8. def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
  9. """
  10. 获取俩个时间之间的个数
  11. :return: 查询时间间隔
  12. """
  13. delta = end_time - start_time
  14. total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
  15. return abs(int(total_seconds / time_space)) + 1
  16. def save_percent(value, save_decimal=7):
  17. return round(value, save_decimal) * 100
  18. def read_and_select(file):
  19. result_df = pd.DataFrame()
  20. # wind_name = os.path.basename(file_path).split('.')[0]
  21. df = read_file_to_df(file)
  22. read_cols = list(df.columns)
  23. read_cols.remove('系统时间')
  24. read_cols.remove('风机号')
  25. wind_name = os.path.basename(file).replace('.csv', '')
  26. result_df['风机号'] = [wind_name]
  27. # df = df.query("(Time>='2024-06-01 00:00:00') & (Time<'2024-12-01 00:00:00')")
  28. df['系统时间'] = pd.to_datetime(df['系统时间'], errors='coerce')
  29. df = df[df['系统时间'] <= '2024-11-20 09:10:00']
  30. # count = get_time_space_count(df['系统时间'].min(), df['系统时间'].max(), 600)
  31. count = 59959
  32. print(df['系统时间'].min(), df['系统时间'].max(), count)
  33. repeat_time_count = df.shape[0] - len(df['系统时间'].unique())
  34. print(wind_name, count, repeat_time_count)
  35. result_df['重复率'] = [save_percent(repeat_time_count / count)]
  36. result_df['重复次数'] = [repeat_time_count]
  37. result_df['总记录数'] = [count]
  38. result_df['数据条数'] = [df.shape[0]]
  39. for read_col in read_cols:
  40. if read_col not in ['系统时间']:
  41. df[read_col] = pd.to_numeric(df[read_col], errors='coerce')
  42. group_df = df.groupby(by=['风机号']).count()
  43. group_df.reset_index(inplace=True)
  44. count_df = pd.DataFrame(group_df)
  45. total_count = count_df[read_cols].values[0].sum()
  46. print(wind_name, total_count, count * len(read_cols))
  47. result_df['平均缺失率,单位%'] = [save_percent(1 - total_count / (count * len(read_cols)))]
  48. # result_df['缺失数值'] = [
  49. # '-'.join(
  50. # [str(read_cols[index]) + ':' + str(count - i) for index, i in enumerate(count_df[read_cols].values[0])])]
  51. del group_df
  52. fengsu_count = 0
  53. fengsu_cols = ['1秒平均风速[m/s]']
  54. fengsu_str = ''
  55. for col in fengsu_cols:
  56. now_count = df[(df[col] < 0) | (df[col] > 80)].shape[0]
  57. fengsu_count = fengsu_count + now_count
  58. fengsu_str = fengsu_str + ',' + col + ':' + str(fengsu_count)
  59. result_df['风速异常'] = [fengsu_str]
  60. gonglv_cols = ['有功功率[kW]']
  61. gonglv_count = 0
  62. gonglv_str = ''
  63. for col in gonglv_cols:
  64. now_count = df[(df[col] < -200) | (df[col] > 3000)].shape[0]
  65. gonglv_count = gonglv_count + now_count
  66. gonglv_str = gonglv_str + ',' + col + ':' + str(gonglv_count)
  67. result_df['功率异常'] = [gonglv_str]
  68. result_df['平均异常率'] = [
  69. save_percent((fengsu_count + fengsu_count) / ((len(fengsu_cols) + len(gonglv_cols)) * count))]
  70. return result_df
  71. def save_to_csv(df: pd.DataFrame, path):
  72. df.to_csv(path, encoding='utf8', index=False)
  73. if __name__ == '__main__':
  74. # read_cols = ['Time', '设备主要状态', '功率曲线风速', '湍流强度', '实际风速', '有功功率', '桨叶角度A', '桨叶角度B',
  75. # '桨叶角度C', '机舱内温度', '机舱外温度', '绝对风向', '机舱绝对位置', '叶轮转速', '发电机转速',
  76. # '瞬时风速',
  77. # '有功设定反馈', '当前理论可发最大功率', '空气密度', '偏航误差', '发电机扭矩', '瞬时功率', '风向1s',
  78. # '偏航压力', '桨叶1速度', '桨叶2速度', '桨叶3速度', '桨叶1角度给定', '桨叶2角度给定', '桨叶3角度给定',
  79. # '轴1电机电流', '轴2电机电流', '轴3电机电流', '轴1电机温度', '轴2电机温度', '轴3电机温度', '待机',
  80. # '启动',
  81. # '偏航', '并网', '限功率', '正常发电', '故障', '计入功率曲线', '运行发电机冷却风扇1',
  82. # '运行发电机冷却风扇2',
  83. # '激活偏航解缆阀', '激活偏航刹车阀', '激活风轮刹车阀', '激活顺时针偏航', '激活逆时针偏航', '电缆扭角']
  84. # select_cols = ['风机号', '系统时间', 'power', 'iwinfil', 'hubpos1', 'hubpos2', 'hubpos3', 'windir']
  85. # read_dir = r'D:\data\shouzi\qitaihe\十分钟数据'
  86. # files = read_excel_files(read_dir)
  87. # dfs = list()
  88. # with multiprocessing.Pool(len(files)) as pool:
  89. # dfs = pool.map(read_file_to_df, files)
  90. # df = pd.concat(dfs, ignore_index=True)
  91. # print(df.columns)
  92. # df['系统时间'] = pd.to_datetime(df['系统时间'], errors='coerce')
  93. # df['风机号'] = pd.to_numeric(df['sheet_name'], errors='coerce')
  94. # del df['sheet_name']
  95. #
  96. # wind_names = df['风机号'].unique()
  97. tmp_save_dir = r'D:\data\shouzi\qitaihe\tmp'
  98. # with multiprocessing.Pool(4) as pool:
  99. # pool.starmap(save_to_csv,
  100. # [(df[df['风机号'] == wind_name], os.path.join(tmp_save_dir, str(wind_name) + '.csv')) for wind_name
  101. # in
  102. # wind_names])
  103. #
  104. # del df
  105. all_fils = read_excel_files(tmp_save_dir)
  106. with multiprocessing.Pool(10) as pool:
  107. dfs = pool.starmap(read_and_select,
  108. [(file,) for file in all_fils])
  109. resu_df = pd.concat(dfs, ignore_index=True)
  110. print(resu_df.columns)
  111. resu_df.sort_values(by=['风机号'], inplace=True)
  112. resu_df.to_csv("七台河-10分.csv", encoding='utf8', index=False)