# qitaihe_biaozhunhua.py

import datetime
import multiprocessing
import os
import sys

# Put the project root (everything above "tmp_file" in this file's path) on
# sys.path so the utils package resolves.
sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])

import pandas as pd

from utils.file.trans_methods import read_file_to_df, read_excel_files


def get_time_space_count(start_time, end_time, time_space=1):
    """
    Get the number of sample points between two times.
    :param time_space: sampling interval in seconds
    :return: number of sample points, both endpoints included
    """
    if isinstance(start_time, str):
        start_time = datetime.datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')
    if isinstance(end_time, str):
        end_time = datetime.datetime.strptime(end_time, '%Y-%m-%d %H:%M:%S')
    delta = end_time - start_time
    total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
    return abs(int(total_seconds / time_space)) + 1
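
# Example (sketch): over one minute at a 5-second cadence,
#     get_time_space_count('2024-01-01 00:00:00', '2024-01-01 00:01:00', 5)
# evaluates to abs(int(60 / 5)) + 1 == 13, i.e. both endpoints are counted.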


def save_percent(value, save_decimal=7):
    # Round the fraction to `save_decimal` places, then express it as a percentage.
    return round(value, save_decimal) * 100
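
# Example (sketch): save_percent(0.1234567) rounds the fraction to 7 places and
# then scales, returning roughly 12.34567; the rounding happens before the
# multiplication by 100, so tiny floating-point artifacts can remain.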


def read_and_select(file):
    result_df = pd.DataFrame()
    # wind_name = os.path.basename(file_path).split('.')[0]
    df = pd.read_csv(file)
    df['systime'] = pd.to_datetime(df['systime'], errors='coerce')
    # condation1 = (df[df['systime'] >= '2024-11-12 00:00:00']) & (df[df['systime'] <= '2024-11-19 12:15:35'])
    # condation2 = (df[df['systime'] >= '2024-12-02 00:00:00']) & (df[df['systime'] <= '2024-12-31 23:59:55'])
    # condation3 = (df[df['systime'] >= '2025-01-01 00:00:00']) & (df[df['systime'] <= '2025-01-21 23:59:55'])
    # condation4 = (df[df['systime'] >= '2025-01-31 00:00:00']) & (df[df['systime'] <= '2025-02-04 23:59:55'])
    #
    # condation = condation1 | condation2 | condation3 | condation4
    #
    # df = df[condation]
    read_cols = list(df.columns)
    read_cols.remove('systime')
    read_cols.remove('wecnum')
    wind_name = os.path.basename(file).replace('.csv', '')
    result_df['wecnum'] = [wind_name]
    # df = df.query("(Time>='2024-06-01 00:00:00') & (Time<'2024-12-01 00:00:00')")
    # Expected number of 5-second sample points across the four collection windows.
    count1 = get_time_space_count('2024-11-12 00:00:00', '2024-11-19 12:15:35', 5)
    count2 = get_time_space_count('2024-12-02 00:00:00', '2024-12-31 23:59:55', 5)
    count3 = get_time_space_count('2025-01-01 00:00:00', '2025-01-21 23:59:55', 5)
    count4 = get_time_space_count('2025-01-31 00:00:00', '2025-02-04 23:59:55', 5)
    count = sum([count1, count2, count3, count4])
    print(df['systime'].min(), df['systime'].max(), count)
    # Rows that share a systime with an earlier row count as duplicates.
    repeat_time_count = df.shape[0] - len(df['systime'].unique())
    print(wind_name, count, repeat_time_count)
    result_df['重复率'] = [save_percent(repeat_time_count / count)]  # duplicate rate, %
    result_df['重复次数'] = [repeat_time_count]  # duplicate count
    result_df['总记录数'] = [count]  # expected total record count
    for read_col in read_cols:
        if read_col not in ['systime', 'plcvernew', 'dmsver', 'scadaver', 'collectime']:
            df[read_col] = pd.to_numeric(df[read_col], errors='coerce')
    group_df = df.groupby(by=['wecnum']).count()
    group_df.reset_index(inplace=True)
    count_df = pd.DataFrame(group_df)
    total_count = count_df[read_cols].values[0].sum()
    print(wind_name, total_count, count * len(read_cols))
    # Missing rate = 1 - non-null cells observed / cells expected.
    result_df['平均缺失率,单位%'] = [save_percent(1 - total_count / (count * len(read_cols)))]  # mean missing rate, %
    # result_df['缺失数值'] = [
    #     '-'.join(
    #         [str(read_cols[index]) + ':' + str(count - i) for index, i in enumerate(count_df[read_cols].values[0])])]
    del group_df
    # Wind speed readings outside [0, 80] (presumably m/s) are flagged abnormal.
    fengsu_count = 0
    fengsu_cols = ['iwinfil']
    fengsu_str = ''
    for col in fengsu_cols:
        now_count = df[(df[col] < 0) | (df[col] > 80)].shape[0]
        fengsu_count = fengsu_count + now_count
        fengsu_str = fengsu_str + ',' + col + ':' + str(now_count)
    result_df['风速异常'] = [fengsu_str]  # wind speed anomalies
    # Power readings outside [-200, 3000] (presumably kW) are flagged abnormal.
    gonglv_cols = ['power']
    gonglv_count = 0
    gonglv_str = ''
    for col in gonglv_cols:
        now_count = df[(df[col] < -200) | (df[col] > 3000)].shape[0]
        gonglv_count = gonglv_count + now_count
        gonglv_str = gonglv_str + ',' + col + ':' + str(now_count)
    result_df['功率异常'] = [gonglv_str]  # power anomalies
    result_df['平均异常率'] = [
        save_percent((fengsu_count + gonglv_count) / ((len(fengsu_cols) + len(gonglv_cols)) * count))]  # mean anomaly rate, %
    return result_df
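
# Sketch of the per-file output: for a hypothetical input '7.csv', read_and_select
# returns a one-row DataFrame with the columns
# ['wecnum', '重复率', '重复次数', '总记录数', '平均缺失率,单位%', '风速异常', '功率异常', '平均异常率']
# (turbine id, duplicate rate, duplicate count, expected records, mean missing rate,
# wind speed anomalies, power anomalies, mean anomaly rate).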


def save_to_csv(df: pd.DataFrame, path):
    df.to_csv(path, encoding='utf8', index=False)


def read_and_select_time(file):
    df = pd.read_csv(file, usecols=['collectime'])
    df['collectime'] = pd.to_datetime(df['collectime'])
    df1 = df[(df['collectime'] >= '2024-11-12 00:00:00') & (df['collectime'] <= '2024-11-19 23:59:59')]
    df2 = df[(df['collectime'] >= '2024-12-02 00:00:00') & (df['collectime'] <= '2024-12-31 23:59:59')]
    df3 = df[(df['collectime'] >= '2025-01-01 00:00:00') & (df['collectime'] <= '2025-01-21 23:59:59')]
    df4 = df[(df['collectime'] >= '2025-01-31 00:00:00') & (df['collectime'] <= '2025-02-04 23:59:59')]
    # Observed (min, max) collectime for each of the four collection windows.
    return [(df1['collectime'].min(), df1['collectime'].max()), (df2['collectime'].min(), df2['collectime'].max()),
            (df3['collectime'].min(), df3['collectime'].max()), (df4['collectime'].min(), df4['collectime'].max())]
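
# Example (sketch): read_and_select_time('.../7.csv') returns four (min, max)
# Timestamp pairs, one per window; a window with no rows yields (NaT, NaT).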


if __name__ == '__main__':
    # read_cols = ['Time', '设备主要状态', '功率曲线风速', '湍流强度', '实际风速', '有功功率', '桨叶角度A', '桨叶角度B',
    #              '桨叶角度C', '机舱内温度', '机舱外温度', '绝对风向', '机舱绝对位置', '叶轮转速', '发电机转速', '瞬时风速',
    #              '有功设定反馈', '当前理论可发最大功率', '空气密度', '偏航误差', '发电机扭矩', '瞬时功率', '风向1s',
    #              '偏航压力', '桨叶1速度', '桨叶2速度', '桨叶3速度', '桨叶1角度给定', '桨叶2角度给定', '桨叶3角度给定',
    #              '轴1电机电流', '轴2电机电流', '轴3电机电流', '轴1电机温度', '轴2电机温度', '轴3电机温度', '待机', '启动',
    #              '偏航', '并网', '限功率', '正常发电', '故障', '计入功率曲线', '运行发电机冷却风扇1', '运行发电机冷却风扇2',
    #              '激活偏航解缆阀', '激活偏航刹车阀', '激活风轮刹车阀', '激活顺时针偏航', '激活逆时针偏航', '电缆扭角']
    # select_cols = ['wecnum', 'systime', 'power', 'iwinfil', 'hubpos1', 'hubpos2', 'hubpos3', 'windir']
    # read_dir = r'/data/download/collection_data/1进行中/七台河风电场-黑龙江-华电/收资数据/七台河/秒级数据/sec'
    # files = read_excel_files(read_dir)
    # dfs = list()
    # with multiprocessing.Pool(33) as pool:
    #     dfs = pool.map(read_file_to_df, files)
    # df = pd.concat(dfs, ignore_index=True)
    # print(df.columns)
    # df['systime'] = pd.to_datetime(df['systime'], errors='coerce')
    # df['wecnum'] = pd.to_numeric(df['wecnum'], errors='coerce')
    # read_cols = list(df.columns)
    # read_cols.remove('systime')
    # read_cols.remove('wecnum')
    #
    # wind_names = df['wecnum'].unique()
    tmp_save_dir = r'/home/wzl/test_data/qitaihe/sec'
    # with multiprocessing.Pool(4) as pool:
    #     pool.starmap(save_to_csv,
    #                  [(df[df['wecnum'] == wind_name], os.path.join(tmp_save_dir, str(wind_name) + '.csv'))
    #                   for wind_name in wind_names])
    #
    # del df
    all_files = read_excel_files(tmp_save_dir)
    with multiprocessing.Pool(10) as pool:
        dfs = pool.starmap(read_and_select, [(file,) for file in all_files])
    resu_df = pd.concat(dfs, ignore_index=True)
    print(resu_df.columns)
    resu_df.sort_values(by=['wecnum'], inplace=True)
    resu_df.to_csv("七台河-5秒.csv", encoding='utf8', index=False)
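
    # One-off pass to recover the actual (min, max) collectime per collection
    # window across all turbine files.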
    # with multiprocessing.Pool(10) as pool:
    #     datas = pool.map(read_and_select_time, all_files)
    #
    # min1 = list()
    # max1 = list()
    #
    # min2 = list()
    # max2 = list()
    #
    # min3 = list()
    # max3 = list()
    #
    # min4 = list()
    # max4 = list()
    #
    # for data in datas:
    #     print(data)
    #     data1, data2, data3, data4 = data[0], data[1], data[2], data[3]
    #     min1.append(data1[0])
    #     max1.append(data1[1])
    #
    #     min2.append(data2[0])
    #     max2.append(data2[1])
    #
    #     min3.append(data3[0])
    #     max3.append(data3[1])
    #
    #     min4.append(data4[0])
    #     max4.append(data4[1])
    #
    # print(min(min1), max(max1))
    # print(min(min2), max(max2))
    # print(min(min3), max(max3))
    # print(min(min4), max(max4))