qinghai-nuomuhong.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. # -*- coding: utf-8 -*-
  2. """
Spyder editor.
This is a temporary script file.
  5. """
  6. import copy
  7. import datetime
  8. import multiprocessing
  9. from os import *
  10. import numpy as np
  11. import pandas as pd
  12. dianjian_str = """
  13. wind_turbine_number
  14. time_stamp 时间
  15. active_power 有功功率 kW
  16. rotor_speed 风轮转速 rpm
  17. generator_speed 发电机转速 rpm
  18. wind_velocity 风速 m/s
  19. pitch_angle_blade_1 叶片1角度 °
  20. pitch_angle_blade_2 叶片2角度 °
  21. pitch_angle_blade_3 叶片3角度 °
  22. cabin_position 机舱位置 °
  23. true_wind_direction
  24. yaw_error1 风向 °
  25. twisted_cable_angle
  26. main_bearing_temperature 主轴温度 ℃
  27. gearbox_oil_temperature 齿轮箱温度 ℃
  28. gearbox_low_speed_shaft_bearing_temperature 齿轮箱轴承温度 ℃
  29. gearboxmedium_speed_shaftbearing_temperature
  30. gearbox_high_speed_shaft_bearing_temperature 齿轮箱轴承温度2 ℃
  31. generatordrive_end_bearing_temperature 发电机驱动侧轴承温度 ℃
  32. generatornon_drive_end_bearing_temperature 发电机非驱动侧轴承温度 ℃
  33. cabin_temperature 机舱温度 ℃
  34. outside_cabin_temperature 舱外温度 ℃
  35. generator_winding1_temperature
  36. generator_winding2_temperature
  37. generator_winding3_temperature
  38. front_back_vibration_of_the_cabin
  39. side_to_side_vibration_of_the_cabin
  40. required_gearbox_speed
  41. inverter_speed_master_control
  42. actual_torque
  43. given_torque
  44. clockwise_yaw_count
  45. counterclockwise_yaw_count
  46. unusable
  47. power_curve_available
  48. set_value_of_active_power 有功功率设定 kW
  49. wind_turbine_status
  50. wind_turbine_status2
  51. turbulence_intensity
  52. """
  53. datas = [i for i in dianjian_str.split("\n") if i]
  54. dianjian_dict = dict()
  55. for data in datas:
  56. ds = data.split("\t")
  57. if len(ds) == 3:
  58. dianjian_dict[ds[0]] = ds[2]
  59. else:
  60. dianjian_dict[ds[0]] = ''
  61. def read_df(file_path):
  62. df = pd.read_csv(file_path, header=[0, 1])
  63. col_nams_map = dict()
  64. pre_col = ""
  65. for tuple_col in df.columns:
  66. col1 = tuple_col[0]
  67. col2 = tuple_col[1]
  68. if str(col1).startswith("Unnamed"):
  69. if pre_col:
  70. col1 = pre_col
  71. pre_col = ''
  72. else:
  73. col1 = ''
  74. else:
  75. pre_col = col1
  76. if str(col2).startswith("Unnamed"):
  77. col2 = ''
  78. col_nams_map[str(tuple_col)] = ''.join([col1, col2])
  79. print(col_nams_map)
  80. for k, v in col_nams_map.items():
  81. if str(v).endswith('采样值'):
  82. col_nams_map[k] = str(v)[:-3]
  83. df.columns = [str(col) for col in df.columns]
  84. df.rename(columns=col_nams_map, inplace=True)
  85. for col, name in dianjian_dict.items():
  86. if name in df.columns:
  87. df.rename(columns={name: col}, inplace=True)
  88. for col in df.columns:
  89. if col not in dianjian_dict.keys():
  90. del df[col]
  91. return df
  92. def get_wind_name_files(path):
  93. files = listdir(path)
  94. wind_files_map = dict()
  95. for file in files:
  96. full_file = path.join(path, file)
  97. file_datas = str(file).split("@")
  98. key = file_datas[0].replace("HD", "HD2")
  99. if key in wind_files_map.keys():
  100. wind_files_map[key].append(full_file)
  101. else:
  102. wind_files_map[key] = [full_file]
  103. return wind_files_map
  104. def combine_df(save_path, wind_name, files):
  105. begin = datetime.datetime.now()
  106. df = pd.DataFrame()
  107. for file in files:
  108. query_df = read_df(file)
  109. print("读取", file, query_df.shape)
  110. query_df['time_stamp'] = pd.to_datetime(query_df['time_stamp'])
  111. query_df.set_index(keys='time_stamp', inplace=True)
  112. query_df = query_df[~query_df.index.duplicated(keep='first')]
  113. if df.empty:
  114. df = copy.deepcopy(query_df)
  115. else:
  116. df = pd.concat([df, query_df], join='inner')
  117. df.reset_index(inplace=True)
  118. df['wind_turbine_number'] = wind_name
  119. for col, name in dianjian_dict.items():
  120. if col not in df.columns:
  121. df[col] = np.nan
  122. df = df[dianjian_dict.keys()]
  123. df.to_csv(path.join(save_path, wind_name + ".csv"), encoding='utf-8', index=False)
  124. print(wind_name, '整理完成', '耗时:', (datetime.datetime.now() - begin).seconds)
  125. if __name__ == '__main__':
  126. read_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/sec'
  127. save_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/sec_采样值'
  128. # read_path = r'D:\trans_data\诺木洪\收资数据\min'
  129. # save_path = r'D:\trans_data\诺木洪\清理数据\min'
  130. if not path.exists(save_path):
  131. makedirs(save_path, exist_ok=True)
  132. wind_files_map = get_wind_name_files(read_path)
  133. with multiprocessing.Pool(20) as pool:
  134. pool.starmap(combine_df, [(save_path, wind_name, files) for wind_name, files in wind_files_map.items()])