qinghai-nuomuhong.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. # -*- coding: utf-8 -*-
  2. """
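Clean and merge raw per-turbine CSV exports into one standardized CSV per turbine.

Originally created in the Spyder editor as a temporary script file.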
  5. """
  6. import copy
  7. import datetime
  8. import multiprocessing
  9. import os
  10. import pandas as pd
  11. import numpy as np
  12. dianjian_str = """
  13. wind_turbine_number
  14. time_stamp 时间
  15. active_power 有功功率 kW
  16. rotor_speed 风轮转速 rpm
  17. generator_speed 发电机转速 rpm
  18. wind_velocity 风速 m/s
  19. pitch_angle_blade_1 叶片1角度 °
  20. pitch_angle_blade_2 叶片2角度 °
  21. pitch_angle_blade_3 叶片3角度 °
  22. cabin_position 机舱位置 °
  23. true_wind_direction
  24. yaw_error1 风向 °
  25. twisted_cable_angle
  26. main_bearing_temperature 主轴温度 ℃
  27. gearbox_oil_temperature 齿轮箱温度 ℃
  28. gearbox_low_speed_shaft_bearing_temperature 齿轮箱轴承温度 ℃
  29. gearboxmedium_speed_shaftbearing_temperature
  30. gearbox_high_speed_shaft_bearing_temperature 齿轮箱轴承温度2 ℃
  31. generatordrive_end_bearing_temperature 发电机驱动侧轴承温度 ℃
  32. generatornon_drive_end_bearing_temperature 发电机非驱动侧轴承温度 ℃
  33. cabin_temperature 机舱温度 ℃
  34. outside_cabin_temperature 舱外温度 ℃
  35. generator_winding1_temperature
  36. generator_winding2_temperature
  37. generator_winding3_temperature
  38. front_back_vibration_of_the_cabin
  39. side_to_side_vibration_of_the_cabin
  40. required_gearbox_speed
  41. inverter_speed_master_control
  42. actual_torque
  43. given_torque
  44. clockwise_yaw_count
  45. counterclockwise_yaw_count
  46. unusable
  47. power_curve_available
  48. set_value_of_active_power 有功功率设定 kW
  49. wind_turbine_status
  50. wind_turbine_status2
  51. turbulence_intensity
  52. """
  53. datas = [i for i in dianjian_str.split("\n") if i]
  54. dianjian_dict = dict()
  55. for data in datas:
  56. ds = data.split("\t")
  57. if len(ds) == 3:
  58. dianjian_dict[ds[0]] = ds[2]
  59. else:
  60. dianjian_dict[ds[0]] = ''
  61. def read_df(file_path):
  62. df = pd.read_csv(file_path, header=[0, 1])
  63. col_nams_map = dict()
  64. pre_col = ""
  65. for tuple_col in df.columns:
  66. col1 = tuple_col[0]
  67. col2 = tuple_col[1]
  68. if str(col1).startswith("Unnamed"):
  69. if pre_col:
  70. col1 = pre_col
  71. pre_col = ''
  72. else:
  73. col1 = ''
  74. else:
  75. pre_col = col1
  76. if str(col2).startswith("Unnamed"):
  77. col2 = ''
  78. col_nams_map[str(tuple_col)] = ''.join([col1, col2])
  79. for k, v in col_nams_map.items():
  80. if str(v).endswith('均值'):
  81. col_nams_map[k] = str(v)[:-2]
  82. df.columns = [str(col) for col in df.columns]
  83. df.rename(columns=col_nams_map, inplace=True)
  84. for col, name in dianjian_dict.items():
  85. if name in df.columns:
  86. df.rename(columns={name: col}, inplace=True)
  87. for col in df.columns:
  88. if col not in dianjian_dict.keys():
  89. del df[col]
  90. return df
  91. def get_wind_name_files(path):
  92. files = os.listdir(path)
  93. wind_files_map = dict()
  94. for file in files:
  95. full_file = os.path.join(path, file)
  96. file_datas = str(file).split("@")
  97. key = file_datas[0].replace("HD", "HD2")
  98. if key in wind_files_map.keys():
  99. wind_files_map[key].append(full_file)
  100. else:
  101. wind_files_map[key] = [full_file]
  102. return wind_files_map
  103. def combine_df(save_path, wind_name, files):
  104. begin = datetime.datetime.now()
  105. df = pd.DataFrame()
  106. for file in files:
  107. query_df = read_df(file)
  108. print("读取", file, query_df.shape)
  109. query_df['time_stamp'] = pd.to_datetime(query_df['time_stamp'])
  110. query_df.set_index(keys='time_stamp', inplace=True)
  111. query_df = query_df[~query_df.index.duplicated(keep='first')]
  112. if df.empty:
  113. df = copy.deepcopy(query_df)
  114. else:
  115. df = pd.concat([df, query_df], axis=1, join='inner')
  116. df.reset_index(inplace=True)
  117. df['wind_turbine_number'] = wind_name
  118. for col, name in dianjian_dict.items():
  119. if col not in df.columns:
  120. df[col] = np.nan
  121. df = df[dianjian_dict.keys()]
  122. df.to_csv(os.path.join(save_path, wind_name + ".csv"), encoding='utf-8', index=False)
  123. print(wind_name, '整理完成', '耗时:', (datetime.datetime.now() - begin).seconds)
  124. if __name__ == '__main__':
  125. read_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/sec'
  126. save_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/清理数据/sec'
  127. # read_path = r'D:\trans_data\诺木洪\收资数据\min'
  128. # save_path = r'D:\trans_data\诺木洪\清理数据\min'
  129. if not os.path.exists(save_path):
  130. os.makedirs(save_path, exist_ok=True)
  131. wind_files_map = get_wind_name_files(read_path)
  132. with multiprocessing.Pool(6) as pool:
  133. pool.starmap(combine_df, [(save_path, wind_name, files) for wind_name, files in wind_files_map.items()])