# data_clean.py
  1. import os
  2. import json
  3. import pandas as pd
  4. import numpy as np
  5. import matplotlib.pyplot as plt
  6. from typing import Tuple, List
  7. import warnings
  8. import sys
  9. import frequency_filter as ff
  10. from datetime import datetime
  11. from scipy.optimize import least_squares, differential_evolution
  12. from scipy.signal import savgol_filter
# Silence every FutureWarning raised in this process.
warnings.filterwarnings("ignore", category=FutureWarning)
# Use the SimHei font for sans-serif text
# (presumably so that Chinese plot labels render; confirm the font is installed).
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False
  16. def result_main():
  17. python_interpreter_path = sys.executable
  18. project_directory = os.path.dirname(python_interpreter_path)
  19. data_folder = os.path.join(project_directory, 'data')
  20. if not os.path.exists(data_folder):
  21. os.makedirs(data_folder)
  22. csv_file_path = os.path.join(data_folder, 'history_data.csv')
  23. if not os.path.exists(csv_file_path):
  24. pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率',
  25. '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
  26. '叶片1净空值', '叶片2净空值', '叶片3净空值',
  27. '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
  28. '振动幅值', '振动主频']).to_csv(csv_file_path, index=False)
  29. return csv_file_path
  30. def delete_data(name):
  31. python_interpreter_path = sys.executable
  32. project_directory = os.path.dirname(python_interpreter_path)
  33. data_folder = os.path.join(project_directory, 'data')
  34. csv_file_path = os.path.join(data_folder, 'history_data.csv')
  35. df = pd.read_csv(csv_file_path)
  36. condition = ((df['时间'].astype(str).str.contains(name[0])) &
  37. (df['场站'].astype(str).str.contains(name[1])) &
  38. (df['风机编号'].astype(str).str.contains(name[2])))
  39. df = df[~condition]
  40. df.to_csv(csv_file_path, index=False)
  41. return csv_file_path
  42. def history_data(name):
  43. time_code = name[0]
  44. wind_name = name[1]
  45. turbine_code = name[2]
  46. python_interpreter_path = sys.executable
  47. project_directory = os.path.dirname(python_interpreter_path)
  48. data_folder = os.path.join(project_directory, 'data')
  49. time_code_cleaned = time_code.replace("-", "").replace(":", "").replace(" ", "")
  50. json_filename = f"{wind_name}_{turbine_code}_{time_code_cleaned}.json"
  51. json_file_path = os.path.join(data_folder, json_filename)
  52. if not os.path.exists(json_file_path):
  53. raise ValueError("文件不存在")
  54. with open(json_file_path, 'r') as f:
  55. data = json.load(f)
  56. return data
def data_analyse(path: List[str]) -> dict:
    """Analyse one pair of measurement files, persist the results and return them.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost. ``find_param``, ``radius_cal``, ``blade_angle``,
    ``blade_angle_aero_dist`` and ``blade_dist_distribute_cal`` are not defined
    in the visible part of the file, and ``ff`` is the imported
    ``frequency_filter`` module; their exact contracts should be confirmed.

    Args:
        path: Positional inputs. ``path[0]`` (``locate_file``) and ``path[1]``
            (``measure_file``) are passed to ``find_param`` and
            ``process_data``. ``path[2]``, ``path[3]`` and ``path[4]`` are
            converted with ``float`` into ``angle_cone``,
            ``axial_inclination`` and ``lift_up_limit``.

    Returns:
        The nested ``json_output`` dictionary with the keys ``original_plot``,
        ``fft_plot``, ``blade_tip``, ``blade_root``, ``dist_distribution`` and
        ``analyse_table``.

    Raises:
        ValueError: ``"lift_up_limit error."`` when ``lift_up_limit`` is
            neither ``>= 0.1`` nor smaller than ``0.1`` in absolute value;
            ``"Radius err1."`` / ``"Radius err2."`` when the radius
            consistency checks differ by more than 0.5.

    Side effects:
        Appends one summary row to ``data/history_data.csv`` and writes
        ``data/<wind_name>_<turbine_code>_<time>.json``; the ``data`` folder
        sits next to ``sys.executable`` and is created when missing.
    """
    locate_file = path[0]
    measure_file = path[1]
    # Thresholds handed to cycle_calculate / tower_filter.
    noise_reduction = 0.000001
    min_difference = 1.5
    angle_cone = float(path[2])
    axial_inclination = float(path[3])
    lift_up_limit = float(path[4])
    # Bin widths given to data_normalize for the flange, root and "nan" series.
    group_length = [10000, 20000, 5000]
    return_list = []
    wind_name, turbine_code, time_code, sampling_fq, angle_nan, angle_cen = find_param(locate_file)
    wind_name_1, turbine_code_1, time_code_1, sampling_fq_1, angle_flange, angle_root = find_param(measure_file)
    # Scale both sampling frequencies by 1000 (presumably kHz -> Hz; confirm against find_param).
    sampling_fq_1 = sampling_fq_1 * 1000
    sampling_fq = sampling_fq * 1000
    data_nan, data_cen = process_data(locate_file)
    data_flange, data_root = process_data(measure_file)
    if lift_up_limit >= 0.1:
        # Replace every flange distance above the limit by
        # lift_up_limit + 3 plus a random offset drawn from 0 .. 0.1.
        discrete_values = np.arange(0, 0.101, 0.001)
        condition = data_flange['distance'] > lift_up_limit
        n = condition.sum()
        random_discrete = np.random.choice(discrete_values, size=n)
        data_flange.loc[condition, 'distance'] = lift_up_limit + 3 + random_discrete
    elif np.abs(lift_up_limit) < 0.1:
        # Limit treated as "not set": leave the data unchanged.
        pass
    else:
        raise ValueError("lift_up_limit error.")
    # Detect pass start/end points in each series; the tower series is only filtered.
    start_flange, end_flange, filtered_data_flange = cycle_calculate(data_flange, noise_reduction, min_difference)
    start_root, end_root, filtered_data_root = cycle_calculate(data_root, noise_reduction, min_difference)
    start_nan, end_nan, filtered_data_nan = cycle_calculate(data_nan, noise_reduction, min_difference)
    filtered_data_cen = tower_filter(data_cen, noise_reduction)
    # Mean of the second column before it is projected with the cosine below.
    dist_cen = np.mean(filtered_data_cen.iloc[:, 1].tolist())
    filtered_data_cen.iloc[:, 1] = filtered_data_cen.iloc[:, 1] * np.cos(np.deg2rad(angle_cen + axial_inclination))
    tower_dist_flange = ff.tower_cal(filtered_data_flange, start_flange, end_flange, sampling_fq_1)
    tower_dist_root = ff.tower_cal(filtered_data_root, start_root, end_root, sampling_fq_1)
    tower_dist_nan = ff.tower_cal(filtered_data_nan, start_nan, end_nan, sampling_fq)
    lowpass_data, fft_x, fft_y, tower_freq, tower_max = ff.process_fft(filtered_data_cen, sampling_fq)
    result_line_flange, result_scatter_flange, border_rows_flange, cycle_len_flange, min_flange \
        = data_normalize(filtered_data_flange, start_flange, end_flange, group_length[0])
    result_line_root, result_scatter_root, border_rows_root, cycle_len_root, min_root \
        = data_normalize(filtered_data_root, start_root, end_root, group_length[1])
    result_line_nan, result_scatter_nan, border_rows_nan, cycle_len_nan, min_nan \
        = data_normalize(filtered_data_nan, start_nan, end_nan, group_length[2])
    result_avg_flange, result_diff_flange = blade_shape(result_line_flange)
    result_avg_root, result_diff_root = blade_shape(result_line_root)
    # coordinate_normalize rewrites the 'distance' column of the frames it is
    # given, so border_rows_flange / border_rows_nan are modified as well.
    border_rows_flange_new, angle_flange_new = coordinate_normalize(border_rows_flange, angle_flange)
    border_rows_nan_new, angle_nan_new = coordinate_normalize(border_rows_nan, angle_nan)
    flange_ava = pd.concat([df['distance'] for df in border_rows_flange_new]).mean(numeric_only=True).mean()
    root_ava = pd.concat([df['distance'] for df in border_rows_root]).mean(numeric_only=True).mean()
    d_radius = np.abs((flange_ava * np.cos(np.deg2rad(angle_flange_new))
                       - root_ava * np.cos(np.deg2rad(angle_root))) * np.sin(np.deg2rad(axial_inclination))
                      + (flange_ava * np.sin(np.deg2rad(angle_flange_new))
                         - root_ava * np.sin(np.deg2rad(angle_root))) * np.cos(np.deg2rad(axial_inclination)))
    # Law of cosines on the two mean distances and the angle between them.
    flange_root_dist = np.sqrt(flange_ava ** 2 + root_ava ** 2 - 2 * flange_ava * root_ava * np.cos(np.deg2rad(angle_flange_new - angle_root)))
    blade_axis = blade_axis_cal(filtered_data_flange, start_flange, end_flange,
                                angle_flange + angle_cone + axial_inclination)
    # angle_flange_new is overwritten here with the value derived from blade_axis.
    blade_axis_new, angle_flange_new = flange_coordinate_normalize(blade_axis, angle_flange)
    flange_r = radius_cal(border_rows_flange_new, angle_flange_new, dist_cen, angle_cen, axial_inclination, angle_cone)
    root_r = radius_cal(border_rows_root, angle_root, dist_cen, angle_cen, axial_inclination, angle_cone)
    nan_r = radius_cal(border_rows_nan_new, angle_nan_new, dist_cen, angle_cen, axial_inclination, angle_cone)
    # Consistency checks between the independently derived radius differences.
    if np.abs((root_r - flange_r) - d_radius) > 0.5:
        raise ValueError("Radius err1.")
    if np.abs(flange_root_dist - d_radius) > 0.5:
        raise ValueError("Radius err2.")
    blade_axis_new["中心y"] = blade_axis_new["中心y"] - (flange_ava - root_ava)
    aero_dist_flange, v_speed_flange, cen_blade_flange = (
        blade_angle_aero_dist(border_rows_flange, flange_r, cycle_len_flange, tower_dist_flange, angle_flange_new))
    aero_dist_root, v_speed_root, cen_blade_root = (
        blade_angle_aero_dist(border_rows_root, root_r, cycle_len_root, tower_dist_root, angle_root))
    aero_dist_nan, v_speed_nan, cen_blade_nan = (
        blade_angle_aero_dist(border_rows_nan_new, nan_r, cycle_len_nan, tower_dist_nan, angle_nan_new))
    # v_speed_root from the call above is replaced by the value returned here.
    pitch_angle_root, v_speed_root = (
        blade_angle(border_rows_root, blade_axis_new, root_r, cycle_len_root, angle_root + axial_inclination))
    blade_axis_new["中心y"] = blade_axis_new["中心y"]*np.cos(np.deg2rad(angle_root + angle_cone + axial_inclination))
    cen_blade_flange_array = np.array(cen_blade_flange)
    cen_blade_nan_array = np.array(cen_blade_nan)
    min_flange_array = np.array(min_flange)
    min_nan_array = np.array(min_nan)
    abs_diff = np.abs(cen_blade_flange_array - min_flange_array)
    abs_diff_nan = np.abs(cen_blade_nan_array - min_nan_array)
    blade_dist_flange = abs_diff * np.cos(np.deg2rad(angle_flange_new))
    blade_dist_nan = abs_diff_nan * np.cos(np.deg2rad(angle_nan_new))
    # NOTE(review): the results of these two tolist() calls are discarded, so
    # both variables stay NumPy arrays.
    blade_dist_flange.tolist()
    blade_dist_nan.tolist()
    dist_distribute_nan = blade_dist_distribute_cal(filtered_data_nan, start_nan, end_nan,
                                                    tower_dist_nan, angle_nan_new, blade_dist_nan)
    dist_distribute = [df.round(5) for df in dist_distribute_nan]
    min_values = []
    min_keys = []
    max_values = []
    max_keys = []
    mean_values = []
    # For every frame record the min/max of the second column and the
    # first-column value of the first row where each extreme occurs.
    for df in dist_distribute:
        second_col_min = df[df.columns[1]].min()
        second_col_max = df[df.columns[1]].max()
        min_row = df[df[df.columns[1]] == second_col_min]
        max_row = df[df[df.columns[1]] == second_col_max]
        min_values.append(round(second_col_min, 2))
        min_keys.append(round(min_row.iloc[0][df.columns[0]], 2))
        max_values.append(round(second_col_max, 2))
        max_keys.append(round(max_row.iloc[0][df.columns[0]], 2))
    # Mid-range value ((max + min) / 2) for the first three frames.
    for i in range(3):
        mean_values.append(round((max_values[i] + min_values[i]) / 2, 2))
    # Rescale the first column by the speed factor, shift it to start at 0 and
    # project the second column with the cosine of the summed angles.
    for df in result_line_flange:
        first_column = df.iloc[:, 0]
        sec_column = df.iloc[:, 1]
        df.iloc[:, 0] = first_column * v_speed_flange
        min_time = df.iloc[:, 0].min()
        df.iloc[:, 0] -= min_time
        df.iloc[:, 1] = sec_column * np.cos(np.deg2rad(angle_flange_new + angle_cone + axial_inclination))
    for df in result_line_root:
        first_column = df.iloc[:, 0]
        sec_column = df.iloc[:, 1]
        df.iloc[:, 0] = first_column * v_speed_root
        min_time = df.iloc[:, 0].min()
        df.iloc[:, 0] -= min_time
        df.iloc[:, 1] = sec_column * np.cos(np.deg2rad(angle_root + angle_cone + axial_inclination))
    avg_flange = result_avg_flange.iloc[:, 0]
    result_avg_flange.iloc[:, 0] = avg_flange * v_speed_flange
    avg_root = result_avg_root.iloc[:, 0]
    result_avg_root.iloc[:, 0] = avg_root * v_speed_root
    # The flange pitch angles are fixed at zero, so each twist equals the
    # absolute root pitch angle.
    pitch_angle_flange = [0, 0, 0]
    twist_1 = round(np.abs(pitch_angle_root[0] - pitch_angle_flange[0]), 2)
    twist_2 = round(np.abs(pitch_angle_root[1] - pitch_angle_flange[1]), 2)
    twist_3 = round(np.abs(pitch_angle_root[2] - pitch_angle_flange[2]), 2)
    twist_avg = round((twist_1 + twist_2 + twist_3) / 3, 2)
    # Stride used to thin the plotted series.
    # NOTE(review): a value of 0 would make the [::sampling_num] slices raise.
    sampling_num = int(0.015 * sampling_fq_1)
    # Divide the first column by 5,000,000 (presumably raw ticks -> seconds; confirm).
    data_flange.iloc[:, 0] = data_flange.iloc[:, 0] / 5000000
    data_root.iloc[:, 0] = data_root.iloc[:, 0] / 5000000
    lowpass_data.iloc[:, 0] = lowpass_data.iloc[:, 0] / 5000000
    # Summary row; the order must match the column list used for df_new_row.
    return_list.append(str(time_code))
    return_list.append(str(wind_name))
    return_list.append(str(turbine_code))
    return_list.append(sampling_fq_1)
    return_list.append(pitch_angle_root[0])
    return_list.append(pitch_angle_root[1])
    return_list.append(pitch_angle_root[2])
    return_list.append(pitch_angle_root[3])
    return_list.append(mean_values[0])
    return_list.append(mean_values[1])
    return_list.append(mean_values[2])
    return_list.append(twist_1)
    return_list.append(twist_2)
    return_list.append(twist_3)
    return_list.append(twist_avg)
    return_list.append(tower_max)
    return_list.append(tower_freq)
    df_new_row = pd.DataFrame([return_list],
                              columns=['时间', '场站', '风机编号', '采样频率',
                                       '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
                                       '叶片1净空值', '叶片2净空值', '叶片3净空值',
                                       '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
                                       '振动幅值', '振动主频'])
    # Payload returned to the caller and written to the per-run JSON file.
    json_output = {
        'original_plot': {
            'blade_tip': {
                'xdata': data_flange.iloc[:, 0].tolist()[::sampling_num],
                'ydata': data_flange.iloc[:, 1].tolist()[::sampling_num]
            },
            'blade_root': {
                'xdata': data_root.iloc[:, 0].tolist()[::sampling_num],
                'ydata': data_root.iloc[:, 1].tolist()[::sampling_num]
            }
        },
        'fft_plot': {
            'lowpass': {
                'xdata': lowpass_data['time'].tolist()[::sampling_num],
                'ydata': lowpass_data['distance_filtered'].tolist()[::sampling_num],
                'xmax': max(lowpass_data['time'].tolist()),
                'xmin': min(lowpass_data['time'].tolist()),
                'ymax': max(lowpass_data['distance_filtered'].tolist()) + 0.02,
                'ymin': min(lowpass_data['distance_filtered'].tolist()) - 0.02
            },
            'fft': {
                'xdata': fft_x,
                'ydata': fft_y,
                'xmax': max(fft_x),
                'xmin': min(fft_x),
                'ymax': max(fft_y) + 0.02,
                'ymin': 0
            }
        },
        'blade_tip': {
            'first_blade': {
                'xdata': result_line_flange[0].iloc[:, 0].tolist(),
                'ydata': result_line_flange[0].iloc[:, 1].tolist()
            },
            'second_blade': {
                'xdata': result_line_flange[1].iloc[:, 0].tolist(),
                'ydata': result_line_flange[1].iloc[:, 1].tolist()
            },
            'third_blade': {
                'xdata': result_line_flange[2].iloc[:, 0].tolist(),
                'ydata': result_line_flange[2].iloc[:, 1].tolist()
            },
            'avg_blade': {
                'xdata': result_avg_flange.iloc[:, 0].tolist(),
                'ydata': result_avg_flange.iloc[:, 1].tolist()
            }
        },
        'blade_root': {
            'first_blade': {
                'xdata': result_line_root[0].iloc[:, 0].tolist(),
                'ydata': result_line_root[0].iloc[:, 1].tolist()
            },
            'second_blade': {
                'xdata': result_line_root[1].iloc[:, 0].tolist(),
                'ydata': result_line_root[1].iloc[:, 1].tolist()
            },
            'third_blade': {
                'xdata': result_line_root[2].iloc[:, 0].tolist(),
                'ydata': result_line_root[2].iloc[:, 1].tolist()
            },
            'avg_blade': {
                'xdata': result_avg_root.iloc[:, 0].tolist(),
                'ydata': result_avg_root.iloc[:, 1].tolist()
            }
        },
        'dist_distribution': {
            'first_blade': {
                'xdata': dist_distribute[0].iloc[:, 0].tolist(),
                'ydata': dist_distribute[0].iloc[:, 1].tolist()
            },
            'second_blade': {
                'xdata': dist_distribute[1].iloc[:, 0].tolist(),
                'ydata': dist_distribute[1].iloc[:, 1].tolist()
            },
            'third_blade': {
                'xdata': dist_distribute[2].iloc[:, 0].tolist(),
                'ydata': dist_distribute[2].iloc[:, 1].tolist()
            }
        },
        'analyse_table': {
            'pitch_angle_diff': {
                'blade_1': pitch_angle_root[0],
                'blade_2': pitch_angle_root[1],
                'blade_3': pitch_angle_root[2],
                'blade_relate': pitch_angle_root[3]
            },
            'aero_dist': {
                'first_blade': {
                    'x_min': min_keys[0],
                    'y_min': min_values[0],
                    'x_max': max_keys[0],
                    'y_max': max_values[0],
                    'y_diff': np.abs(max_values[0] - min_values[0]),
                    'y_ava': mean_values[0]
                },
                'second_blade': {
                    'x_min': min_keys[1],
                    'y_min': min_values[1],
                    'x_max': max_keys[1],
                    'y_max': max_values[1],
                    'y_diff': np.abs(max_values[1] - min_values[1]),
                    'y_ava': mean_values[1]
                },
                'third_blade': {
                    'x_min': min_keys[2],
                    'y_min': min_values[2],
                    'x_max': max_keys[2],
                    'y_max': max_values[2],
                    'y_diff': np.abs(max_values[2] - min_values[2]),
                    'y_ava': mean_values[2]
                }
            },
            'blade_twist': {
                'blade_1': twist_1,
                'blade_2': twist_2,
                'blade_3': twist_3,
                'blade_avg': twist_avg
            },
            'tower_vibration': {
                'max_vibration': tower_max,
                'main_vibration_freq': tower_freq
            }
        }
    }
    # Same folder/CSV bootstrap as result_main(): make sure the data folder
    # and the header-only history CSV exist before appending.
    python_interpreter_path = sys.executable
    project_directory = os.path.dirname(python_interpreter_path)
    data_folder = os.path.join(project_directory, 'data')
    if not os.path.exists(data_folder):
        os.makedirs(data_folder)
    csv_file_path = os.path.join(data_folder, 'history_data.csv')
    if not os.path.exists(csv_file_path):
        pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率',
                              '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
                              '叶片1净空值', '叶片2净空值', '叶片3净空值',
                              '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
                              '振动幅值', '振动主频']).to_csv(csv_file_path, index=False)
    # Append the summary row without repeating the header.
    df_new_row.to_csv(csv_file_path, mode='a', header=False, index=False)
    # File name uses the same scheme that history_data() reads back.
    time_code_cleaned = time_code.replace("-", "").replace(":", "").replace(" ", "")
    json_filename = f"{wind_name}_{turbine_code}_{time_code_cleaned}.json"
    json_file_path = os.path.join(data_folder, json_filename)
    with open(json_file_path, 'w') as json_file:
        json.dump(json_output, json_file, indent=4)
    return json_output
  352. def process_data(file_path):
  353. data = pd.read_csv(file_path, usecols=[1, 3, 4, 8, 9], header=None, engine='c')
  354. data = data.head(int(len(data) * 0.95))
  355. max_value = data.iloc[:, 0].max()
  356. max_index = data.iloc[:, 0].idxmax()
  357. min_index = data.iloc[:, 0].idxmin()
  358. if min_index == max_index + 1:
  359. data.iloc[min_index:, 0] += max_value
  360. last_time = data.iloc[-1, 0]
  361. first_time = data.iloc[0, 0]
  362. data = data[data.iloc[:, 0] >= first_time]
  363. data = data[data.iloc[:, 0] <= last_time]
  364. data.reset_index(drop=True, inplace=True)
  365. min_time = data.iloc[:, 0].min()
  366. data.iloc[:, 0] -= min_time
  367. data_1 = data.iloc[:, [0, 1, 2]]
  368. data_2 = data.iloc[:, [0, 3, 4]]
  369. data_1.columns = ['time', 'distance', 'grey']
  370. data_2.columns = ['time', 'distance', 'grey']
  371. return data_1, data_2
  372. def tower_filter(data_group: pd.DataFrame, noise_threshold: float):
  373. distance_counts = data_group['distance'].value_counts(normalize=True)
  374. noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index
  375. noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index
  376. data_group.loc[noise_indices, 'distance'] = np.nan
  377. top_5_distances = distance_counts.head(5).index
  378. mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean()
  379. data_group.loc[(data_group['distance'] < mean_values - 20) | (
  380. data_group['distance'] > mean_values * 1.1), 'distance'] = np.nan
  381. data_group['distance'] = data_group['distance'].fillna(method='ffill')
  382. filtered_data = data_group
  383. return filtered_data
  384. def cycle_calculate(data_group: pd.DataFrame, noise_threshold: float, min_distance: float):
  385. distance_counts = data_group['distance'].value_counts(normalize=True)
  386. noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index
  387. noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index
  388. data_group.loc[noise_indices, 'distance'] = np.nan
  389. if 0 in distance_counts.nlargest(3).index:
  390. pass
  391. else:
  392. # 选择频率最大的5个值
  393. top_5_distances = distance_counts.head(5).index
  394. mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean()
  395. data_group.loc[(data_group['distance'] < mean_values - 30) | (
  396. data_group['distance'] > mean_values * 1.1), 'distance'] = np.nan
  397. data_group['distance'] = data_group['distance'].fillna(method='ffill')
  398. filtered_data = data_group
  399. filtered_data['distance_diff'] = filtered_data['distance'].diff()
  400. large_diff_indices = filtered_data[filtered_data['distance_diff'] > min_distance].index
  401. small_diff_indices = filtered_data[filtered_data['distance_diff'] < -min_distance].index
  402. filtered_data = filtered_data.drop(columns=['distance_diff'])
  403. start_points = pd.DataFrame()
  404. end_points = pd.DataFrame()
  405. for idx in large_diff_indices:
  406. current_distance = filtered_data.loc[idx, 'distance']
  407. next_rows_large = filtered_data.loc[idx - 500: idx - 1]
  408. if next_rows_large['distance'].le(current_distance - min_distance).all():
  409. end_points = pd.concat([end_points, filtered_data.loc[[idx - 1]]])
  410. for idx in small_diff_indices:
  411. current_distance = filtered_data.loc[idx - 1, 'distance']
  412. next_rows_small = filtered_data.iloc[idx: idx + 500]
  413. if next_rows_small['distance'].le(current_distance - min_distance).all():
  414. start_points = pd.concat([start_points, filtered_data.loc[[idx]]])
  415. if 0 in distance_counts.nlargest(3).index:
  416. end_points, start_points = start_points, end_points
  417. if end_points.iloc[0, 0] < start_points.iloc[0, 0]:
  418. end_points = end_points.drop(end_points.index[0])
  419. if end_points.iloc[-1, 0] < start_points.iloc[-1, 0]:
  420. start_points = start_points.drop(start_points.index[-1])
  421. else:
  422. pass
  423. return start_points, end_points, filtered_data
def data_normalize(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame, group_len: int) \
        -> Tuple[List[pd.DataFrame], List[pd.DataFrame], List[pd.DataFrame], int, list]:
    """Cut the series into passes, sort them into three buckets and bin each bucket.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost; confirm it against the original file.

    Args:
        data_group: Frame with ``time``, ``distance`` and ``grey`` columns.
        start_points: Rows marking the start of each pass (needs ``time``).
        end_points: Rows marking the end of each pass (needs ``time``).
        group_len: Width of one time bin.

    Returns:
        A 5-tuple ``(turbines_processed, turbines_scattered, border_rows,
        full_cycle, min_list)``:
        per-bucket binned means cropped to the shared time window, per-bucket
        filtered raw rows, per-bucket two-row frames holding the rows at the
        window borders, ``int((start_times[2] - start_times[0]) * 3)``, and
        the minimum binned mean distance of each bucket.
    """
    # Merge both point sets in time order; make the sequence begin with a
    # start point and finish with an end point.
    combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
    if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
        combined_df_sorted = combined_df_sorted.iloc[1:]
    if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
        combined_df_sorted = combined_df_sorted.iloc[:-1]
    combined_df_sorted.reset_index(drop=True, inplace=True)
    # Alternating start/end times: even positions are starts, odd are ends.
    start_times = combined_df_sorted['time'].tolist()
    # Duration of the first pass; every pass is rescaled to this length.
    normalize_cycle = start_times[1] - start_times[0]
    # Three times the spacing between the first two starts.
    full_cycle = int((start_times[2] - start_times[0]) * 3)
    turbines = [pd.DataFrame() for _ in range(3)]
    for i in range(0, len(start_times), 2):
        start_time = start_times[i]
        end_time = start_times[i + 1]
        segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)]
        # Boolean-mask indexing returns a DataFrame, so the None branch is
        # not expected to run.
        if segment is None:
            pass
        else:
            ratio = (end_time - start_time) / normalize_cycle
            # Shift the pass to start at 0 and stretch it to normalize_cycle.
            segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio
            # i advances by 2, so buckets are visited in the order 0, 2, 1, 0, 2, 1, ...
            # NOTE(review): confirm this bucket order matches the intended blade numbering.
            turbines[i % 3] = pd.concat([turbines[i % 3], segment])
    turbines_processed = []
    turbines_scattered = []
    min_list = []
    # Shared [start, end] time window; [-1, -1] means "not initialised yet".
    sd_time = [-1, -1]
    # Nominal time of each bin.
    # NOTE(review): range() needs normalize_cycle to be an int; confirm the dtype of 'time'.
    time_list = list(range(0, normalize_cycle, group_len))
    for turbine in turbines:
        turbine_sorted = turbine.sort_values(by='time').reset_index(drop=True)
        # Grey threshold: 80 % of the mean grey over the central 10 %..90 % of the rows.
        grey_start_index = int(len(turbine_sorted) * 0.1)
        grey_end_index = int(len(turbine_sorted) * 0.9)
        subset_grey = turbine_sorted[grey_start_index:grey_end_index]
        mean_grey = subset_grey['grey'].mean() * 0.8
        turbine_sorted = turbine_sorted[turbine_sorted['grey'] > mean_grey]
        first_time = turbine_sorted['time'].iloc[0]
        bins = list(range(int(first_time), int(turbine_sorted['time'].max()), group_len))
        grouped = turbine_sorted.groupby(pd.cut(turbine_sorted['time'], bins=bins, right=False))
        processed_df = pd.DataFrame()
        scattered_df = pd.DataFrame()
        mean_points = []
        diff_points = []
        # First pass over the bins: mean distance strictly between the 5 % and 95 % quantiles.
        for _, group in grouped:
            quantile_5 = group['distance'].quantile(0.05)
            quantile_95 = group['distance'].quantile(0.95)
            filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)]
            mean_point = filtered_group['distance'].mean()
            mean_points.append(mean_point)
        # Absolute change between neighbouring bin means.
        for i in range(len(mean_points) - 1):
            diff = abs(mean_points[i + 1] - mean_points[i])
            diff_points.append(diff)
        # Threshold: 1.1 x the largest change within the central 5 %..95 % of the changes.
        start_index = int(len(diff_points) * 0.05)
        end_index = int(len(diff_points) * 0.95)
        subset1 = diff_points[start_index:end_index]
        sdr_diff = np.max(subset1) * 1.1
        min_list.append(min(mean_points))
        # First and last bin whose change stays below the threshold.
        first_index = np.where(diff_points < sdr_diff)[0][0]
        last_index = np.where(diff_points < sdr_diff)[0][-1]
        # Second pass: keep only the bins in [first_index, last_index).
        for index, (bin, group) in enumerate(grouped):
            quantile_5 = group['distance'].quantile(0.05)
            quantile_95 = group['distance'].quantile(0.95)
            filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)]
            if first_index <= index < last_index:
                mid_point = filtered_group.mean()
                mid_point_df = pd.DataFrame([mid_point])
                # Overwrite the first column with the nominal bin time.
                mid_point_df.iloc[0, 0] = time_list[index]
                processed_df = pd.concat([processed_df, mid_point_df], ignore_index=True)
                scattered_df = pd.concat([scattered_df, filtered_group], ignore_index=True)
            else:
                pass
        min_time = processed_df['time'].min()
        max_time = processed_df['time'].max()
        # Narrow the shared window.
        # NOTE(review): because this is an elif chain, the upper bound is not
        # tightened in an iteration where the lower bound was raised; confirm
        # whether two independent checks were intended.
        if sd_time == [-1, -1]:
            sd_time = [min_time, max_time]
        elif sd_time[0] < min_time:
            sd_time[0] = min_time
        elif sd_time[1] > max_time:
            sd_time[1] = max_time
        turbines_processed.append(processed_df)
        turbines_scattered.append(scattered_df)
    border_rows = []
    for i, turbine in enumerate(turbines_processed):
        # Snap the rows nearest to each window border onto the border time,
        # remember them, then crop the frame to the window.
        closest_index_0 = (turbine['time'] - sd_time[0]).abs().idxmin()
        turbine.at[closest_index_0, 'time'] = sd_time[0]
        sd_time_row_0 = turbine.loc[closest_index_0]
        closest_index_1 = (turbine['time'] - sd_time[1]).abs().idxmin()
        turbine.at[closest_index_1, 'time'] = sd_time[1]
        sd_time_row_1 = turbine.loc[closest_index_1]
        turbines_processed[i] = turbine.iloc[closest_index_0:closest_index_1 + 1].reset_index(drop=True)
        sd_time_rows_turbine = pd.concat([pd.DataFrame([sd_time_row_0]), pd.DataFrame([sd_time_row_1])]
                                         , ignore_index=True)
        border_rows.append(sd_time_rows_turbine)
    return turbines_processed, turbines_scattered, border_rows, full_cycle, min_list
  516. def blade_shape(turbines_processed: List[pd.DataFrame]):
  517. row_counts = [df.shape[0] for df in turbines_processed]
  518. num_rows = min(row_counts)
  519. turbine_avg = pd.DataFrame(index=range(num_rows), columns=['time', 'distance'])
  520. turbine_diff = [pd.DataFrame(index=range(num_rows), columns=['time', 'distance']) for _ in turbines_processed]
  521. for i in range(num_rows):
  522. distances = [df.loc[i, 'distance'] for df in turbines_processed]
  523. avg_distance = sum(distances) / len(distances)
  524. time_value = turbines_processed[0].loc[i, 'time']
  525. turbine_avg.loc[i, 'time'] = time_value
  526. turbine_avg.loc[i, 'distance'] = avg_distance
  527. for j in range(len(distances)):
  528. distances[j] = distances[j] - avg_distance
  529. turbine_diff[j].loc[i, 'time'] = time_value
  530. turbine_diff[j].loc[i, 'distance'] = distances[j]
  531. return turbine_avg, turbine_diff
  532. def coordinate_normalize(tip_border_rows: List[pd.DataFrame], tip_angle):
  533. tip_angle1 = np.deg2rad(tip_angle)
  534. tip_angle_list = []
  535. for turbine in tip_border_rows:
  536. tip_angle_cal0 = ((np.sin(tip_angle1) * turbine['distance'] - 0.07608) /
  537. (np.cos(tip_angle1) * turbine['distance']))
  538. tip_angle_cal = np.arctan(tip_angle_cal0)
  539. turbine['distance'] = (turbine['distance'] ** 2 + 0.0057881664 -
  540. 0.15216 * turbine['distance'] * np.sin(tip_angle1)) ** 0.5
  541. tip_angle_list.append(tip_angle_cal)
  542. tip_angle_new = float(np.mean(tip_angle_list))
  543. tip_angle_new1 = np.rad2deg(tip_angle_new)
  544. return tip_border_rows, tip_angle_new1
  545. def flange_coordinate_normalize(flange_cen_row: pd.DataFrame, flange_angle):
  546. flange_angle1 = np.deg2rad(flange_angle)
  547. flange_angle_cal0 = ((np.sin(flange_angle1) * flange_cen_row['中心y'] - 0.07608) /
  548. (np.cos(flange_angle1) * flange_cen_row['中心y']))
  549. flange_angle_cal = np.arctan(flange_angle_cal0)
  550. flange_cen_row['中心y'] = (flange_cen_row['中心y'] ** 2 + 0.0057881664 -
  551. 0.15216 * flange_cen_row['中心y'] * np.sin(flange_angle1)) ** 0.5
  552. flange_angle_new = float(flange_angle_cal)
  553. flange_angle_new1 = np.rad2deg(flange_angle_new)
  554. return flange_cen_row, flange_angle_new1
  555. def blade_axis_cal(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame, horizon_angle: float) \
  556. -> pd.DataFrame:
  557. def fit_circle(df, v_bounds=(2, 10), top_k=5, prefilter=True):
  558. def smooth_savgol(y, window_length=101, polyorder=3):
  559. wl = min(window_length, len(y) if len(y) % 2 == 1 else len(y) - 1)
  560. if wl < 3:
  561. return y
  562. if wl % 2 == 0:
  563. wl -= 1
  564. return savgol_filter(y, wl, polyorder)
  565. t = np.asarray(df['time'])
  566. d_raw = np.asarray(df['distance'])
  567. # ---------- 平滑 ----------
  568. d_smooth = smooth_savgol(d_raw, window_length=101, polyorder=3) if prefilter else d_raw
  569. bounds = [v_bounds, (0, 5), (200, 201), (1, 3)]
  570. def residuals_sq(params):
  571. v, xc, yc, R = params
  572. if v <= 0 or R <= 0:
  573. return 1e6 * np.ones_like(t)
  574. x = v * t
  575. return (x - xc) ** 2 + (d_smooth - yc) ** 2 - R ** 2
  576. def objective_mean_sq(params):
  577. res = residuals_sq(params)
  578. return np.mean(res ** 2)
  579. # ---------- 差分进化 ----------
  580. result = differential_evolution(
  581. objective_mean_sq,
  582. bounds,
  583. strategy='rand2bin',
  584. mutation=(0.8, 1.2),
  585. recombination=0.8,
  586. popsize=30,
  587. maxiter=1000,
  588. polish=False,
  589. seed=42,
  590. workers=1
  591. )
  592. # 多候选点精修
  593. pop = result.population
  594. energies = result.population_energies
  595. idx = np.argsort(energies)[:top_k]
  596. candidates = pop[idx]
  597. best_rmse = np.inf
  598. best_result = None
  599. for cand in candidates:
  600. res = least_squares(
  601. residuals_sq,
  602. x0=cand,
  603. bounds=([v_bounds[0], -np.inf, -np.inf, 1e-6],
  604. [v_bounds[1], np.inf, np.inf, np.inf]),
  605. method='trf',
  606. loss='linear',
  607. max_nfev=50000,
  608. xtol=1e-12,
  609. ftol=1e-12,
  610. gtol=1e-12
  611. )
  612. v_opt, xc_opt, yc_opt, R_opt = res.x
  613. x_all = v_opt * t
  614. Ri_all = np.sqrt((x_all - xc_opt) ** 2 + (d_smooth - yc_opt) ** 2)
  615. geo_rmse = np.sqrt(np.mean((Ri_all - R_opt) ** 2))
  616. if geo_rmse < best_rmse:
  617. best_rmse = geo_rmse
  618. best_result = [v_opt, xc_opt, yc_opt, R_opt, geo_rmse]
  619. result_df = pd.DataFrame([best_result],
  620. columns=['旋转半径', '中心x', '中心y', '圆半径', '几何RMSE']) # 旋转半径本身为测量点线速度
  621. return result_df
  622. group_len = 10000
  623. combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
  624. # 检查排序后的数据从start开始,end结束
  625. if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
  626. combined_df_sorted = combined_df_sorted.iloc[1:]
  627. if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
  628. combined_df_sorted = combined_df_sorted.iloc[:-1]
  629. combined_df_sorted.reset_index(drop=True, inplace=True)
  630. # 将 start_points 中的时间点转换为列表
  631. start_times = combined_df_sorted['time'].tolist()
  632. data_group['distance'] = data_group['distance'] * np.cos(np.deg2rad(horizon_angle))
  633. normalize_cycle = start_times[1] - start_times[0]
  634. full_cycle = int((start_times[2] - start_times[0]) * 3)
  635. angle_speed = (np.pi / full_cycle) * 5000000
  636. turbines = [pd.DataFrame() for _ in range(3)]
  637. for i in range(0, len(start_times), 2):
  638. start_time = start_times[i]
  639. end_time = start_times[i + 1]
  640. segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)]
  641. if segment is None or segment.empty:
  642. raise ValueError("Segment is empty")
  643. segment = segment.copy()
  644. ratio = (end_time - start_time) / normalize_cycle
  645. segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio
  646. turbines[i % 3] = pd.concat([turbines[i % 3], segment])
  647. turbines_processed = []
  648. turbines_scattered = []
  649. turbines_result = []
  650. result_df = pd.DataFrame()
  651. time_list = list(range(0, normalize_cycle, group_len))
  652. for turbine in turbines:
  653. # 按时间排序
  654. turbine_sorted = turbine.sort_values(by='time').reset_index(drop=True)
  655. # 找到time列的第一个值
  656. first_time = turbine_sorted['time'].iloc[0]
  657. # 分组,时间列每1000为一组(每40个时间点一组)
  658. bins = list(range(int(first_time), int(turbine_sorted['time'].max()), group_len))
  659. grouped = turbine_sorted.groupby(pd.cut(turbine_sorted['time'], bins=bins, right=False))
  660. process_df = pd.DataFrame()
  661. for index, (bin, group) in enumerate(grouped):
  662. mid_point = group.mean()
  663. # 将中点转换为 DataFrame 并添加到处理后的 DataFrame 中
  664. mid_point_df = pd.DataFrame([mid_point])
  665. mid_point_df.iloc[0, 0] = time_list[index]
  666. process_df = pd.concat([process_df, mid_point_df], ignore_index=True)
  667. process_df['time'] = process_df['time'] / 5000000
  668. lower_bound = process_df['time'].quantile(0.2)
  669. upper_bound = process_df['time'].quantile(0.7)
  670. processed_df = process_df[(process_df['time'] >= lower_bound) & (process_df['time'] <= upper_bound)]
  671. blade_cen_est = fit_circle(processed_df)
  672. turbines_processed.append(processed_df)
  673. turbines_scattered.append(turbine)
  674. result_df = pd.concat([result_df, blade_cen_est], ignore_index=True)
  675. if blade_cen_est['几何RMSE'].iloc[0] >= 0.1:
  676. raise ValueError("叶片几何误差过大")
  677. result_df = result_df.mean(numeric_only=True).to_frame().T
  678. result_df["中心y"] = result_df["中心y"] / np.cos(np.deg2rad(horizon_angle))
  679. return result_df
  680. def radius_cal(border_rows, meas_angle, cen_dist, cen_angle, angle_main, angle_rotate):
  681. aero_dist = (pd.concat([df['distance'] for df in border_rows]).mean())
  682. radius = np.abs(aero_dist * np.sin(np.deg2rad(meas_angle - angle_main))
  683. - cen_dist * np.sin(np.deg2rad(cen_angle - angle_main)))
  684. return radius
  685. def blade_angle_aero_dist(border_rows: List[pd.DataFrame], radius: float, full_cycle: int,
  686. tower_dist: float, v_angle: float):
  687. v_speed = 2 * np.pi * radius / full_cycle # 叶片线速度m/(1计时器单位)
  688. aero_dist_list = []
  689. cen_blade = []
  690. for turbine in border_rows:
  691. mean_col2 = (turbine.iloc[1, 1] + turbine.iloc[0, 1]) / 2
  692. aero_dist = abs(mean_col2 - tower_dist) * np.cos(np.deg2rad(v_angle))
  693. aero_dist_list.append(aero_dist)
  694. cen_blade.append(mean_col2)
  695. aero_dist_list.append(np.mean(aero_dist_list))
  696. aero_dist_list = [round(num, 2) for num in aero_dist_list]
  697. return aero_dist_list, v_speed, cen_blade
  698. def blade_angle(border_rows: List[pd.DataFrame], cen_data: pd.DataFrame, radius: float, full_cycle: int,
  699. v_angle: float):
  700. v_speed = 2 * np.pi * radius / full_cycle
  701. values = []
  702. for df in border_rows:
  703. if df.shape[0] >= 2 and df.shape[1] >= 2:
  704. values.append(df.iloc[0, 1])
  705. values.append(df.iloc[1, 1])
  706. mean_value = sum(values) / len(values) if values else float('nan')
  707. if np.abs(cen_data['中心y'].iloc[0] - mean_value) > 0.3:
  708. cen_data['中心y'].iloc[0] = mean_value
  709. if cen_data['中心x'].iloc[0] > 1.5:
  710. cen_data['中心x'].iloc[0] = 1.5
  711. if cen_data['中心x'].iloc[0] < 0.75:
  712. cen_data['中心x'].iloc[0] = 0.75
  713. pitch_angle_list = []
  714. for idx, turbine in enumerate(border_rows, start=1):
  715. diff_time = np.abs((turbine.iloc[0, 0] - turbine.iloc[1, 0]) * 0.66 * v_speed)
  716. diff_len = np.abs((cen_data['中心y'].iloc[0] - turbine.iloc[1, 1]) * np.cos(np.deg2rad(v_angle)))
  717. pitch_angle = np.degrees(np.arctan(diff_len / diff_time))
  718. pitch_angle_list.append(pitch_angle)
  719. pitch_mean = np.mean(pitch_angle_list)
  720. pitch_angle_list = [angle - pitch_mean for angle in pitch_angle_list]
  721. pitch_angle_list.append(max(pitch_angle_list) - min(pitch_angle_list))
  722. pitch_angle_list = [round(num, 2) for num in pitch_angle_list]
  723. return pitch_angle_list, v_speed
  724. def find_param(path: str):
  725. path = path.replace('\\', '/')
  726. last_slash_index = path.rfind('/')
  727. result = path[last_slash_index + 1:]
  728. underscore_indices = []
  729. start = 0
  730. while True:
  731. index = result.find('_', start)
  732. if index == -1:
  733. break
  734. underscore_indices.append(index)
  735. start = index + 1
  736. wind_name = result[: underscore_indices[0]]
  737. turbine_code = result[underscore_indices[0] + 1: underscore_indices[1]]
  738. time_code = result[underscore_indices[1] + 1: underscore_indices[2]]
  739. sampling_fq = int(result[underscore_indices[2] + 1: underscore_indices[3]])
  740. tunnel_1 = float(result[underscore_indices[3] + 1: underscore_indices[4]])
  741. tunnel_2 = float(result[underscore_indices[4] + 1: -4])
  742. dt = datetime.strptime(time_code, "%Y%m%d%H%M%S")
  743. standard_time_str = dt.strftime("%Y-%m-%d %H:%M:%S")
  744. return wind_name, turbine_code, standard_time_str, sampling_fq, tunnel_1, tunnel_2
  745. def blade_dist_distribute_cal(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame,
  746. tower_dist: float, v_angle: float, blade_cen_dist: list):
  747. combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
  748. if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
  749. combined_df_sorted = combined_df_sorted.iloc[1:]
  750. if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
  751. combined_df_sorted = combined_df_sorted.iloc[:-1]
  752. combined_df_sorted.reset_index(drop=True, inplace=True)
  753. start_times = combined_df_sorted['time'].tolist()
  754. normalize_cycle = start_times[1] - start_times[0]
  755. tower_clearance = [pd.DataFrame() for _ in range(3)]
  756. for i in range(0, len(start_times) - 2, 2):
  757. start_time = start_times[i]
  758. end_time = start_times[i + 1]
  759. segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)]
  760. min_distance = segment['distance'].min()
  761. clearance = np.abs(tower_dist - min_distance - blade_cen_dist[i % 3]) * np.cos(np.deg2rad(v_angle))
  762. r_speed = round(60 / ((start_times[i + 2] - start_times[i]) * 3 / 5000000), 2)
  763. ratio = (end_time - start_time) / normalize_cycle
  764. segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio
  765. new_df = pd.DataFrame({
  766. 'r_speed': [r_speed],
  767. 'clearance': [clearance]
  768. })
  769. tower_clearance[i % 3] = pd.concat([tower_clearance[i % 3], new_df])
  770. tower_clearance = [df.sort_values(by='r_speed') for df in tower_clearance]
  771. return tower_clearance