"""data_clean.py — wind-turbine blade measurement cleaning and analysis pipeline."""
  1. import os
  2. import json
  3. import pandas as pd
  4. import numpy as np
  5. import matplotlib.pyplot as plt
  6. from typing import Tuple, List
  7. import warnings
  8. import sys
  9. import frequency_filter as ff
  10. from datetime import datetime
  11. warnings.filterwarnings("ignore", category=FutureWarning)
  12. plt.rcParams['font.sans-serif'] = ['SimHei']
  13. plt.rcParams['axes.unicode_minus'] = False
  14. def result_main():
  15. python_interpreter_path = sys.executable
  16. project_directory = os.path.dirname(python_interpreter_path)
  17. data_folder = os.path.join(project_directory, 'data')
  18. if not os.path.exists(data_folder):
  19. os.makedirs(data_folder)
  20. csv_file_path = os.path.join(data_folder, 'history_data.csv')
  21. if not os.path.exists(csv_file_path):
  22. pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率',
  23. '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
  24. '叶片1净空值', '叶片2净空值', '叶片3净空值',
  25. '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
  26. '振动幅值', '振动主频']).to_csv(csv_file_path, index=False)
  27. return csv_file_path
  28. def delete_data(name):
  29. python_interpreter_path = sys.executable
  30. project_directory = os.path.dirname(python_interpreter_path)
  31. data_folder = os.path.join(project_directory, 'data')
  32. csv_file_path = os.path.join(data_folder, 'history_data.csv')
  33. df = pd.read_csv(csv_file_path)
  34. condition = ((df['时间'].astype(str).str.contains(name[0])) &
  35. (df['场站'].astype(str).str.contains(name[1])) &
  36. (df['风机编号'].astype(str).str.contains(name[2])))
  37. df = df[~condition]
  38. df.to_csv(csv_file_path, index=False)
  39. return csv_file_path
  40. def history_data(name):
  41. time_code = name[0]
  42. wind_name = name[1]
  43. turbine_code = name[2]
  44. python_interpreter_path = sys.executable
  45. project_directory = os.path.dirname(python_interpreter_path)
  46. data_folder = os.path.join(project_directory, 'data')
  47. time_code_cleaned = time_code.replace("-", "").replace(":", "").replace(" ", "")
  48. json_filename = f"{wind_name}_{turbine_code}_{time_code_cleaned}.json"
  49. json_file_path = os.path.join(data_folder, json_filename)
  50. if not os.path.exists(json_file_path):
  51. raise ValueError("文件不存在")
  52. with open(json_file_path, 'r') as f:
  53. data = json.load(f)
  54. return data
  55. def data_analyse(path: List[str]):
  56. locate_file = path[0]
  57. measure_file = path[1]
  58. noise_reduction = 0.000001
  59. min_difference = 1.5
  60. angle_cone = float(path[2])
  61. axial_inclination = float(path[3])
  62. group_length = [10000, 20000, 5000]
  63. return_list = []
  64. wind_name, turbine_code, time_code, sampling_fq, angle_nan, angle_cen = find_param(locate_file)
  65. wind_name_1, turbine_code_1, time_code_1, sampling_fq_1, angle_tip, angle_root = find_param(measure_file)
  66. sampling_fq_1 = sampling_fq_1 * 1000
  67. sampling_fq = sampling_fq * 1000
  68. data_nan, data_cen = process_data(locate_file)
  69. data_tip, data_root = process_data(measure_file)
  70. start_tip, end_tip, filtered_data_tip = cycle_calculate(data_tip, noise_reduction, min_difference)
  71. start_root, end_root, filtered_data_root = cycle_calculate(data_root, noise_reduction, min_difference)
  72. start_nan, end_nan, filtered_data_nan = cycle_calculate(data_nan, noise_reduction, min_difference)
  73. filtered_data_cen = tower_filter(data_cen, noise_reduction)
  74. dist_cen = np.mean(filtered_data_cen.iloc[:, 1].tolist())
  75. filtered_data_cen.iloc[:, 1] = filtered_data_cen.iloc[:, 1] * np.cos(np.deg2rad(angle_cen + axial_inclination))
  76. if end_tip.iloc[0, 0] < start_root.iloc[0, 0]:
  77. start_tip = start_tip.drop(start_tip.index[0])
  78. end_tip = end_tip.drop(end_tip.index[0])
  79. if start_root.iloc[0, 0] < start_tip.iloc[0, 0] < end_tip.iloc[0, 0] < end_root.iloc[0, 0]:
  80. pass
  81. else:
  82. raise ValueError("The elements are not in the expected order.")
  83. tower_dist_tip = ff.tower_cal(filtered_data_tip, start_tip, end_tip, sampling_fq_1)
  84. tower_dist_root = ff.tower_cal(filtered_data_root, start_root, end_root, sampling_fq_1)
  85. tower_dist_nan = ff.tower_cal(filtered_data_nan, start_nan, end_nan, sampling_fq)
  86. lowpass_data, fft_x, fft_y, tower_freq, tower_max = ff.process_fft(filtered_data_cen, sampling_fq)
  87. result_line_tip, result_scatter_tip, border_rows_tip, cycle_len_tip, min_tip \
  88. = data_normalize(filtered_data_tip, start_tip, end_tip, group_length[0])
  89. result_line_root, result_scatter_root, border_rows_root, cycle_len_root, min_root \
  90. = data_normalize(filtered_data_root, start_root, end_root, group_length[1])
  91. result_line_nan, result_scatter_nan, border_rows_nan, cycle_len_nan, min_nan \
  92. = data_normalize(filtered_data_nan, start_nan, end_nan, group_length[2])
  93. result_avg_tip, result_diff_tip = blade_shape(result_line_tip)
  94. result_avg_root, result_diff_root = blade_shape(result_line_root)
  95. border_rows_tip_new, angle_tip_new = coordinate_normalize(border_rows_tip, angle_tip)
  96. border_rows_nan_new, angle_nan_new = coordinate_normalize(border_rows_nan, angle_nan)
  97. tip_r = radius_cal(border_rows_tip_new, angle_tip_new, dist_cen, angle_cen, axial_inclination, angle_cone)
  98. root_r = radius_cal(border_rows_root, angle_root, dist_cen, angle_cen, axial_inclination, angle_cone)
  99. nan_r = radius_cal(border_rows_nan_new, angle_nan_new, dist_cen, angle_cen, axial_inclination, angle_cone)
  100. pitch_angle_tip, aero_dist_tip, v_speed_tip, cen_blade_tip = (
  101. blade_angle_aero_dist(border_rows_tip, tip_r, cycle_len_tip, tower_dist_tip, angle_tip_new))
  102. pitch_angle_root, aero_dist_root, v_speed_root, cen_blade_root = (
  103. blade_angle_aero_dist(border_rows_root, root_r, cycle_len_root, tower_dist_root, angle_root))
  104. pitch_angle_nan, aero_dist_nan, v_speed_nan, cen_blade_nan = (
  105. blade_angle_aero_dist(border_rows_nan_new, nan_r, cycle_len_nan, tower_dist_nan, angle_nan_new))
  106. cen_blade_tip_array = np.array(cen_blade_tip)
  107. cen_blade_nan_array = np.array(cen_blade_nan)
  108. min_tip_array = np.array(min_tip)
  109. min_nan_array = np.array(min_nan)
  110. abs_diff = np.abs(cen_blade_tip_array - min_tip_array)
  111. abs_diff_nan = np.abs(cen_blade_nan_array - min_nan_array)
  112. blade_dist_tip = abs_diff * np.cos(np.deg2rad(angle_tip_new))
  113. blade_dist_nan = abs_diff_nan * np.cos(np.deg2rad(angle_nan_new))
  114. blade_dist_tip.tolist()
  115. blade_dist_nan.tolist()
  116. dist_distribute_nan = blade_dist_distribute_cal(filtered_data_nan, start_nan, end_nan,
  117. tower_dist_nan, angle_nan_new, blade_dist_nan)
  118. dist_distribute = [df.round(5) for df in dist_distribute_nan]
  119. min_values = []
  120. min_keys = []
  121. max_values = []
  122. max_keys = []
  123. mean_values = []
  124. for df in dist_distribute:
  125. second_col_min = df[df.columns[1]].min()
  126. second_col_max = df[df.columns[1]].max()
  127. min_row = df[df[df.columns[1]] == second_col_min]
  128. max_row = df[df[df.columns[1]] == second_col_max]
  129. min_values.append(round(second_col_min, 2))
  130. min_keys.append(round(min_row.iloc[0][df.columns[0]], 2))
  131. max_values.append(round(second_col_max, 2))
  132. max_keys.append(round(max_row.iloc[0][df.columns[0]], 2))
  133. for i in range(3):
  134. mean_values.append(round((max_values[i] + min_values[i]) / 2, 2))
  135. for df in result_line_tip:
  136. first_column = df.iloc[:, 0]
  137. sec_column = df.iloc[:, 1]
  138. df.iloc[:, 0] = first_column * v_speed_tip
  139. df.iloc[:, 1] = sec_column * np.cos(np.deg2rad(angle_tip_new + angle_cone + axial_inclination))
  140. for df in result_line_root:
  141. first_column = df.iloc[:, 0]
  142. sec_column = df.iloc[:, 1]
  143. df.iloc[:, 0] = first_column * v_speed_root
  144. df.iloc[:, 1] = sec_column * np.cos(np.deg2rad(angle_root + angle_cone + axial_inclination))
  145. avg_tip = result_avg_tip.iloc[:, 0]
  146. result_avg_tip.iloc[:, 0] = avg_tip * v_speed_tip
  147. avg_root = result_avg_root.iloc[:, 0]
  148. result_avg_root.iloc[:, 0] = avg_root * v_speed_root
  149. twist_1 = round(np.abs(pitch_angle_root[0] - pitch_angle_tip[0]), 2)
  150. twist_2 = round(np.abs(pitch_angle_root[1] - pitch_angle_tip[1]), 2)
  151. twist_3 = round(np.abs(pitch_angle_root[2] - pitch_angle_tip[2]), 2)
  152. twist_avg = round((twist_1 + twist_2 + twist_3) / 3, 2)
  153. sampling_num = int(0.015 * sampling_fq_1)
  154. data_tip.iloc[:, 0] = data_tip.iloc[:, 0] / 5000000
  155. data_root.iloc[:, 0] = data_root.iloc[:, 0] / 5000000
  156. lowpass_data.iloc[:, 0] = lowpass_data.iloc[:, 0] / 5000000
  157. return_list.append(str(time_code))
  158. return_list.append(str(wind_name))
  159. return_list.append(str(turbine_code))
  160. return_list.append(sampling_fq_1)
  161. return_list.append(pitch_angle_root[0])
  162. return_list.append(pitch_angle_root[1])
  163. return_list.append(pitch_angle_root[2])
  164. return_list.append(pitch_angle_root[3])
  165. return_list.append(mean_values[0])
  166. return_list.append(mean_values[1])
  167. return_list.append(mean_values[2])
  168. return_list.append(twist_1)
  169. return_list.append(twist_2)
  170. return_list.append(twist_3)
  171. return_list.append(twist_avg)
  172. return_list.append(tower_max)
  173. return_list.append(tower_freq)
  174. df_new_row = pd.DataFrame([return_list],
  175. columns=['时间', '场站', '风机编号', '采样频率',
  176. '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
  177. '叶片1净空值', '叶片2净空值', '叶片3净空值',
  178. '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
  179. '振动幅值', '振动主频'])
  180. json_output = {
  181. 'original_plot': {
  182. 'blade_tip': {
  183. 'xdata': data_tip.iloc[:, 0].tolist()[::sampling_num],
  184. 'ydata': data_tip.iloc[:, 1].tolist()[::sampling_num]
  185. },
  186. 'blade_root': {
  187. 'xdata': data_root.iloc[:, 0].tolist()[::sampling_num],
  188. 'ydata': data_root.iloc[:, 1].tolist()[::sampling_num]
  189. }
  190. },
  191. 'fft_plot': {
  192. 'lowpass': {
  193. 'xdata': lowpass_data['time'].tolist()[::sampling_num],
  194. 'ydata': lowpass_data['distance_filtered'].tolist()[::sampling_num],
  195. 'xmax': max(lowpass_data['time'].tolist()),
  196. 'xmin': min(lowpass_data['time'].tolist()),
  197. 'ymax': max(lowpass_data['distance_filtered'].tolist()) + 0.02,
  198. 'ymin': min(lowpass_data['distance_filtered'].tolist()) - 0.02
  199. },
  200. 'fft': {
  201. 'xdata': fft_x,
  202. 'ydata': fft_y,
  203. 'xmax': max(fft_x),
  204. 'xmin': min(fft_x),
  205. 'ymax': max(fft_y) + 0.02,
  206. 'ymin': 0
  207. }
  208. },
  209. 'blade_tip': {
  210. 'first_blade': {
  211. 'xdata': result_line_tip[0].iloc[:, 0].tolist(),
  212. 'ydata': result_line_tip[0].iloc[:, 1].tolist()
  213. },
  214. 'second_blade': {
  215. 'xdata': result_line_tip[1].iloc[:, 0].tolist(),
  216. 'ydata': result_line_tip[1].iloc[:, 1].tolist()
  217. },
  218. 'third_blade': {
  219. 'xdata': result_line_tip[2].iloc[:, 0].tolist(),
  220. 'ydata': result_line_tip[2].iloc[:, 1].tolist()
  221. },
  222. 'avg_blade': {
  223. 'xdata': result_avg_tip.iloc[:, 0].tolist(),
  224. 'ydata': result_avg_tip.iloc[:, 1].tolist()
  225. }
  226. },
  227. 'blade_root': {
  228. 'first_blade': {
  229. 'xdata': result_line_root[0].iloc[:, 0].tolist(),
  230. 'ydata': result_line_root[0].iloc[:, 1].tolist()
  231. },
  232. 'second_blade': {
  233. 'xdata': result_line_root[1].iloc[:, 0].tolist(),
  234. 'ydata': result_line_root[1].iloc[:, 1].tolist()
  235. },
  236. 'third_blade': {
  237. 'xdata': result_line_root[2].iloc[:, 0].tolist(),
  238. 'ydata': result_line_root[2].iloc[:, 1].tolist()
  239. },
  240. 'avg_blade': {
  241. 'xdata': result_avg_root.iloc[:, 0].tolist(),
  242. 'ydata': result_avg_root.iloc[:, 1].tolist()
  243. }
  244. },
  245. 'dist_distribution': {
  246. 'first_blade': {
  247. 'xdata': dist_distribute[0].iloc[:, 0].tolist(),
  248. 'ydata': dist_distribute[0].iloc[:, 1].tolist()
  249. },
  250. 'second_blade': {
  251. 'xdata': dist_distribute[1].iloc[:, 0].tolist(),
  252. 'ydata': dist_distribute[1].iloc[:, 1].tolist()
  253. },
  254. 'third_blade': {
  255. 'xdata': dist_distribute[2].iloc[:, 0].tolist(),
  256. 'ydata': dist_distribute[2].iloc[:, 1].tolist()
  257. }
  258. },
  259. 'analyse_table': {
  260. 'pitch_angle_diff': {
  261. 'blade_1': pitch_angle_root[0],
  262. 'blade_2': pitch_angle_root[1],
  263. 'blade_3': pitch_angle_root[2],
  264. 'blade_relate': pitch_angle_root[3]
  265. },
  266. 'aero_dist': {
  267. 'first_blade': {
  268. 'x_min': min_keys[0],
  269. 'y_min': min_values[0],
  270. 'x_max': max_keys[0],
  271. 'y_max': max_values[0],
  272. 'y_diff': np.abs(max_values[0] - min_values[0]),
  273. 'y_ava': mean_values[0]
  274. },
  275. 'second_blade': {
  276. 'x_min': min_keys[1],
  277. 'y_min': min_values[1],
  278. 'x_max': max_keys[1],
  279. 'y_max': max_values[1],
  280. 'y_diff': np.abs(max_values[1] - min_values[1]),
  281. 'y_ava': mean_values[1]
  282. },
  283. 'third_blade': {
  284. 'x_min': min_keys[2],
  285. 'y_min': min_values[2],
  286. 'x_max': max_keys[2],
  287. 'y_max': max_values[2],
  288. 'y_diff': np.abs(max_values[2] - min_values[2]),
  289. 'y_ava': mean_values[2]
  290. }
  291. },
  292. 'blade_twist': {
  293. 'blade_1': twist_1,
  294. 'blade_2': twist_2,
  295. 'blade_3': twist_3,
  296. 'blade_avg': twist_avg
  297. },
  298. 'tower_vibration': {
  299. 'max_vibration': tower_max,
  300. 'main_vibration_freq': tower_freq
  301. }
  302. }
  303. }
  304. python_interpreter_path = sys.executable
  305. project_directory = os.path.dirname(python_interpreter_path)
  306. data_folder = os.path.join(project_directory, 'data')
  307. if not os.path.exists(data_folder):
  308. os.makedirs(data_folder)
  309. csv_file_path = os.path.join(data_folder, 'history_data.csv')
  310. if not os.path.exists(csv_file_path):
  311. pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率',
  312. '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
  313. '叶片1净空值', '叶片2净空值', '叶片3净空值',
  314. '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
  315. '振动幅值', '振动主频']).to_csv(csv_file_path, index=False)
  316. df_new_row.to_csv(csv_file_path, mode='a', header=False, index=False)
  317. time_code_cleaned = time_code.replace("-", "").replace(":", "").replace(" ", "")
  318. json_filename = f"{wind_name}_{turbine_code}_{time_code_cleaned}.json"
  319. json_file_path = os.path.join(data_folder, json_filename)
  320. with open(json_file_path, 'w') as json_file:
  321. json.dump(json_output, json_file, indent=4)
  322. return json_output
  323. def process_data(file_path):
  324. data = pd.read_csv(file_path, usecols=[1, 3, 4, 8, 9], header=None, engine='c')
  325. data = data.head(int(len(data) * 0.95))
  326. max_value = data.iloc[:, 0].max()
  327. max_index = data.iloc[:, 0].idxmax()
  328. min_index = data.iloc[:, 0].idxmin()
  329. if min_index == max_index + 1:
  330. data.iloc[min_index:, 0] += max_value
  331. last_time = data.iloc[-1, 0]
  332. first_time = data.iloc[0, 0]
  333. data = data[data.iloc[:, 0] >= first_time]
  334. data = data[data.iloc[:, 0] <= last_time]
  335. data.reset_index(drop=True, inplace=True)
  336. min_time = data.iloc[:, 0].min()
  337. data.iloc[:, 0] -= min_time
  338. data_1 = data.iloc[:, [0, 1, 2]]
  339. data_2 = data.iloc[:, [0, 3, 4]]
  340. data_1.columns = ['time', 'distance', 'grey']
  341. data_2.columns = ['time', 'distance', 'grey']
  342. return data_1, data_2
  343. def tower_filter(data_group: pd.DataFrame, noise_threshold: float):
  344. distance_counts = data_group['distance'].value_counts(normalize=True)
  345. noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index
  346. noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index
  347. data_group.loc[noise_indices, 'distance'] = np.nan
  348. top_5_distances = distance_counts.head(5).index
  349. mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean()
  350. data_group.loc[(data_group['distance'] < mean_values - 20) | (
  351. data_group['distance'] > mean_values * 1.1), 'distance'] = np.nan
  352. data_group['distance'] = data_group['distance'].fillna(method='ffill')
  353. filtered_data = data_group
  354. return filtered_data
  355. def cycle_calculate(data_group: pd.DataFrame, noise_threshold: float, min_distance: float):
  356. distance_counts = data_group['distance'].value_counts(normalize=True)
  357. noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index
  358. noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index
  359. data_group.loc[noise_indices, 'distance'] = np.nan
  360. top_5_distances = distance_counts.head(5).index
  361. mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean()
  362. data_group.loc[(data_group['distance'] < mean_values - 31) | (
  363. data_group['distance'] > mean_values * 1.1), 'distance'] = np.nan
  364. data_group['distance'] = data_group['distance'].fillna(method='ffill')
  365. filtered_data = data_group
  366. filtered_data['distance_diff'] = filtered_data['distance'].diff()
  367. large_diff_indices = filtered_data[filtered_data['distance_diff'] > min_distance].index
  368. small_diff_indices = filtered_data[filtered_data['distance_diff'] < -min_distance].index
  369. filtered_data = filtered_data.drop(columns=['distance_diff'])
  370. start_points = pd.DataFrame()
  371. end_points = pd.DataFrame()
  372. for idx in large_diff_indices:
  373. current_distance = filtered_data.loc[idx, 'distance']
  374. next_rows_large = filtered_data.loc[idx - 201: idx - 1]
  375. if next_rows_large['distance'].le(current_distance - min_distance).all():
  376. end_points = pd.concat([end_points, filtered_data.loc[[idx - 1]]])
  377. for idx in small_diff_indices:
  378. current_distance = filtered_data.loc[idx - 1, 'distance']
  379. next_rows_small = filtered_data.iloc[idx: idx + 201]
  380. if next_rows_small['distance'].le(current_distance - min_distance).all():
  381. start_points = pd.concat([start_points, filtered_data.loc[[idx]]])
  382. if end_points.iloc[0, 0] < start_points.iloc[0, 0]:
  383. end_points = end_points.drop(end_points.index[0])
  384. if end_points.iloc[-1, 0] < start_points.iloc[-1, 0]:
  385. start_points = start_points.drop(start_points.index[-1])
  386. else:
  387. pass
  388. return start_points, end_points, filtered_data
  389. def data_normalize(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame, group_len: int) \
  390. -> Tuple[List[pd.DataFrame], List[pd.DataFrame], List[pd.DataFrame], int, list]:
  391. combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
  392. if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
  393. combined_df_sorted = combined_df_sorted.iloc[1:]
  394. if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
  395. combined_df_sorted = combined_df_sorted.iloc[:-1]
  396. combined_df_sorted.reset_index(drop=True, inplace=True)
  397. start_times = combined_df_sorted['time'].tolist()
  398. normalize_cycle = start_times[1] - start_times[0]
  399. full_cycle = int((start_times[2] - start_times[0]) * 3)
  400. turbines = [pd.DataFrame() for _ in range(3)]
  401. for i in range(0, len(start_times), 2):
  402. start_time = start_times[i]
  403. end_time = start_times[i + 1]
  404. segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)]
  405. if segment is None:
  406. pass
  407. else:
  408. ratio = (end_time - start_time) / normalize_cycle
  409. segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio
  410. turbines[i % 3] = pd.concat([turbines[i % 3], segment])
  411. turbines_processed = []
  412. turbines_scattered = []
  413. min_list = []
  414. sd_time = [-1, -1]
  415. time_list = list(range(0, normalize_cycle, group_len))
  416. for turbine in turbines:
  417. turbine_sorted = turbine.sort_values(by='time').reset_index(drop=True)
  418. grey_start_index = int(len(turbine_sorted) * 0.1)
  419. grey_end_index = int(len(turbine_sorted) * 0.9)
  420. subset_grey = turbine_sorted[grey_start_index:grey_end_index]
  421. mean_grey = subset_grey['grey'].mean() * 0.8
  422. turbine_sorted = turbine_sorted[turbine_sorted['grey'] > mean_grey]
  423. first_time = turbine_sorted['time'].iloc[0]
  424. bins = list(range(int(first_time), int(turbine_sorted['time'].max()), group_len))
  425. grouped = turbine_sorted.groupby(pd.cut(turbine_sorted['time'], bins=bins, right=False))
  426. processed_df = pd.DataFrame()
  427. scattered_df = pd.DataFrame()
  428. mean_points = []
  429. diff_points = []
  430. for _, group in grouped:
  431. quantile_5 = group['distance'].quantile(0.05)
  432. quantile_95 = group['distance'].quantile(0.95)
  433. filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)]
  434. mean_point = filtered_group['distance'].mean()
  435. mean_points.append(mean_point)
  436. for i in range(len(mean_points) - 1):
  437. diff = abs(mean_points[i + 1] - mean_points[i])
  438. diff_points.append(diff)
  439. start_index = int(len(diff_points) * 0.05)
  440. end_index = int(len(diff_points) * 0.95)
  441. subset1 = diff_points[start_index:end_index]
  442. sdr_diff = np.max(subset1) * 1.1
  443. min_list.append(min(mean_points))
  444. first_index = np.where(diff_points < sdr_diff)[0][0]
  445. last_index = np.where(diff_points < sdr_diff)[0][-1]
  446. for index, (bin, group) in enumerate(grouped):
  447. quantile_5 = group['distance'].quantile(0.05)
  448. quantile_95 = group['distance'].quantile(0.95)
  449. filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)]
  450. if first_index <= index < last_index:
  451. mid_point = filtered_group.mean()
  452. mid_point_df = pd.DataFrame([mid_point])
  453. mid_point_df.iloc[0, 0] = time_list[index]
  454. processed_df = pd.concat([processed_df, mid_point_df], ignore_index=True)
  455. scattered_df = pd.concat([scattered_df, filtered_group], ignore_index=True)
  456. else:
  457. pass
  458. min_time = processed_df['time'].min()
  459. max_time = processed_df['time'].max()
  460. if sd_time == [-1, -1]:
  461. sd_time = [min_time, max_time]
  462. elif sd_time[0] < min_time:
  463. sd_time[0] = min_time
  464. elif sd_time[1] > max_time:
  465. sd_time[1] = max_time
  466. turbines_processed.append(processed_df)
  467. turbines_scattered.append(scattered_df)
  468. border_rows = []
  469. for i, turbine in enumerate(turbines_processed):
  470. closest_index_0 = (turbine['time'] - sd_time[0]).abs().idxmin()
  471. turbine.at[closest_index_0, 'time'] = sd_time[0]
  472. sd_time_row_0 = turbine.loc[closest_index_0]
  473. closest_index_1 = (turbine['time'] - sd_time[1]).abs().idxmin()
  474. turbine.at[closest_index_1, 'time'] = sd_time[1]
  475. sd_time_row_1 = turbine.loc[closest_index_1]
  476. turbines_processed[i] = turbine.iloc[closest_index_0:closest_index_1 + 1].reset_index(drop=True)
  477. sd_time_rows_turbine = pd.concat([pd.DataFrame([sd_time_row_0]), pd.DataFrame([sd_time_row_1])]
  478. , ignore_index=True)
  479. border_rows.append(sd_time_rows_turbine)
  480. return turbines_processed, turbines_scattered, border_rows, full_cycle, min_list
  481. def blade_shape(turbines_processed: List[pd.DataFrame]):
  482. row_counts = [df.shape[0] for df in turbines_processed]
  483. num_rows = min(row_counts)
  484. turbine_avg = pd.DataFrame(index=range(num_rows), columns=['time', 'distance'])
  485. turbine_diff = [pd.DataFrame(index=range(num_rows), columns=['time', 'distance']) for _ in turbines_processed]
  486. for i in range(num_rows):
  487. distances = [df.loc[i, 'distance'] for df in turbines_processed]
  488. avg_distance = sum(distances) / len(distances)
  489. time_value = turbines_processed[0].loc[i, 'time']
  490. turbine_avg.loc[i, 'time'] = time_value
  491. turbine_avg.loc[i, 'distance'] = avg_distance
  492. for j in range(len(distances)):
  493. distances[j] = distances[j] - avg_distance
  494. turbine_diff[j].loc[i, 'time'] = time_value
  495. turbine_diff[j].loc[i, 'distance'] = distances[j]
  496. return turbine_avg, turbine_diff
  497. def coordinate_normalize(tip_border_rows: List[pd.DataFrame], tip_angle):
  498. tip_angle1 = np.deg2rad(tip_angle)
  499. tip_angle_list = []
  500. for turbine in tip_border_rows:
  501. tip_angle_cal0 = ((np.sin(tip_angle1) * turbine['distance'] - 0.07608) /
  502. (np.cos(tip_angle1) * turbine['distance']))
  503. tip_angle_cal = np.arctan(tip_angle_cal0)
  504. turbine['distance'] = (turbine['distance'] ** 2 + 0.0057881664 -
  505. 0.15216 * turbine['distance'] * np.sin(tip_angle1)) ** 0.5
  506. tip_angle_list.append(tip_angle_cal)
  507. tip_angle_new = float(np.mean(tip_angle_list))
  508. tip_angle_new1 = np.rad2deg(tip_angle_new)
  509. return tip_border_rows, tip_angle_new1
  510. def radius_cal(border_rows, meas_angle, cen_dist, cen_angle, angle_main, angle_rotate):
  511. aero_dist = (pd.concat([df['distance'] for df in border_rows]).mean())
  512. cen_x = np.cos(np.deg2rad(cen_angle)) * cen_dist
  513. cen_y = np.sin(np.deg2rad(cen_angle)) * cen_dist
  514. aero_x = np.cos(np.deg2rad(meas_angle)) * aero_dist
  515. aero_y = np.sin(np.deg2rad(meas_angle)) * aero_dist
  516. theta_4 = np.tan(np.pi - np.deg2rad(angle_main))
  517. theta_5 = np.tan(np.pi / 2 - np.deg2rad(angle_main) - np.deg2rad(angle_rotate))
  518. if theta_5 > 1000:
  519. radius = np.abs((cen_y - aero_y) - theta_4 * (cen_x - aero_x))
  520. else:
  521. radius = (np.abs((theta_4 * (cen_x - aero_x) - (cen_y - aero_y)) / (theta_4 - theta_5))
  522. * (1 + theta_5 ** 2) ** 0.5)
  523. return radius
  524. def blade_angle_aero_dist(border_rows: List[pd.DataFrame], radius: float, full_cycle: int,
  525. tower_dist: float, v_angle: float):
  526. v_speed = 2 * np.pi * radius / full_cycle
  527. pitch_angle_list = []
  528. aero_dist_list = []
  529. cen_blade = []
  530. for turbine in border_rows:
  531. diff_time = turbine.iloc[1, 0] - turbine.iloc[0, 0]
  532. diff_len = (turbine.iloc[1, 1] - turbine.iloc[0, 1]) * np.cos(np.deg2rad(v_angle))
  533. mean_col2 = (turbine.iloc[1, 1] + turbine.iloc[0, 1]) / 2
  534. aero_dist = abs(mean_col2 - tower_dist) * np.cos(np.deg2rad(v_angle))
  535. pitch_angle = np.degrees(np.arctan(diff_len / (diff_time * v_speed)))
  536. pitch_angle_list.append(pitch_angle)
  537. aero_dist_list.append(aero_dist)
  538. cen_blade.append(mean_col2)
  539. pitch_mean = np.mean(pitch_angle_list)
  540. pitch_angle_list = [angle - pitch_mean for angle in pitch_angle_list]
  541. pitch_angle_list.append(max(pitch_angle_list) - min(pitch_angle_list))
  542. aero_dist_list.append(np.mean(aero_dist_list))
  543. pitch_angle_list = [round(num, 2) for num in pitch_angle_list]
  544. aero_dist_list = [round(num, 2) for num in aero_dist_list]
  545. return pitch_angle_list, aero_dist_list, v_speed, cen_blade
  546. def find_param(path: str):
  547. path = path.replace('\\', '/')
  548. last_slash_index = path.rfind('/')
  549. result = path[last_slash_index + 1:]
  550. underscore_indices = []
  551. start = 0
  552. while True:
  553. index = result.find('_', start)
  554. if index == -1:
  555. break
  556. underscore_indices.append(index)
  557. start = index + 1
  558. wind_name = result[: underscore_indices[0]]
  559. turbine_code = result[underscore_indices[0] + 1: underscore_indices[1]]
  560. time_code = result[underscore_indices[1] + 1: underscore_indices[2]]
  561. sampling_fq = int(result[underscore_indices[2] + 1: underscore_indices[3]])
  562. tunnel_1 = float(result[underscore_indices[3] + 1: underscore_indices[4]])
  563. tunnel_2 = float(result[underscore_indices[4] + 1: -4])
  564. dt = datetime.strptime(time_code, "%Y%m%d%H%M%S")
  565. standard_time_str = dt.strftime("%Y-%m-%d %H:%M:%S")
  566. return wind_name, turbine_code, standard_time_str, sampling_fq, tunnel_1, tunnel_2
  567. def blade_dist_distribute_cal(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame,
  568. tower_dist: float, v_angle: float, blade_cen_dist: list):
  569. combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
  570. if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
  571. combined_df_sorted = combined_df_sorted.iloc[1:]
  572. if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
  573. combined_df_sorted = combined_df_sorted.iloc[:-1]
  574. combined_df_sorted.reset_index(drop=True, inplace=True)
  575. start_times = combined_df_sorted['time'].tolist()
  576. normalize_cycle = start_times[1] - start_times[0]
  577. tower_clearance = [pd.DataFrame() for _ in range(3)]
  578. for i in range(0, len(start_times) - 2, 2):
  579. start_time = start_times[i]
  580. end_time = start_times[i + 1]
  581. segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)]
  582. min_distance = segment['distance'].min()
  583. clearance = np.abs(tower_dist - min_distance - blade_cen_dist[i % 3]) * np.cos(np.deg2rad(v_angle))
  584. r_speed = round(60 / ((start_times[i + 2] - start_times[i]) * 3 / 5000000), 2)
  585. ratio = (end_time - start_time) / normalize_cycle
  586. segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio
  587. new_df = pd.DataFrame({
  588. 'r_speed': [r_speed],
  589. 'clearance': [clearance]
  590. })
  591. tower_clearance[i % 3] = pd.concat([tower_clearance[i % 3], new_df])
  592. tower_clearance = [df.sort_values(by='r_speed') for df in tower_clearance]
  593. return tower_clearance