data_clean.py 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817
  1. import os
  2. import pandas as pd
  3. import numpy as np
  4. import matplotlib.pyplot as plt
  5. from typing import Tuple, List
  6. import warnings
  7. import time
  8. import sys
  9. import frequency_filter as ff
  10. from datetime import datetime
  11. warnings.filterwarnings("ignore", category=FutureWarning) # 忽略特定警告
  12. plt.rcParams['font.sans-serif'] = ['SimHei'] # 使用黑体
  13. plt.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题
  14. def result_main():
  15. """
  16. 创建data目录,返回历史分析数据存放的文件路径
  17. """
  18. # 获取当前程序的绝对路径
  19. python_interpreter_path = sys.executable
  20. project_directory = os.path.dirname(python_interpreter_path)
  21. data_folder = os.path.join(project_directory, 'data')
  22. # 检查data文件夹是否存在,如果不存在则创建
  23. if not os.path.exists(data_folder):
  24. os.makedirs(data_folder)
  25. # CSV文件路径
  26. csv_file_path = os.path.join(data_folder, 'history_data.csv')
  27. # 检查CSV文件是否存在,如果不存在则创建一个空的CSV文件
  28. if not os.path.exists(csv_file_path):
  29. pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率',
  30. '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
  31. '叶片1净空值', '叶片2净空值', '叶片3净空值', '平均净空值',
  32. '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
  33. '振动幅值', '振动主频']).to_csv(csv_file_path, index=False)
  34. return csv_file_path
  35. def data_analyse(path: List[str]):
  36. """
  37. 创建data目录,把分析数据保存到历史记录中,同时返回全量分析数据
  38. """
  39. locate_file = path[0]
  40. measure_file = path[1]
  41. noise_reduction = 0.000001 # 如果一个距离值的所有样本量小于总样本量的noise_reduction,则被去掉
  42. min_difference = 1.5 # 如果相邻2个点的距离差大于min_difference,则被注意是否是周期节点
  43. angle_cone = float(path[2]) # 锥角
  44. axial_inclination = float(path[3]) # 轴向倾角
  45. return_list = []
  46. wind_name, turbine_code, time_code, sampling_fq, angle_nan, angle_cen = find_param(locate_file)
  47. wind_name_1, turbine_code_1, time_code, sampling_fq_1, angle_tip, angle_root = find_param(measure_file)
  48. sampling_fq_1 = sampling_fq_1 * 1000
  49. sampling_fq = sampling_fq * 1000
  50. data_nan, data_cen = process_data(locate_file)
  51. data_tip, data_root = process_data(measure_file)
  52. start_tip, end_tip, filtered_data_tip = cycle_calculate(data_tip, noise_reduction, min_difference)
  53. start_root, end_root, filtered_data_root = cycle_calculate(data_root, noise_reduction, min_difference)
  54. filtered_data_cen = tower_filter(data_cen, noise_reduction)
  55. dist_cen = np.mean(filtered_data_cen.iloc[:, 1].tolist())
  56. if end_tip.iloc[0, 0] < start_root.iloc[0, 0]:
  57. start_tip = start_tip.drop(start_tip.index[0])
  58. end_tip = end_tip.drop(end_tip.index[0])
  59. if start_root.iloc[0, 0] < start_tip.iloc[0, 0] < end_tip.iloc[0, 0] < end_root.iloc[0, 0]:
  60. pass
  61. else:
  62. raise ValueError("The elements are not in the expected order.")
  63. tower_dist_tip = ff.tower_cal(filtered_data_tip, start_tip, end_tip, sampling_fq_1)
  64. tower_dist_root = ff.tower_cal(filtered_data_root, start_root, end_root, sampling_fq_1)
  65. lowpass_data, fft_x, fft_y, tower_freq, tower_max = ff.process_fft(filtered_data_cen, sampling_fq)
  66. result_line_tip, result_scatter_tip, border_rows_tip, cycle_len_tip \
  67. = data_normalize(filtered_data_tip, start_tip, end_tip, sampling_fq_1)
  68. result_line_root, result_scatter_root, border_rows_root, cycle_len_root \
  69. = data_normalize(filtered_data_root, start_root, end_root, sampling_fq_1)
  70. result_avg_tip, result_diff_tip = blade_shape(result_line_tip)
  71. result_avg_root, result_diff_root = blade_shape(result_line_root)
  72. border_rows_tip_new, angle_tip_new = coordinate_normalize(border_rows_tip, angle_tip)
  73. tip_r = radius_cal(border_rows_tip_new, angle_tip_new, dist_cen, angle_cen, axial_inclination, angle_cone)
  74. root_r = radius_cal(border_rows_root, angle_root, dist_cen, angle_cen, axial_inclination, angle_cone)
  75. pitch_angle_tip, aero_dist_tip, v_speed_tip, cen_blade_tip = (
  76. blade_angle_aero_dist(border_rows_tip, tip_r, cycle_len_tip, tower_dist_tip, angle_tip_new))
  77. pitch_angle_root, aero_dist_root, v_speed_root = (
  78. blade_angle_aero_dist(border_rows_root, root_r, cycle_len_root, tower_dist_root, angle_root))
  79. dist_distribute = blade_dist_distribute_cal(filtered_data_tip, start_tip, end_tip,
  80. tower_dist_tip, angle_tip_new, cen_blade_tip)
  81. for df in result_line_tip:
  82. first_column = df.iloc[:, 0]
  83. df.iloc[:, 0] = first_column * v_speed_tip
  84. for df in result_line_root:
  85. first_column = df.iloc[:, 0]
  86. df.iloc[:, 0] = first_column * v_speed_root
  87. avg_tip = result_avg_tip.iloc[:, 0]
  88. result_avg_tip.iloc[:, 0] = avg_tip * v_speed_tip
  89. avg_root = result_avg_root.iloc[:, 0]
  90. result_avg_root.iloc[:, 0] = avg_root * v_speed_root
  91. twist_1 = round(np.abs(pitch_angle_root[0] - pitch_angle_tip[0]), 2)
  92. twist_2 = round(np.abs(pitch_angle_root[1] - pitch_angle_tip[1]), 2)
  93. twist_3 = round(np.abs(pitch_angle_root[2] - pitch_angle_tip[2]), 2)
  94. twist_avg = round((twist_1 + twist_2 + twist_3) / 3, 2)
  95. sampling_num = int(0.01 * sampling_fq_1)
  96. data_tip.iloc[:, 0] = data_tip.iloc[:, 0] / 5000000
  97. data_root.iloc[:, 0] = data_root.iloc[:, 0] / 5000000
  98. lowpass_data.iloc[:, 0] = lowpass_data.iloc[:, 0] / 5000000
  99. return_list.append(time_code)
  100. return_list.append(wind_name)
  101. return_list.append(turbine_code)
  102. return_list.append(sampling_fq_1)
  103. return_list.append(pitch_angle_root[0])
  104. return_list.append(pitch_angle_root[1])
  105. return_list.append(pitch_angle_root[2])
  106. return_list.append(pitch_angle_root[3])
  107. return_list.append(aero_dist_tip[0])
  108. return_list.append(aero_dist_tip[1])
  109. return_list.append(aero_dist_tip[2])
  110. return_list.append(aero_dist_tip[3])
  111. return_list.append(twist_1)
  112. return_list.append(twist_2)
  113. return_list.append(twist_3)
  114. return_list.append(twist_avg)
  115. return_list.append(tower_max)
  116. return_list.append(tower_freq)
  117. # 将return_list转换为DataFrame并追加到CSV文件
  118. df_new_row = pd.DataFrame([return_list],
  119. columns=['时间', '场站', '风机编号', '采样频率',
  120. '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
  121. '叶片1净空值', '叶片2净空值', '叶片3净空值', '平均净空值',
  122. '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
  123. '振动幅值', '振动主频'])
  124. json_output = {
  125. 'original_plot': {
  126. 'blade_tip': {
  127. 'xdata': data_tip.iloc[:, 0].tolist()[::sampling_num],
  128. 'ydata': data_tip.iloc[:, 1].tolist()[::sampling_num]
  129. },
  130. 'blade_root': {
  131. 'xdata': data_root.iloc[:, 0].tolist()[::sampling_num],
  132. 'ydata': data_root.iloc[:, 1].tolist()[::sampling_num]
  133. }
  134. },
  135. 'fft_plot': {
  136. 'lowpass': {
  137. 'xdata': lowpass_data['time'].tolist()[::sampling_num],
  138. 'ydata': lowpass_data['distance_filtered'].tolist()[::sampling_num],
  139. 'xmax': max(lowpass_data['time'].tolist()),
  140. 'xmin': min(lowpass_data['time'].tolist()),
  141. 'ymax': max(lowpass_data['distance_filtered'].tolist()),
  142. 'ymin': min(lowpass_data['distance_filtered'].tolist())
  143. },
  144. 'fft': {
  145. 'xdata': fft_x,
  146. 'ydata': fft_y,
  147. 'xmax': max(fft_x),
  148. 'xmin': min(fft_x),
  149. 'ymax': max(fft_y),
  150. 'ymin': min(fft_y)
  151. }
  152. },
  153. 'blade_tip': {
  154. 'first_blade': {
  155. 'xdata': result_line_tip[0].iloc[:, 0].tolist(),
  156. 'ydata': result_line_tip[0].iloc[:, 1].tolist()
  157. },
  158. 'second_blade': {
  159. 'xdata': result_line_tip[1].iloc[:, 0].tolist(),
  160. 'ydata': result_line_tip[1].iloc[:, 1].tolist()
  161. },
  162. 'third_blade': {
  163. 'xdata': result_line_tip[2].iloc[:, 0].tolist(),
  164. 'ydata': result_line_tip[2].iloc[:, 1].tolist()
  165. },
  166. 'avg_blade': {
  167. 'xdata': result_avg_tip.iloc[:, 0].tolist(),
  168. 'ydata': result_avg_tip.iloc[:, 1].tolist()
  169. }
  170. },
  171. 'blade_root': {
  172. 'first_blade': {
  173. 'xdata': result_line_root[0].iloc[:, 0].tolist(),
  174. 'ydata': result_line_root[0].iloc[:, 1].tolist()
  175. },
  176. 'second_blade': {
  177. 'xdata': result_line_root[1].iloc[:, 0].tolist(),
  178. 'ydata': result_line_root[1].iloc[:, 1].tolist()
  179. },
  180. 'third_blade': {
  181. 'xdata': result_line_root[2].iloc[:, 0].tolist(),
  182. 'ydata': result_line_root[2].iloc[:, 1].tolist()
  183. },
  184. 'avg_blade': {
  185. 'xdata': result_avg_root.iloc[:, 0].tolist(),
  186. 'ydata': result_avg_root.iloc[:, 1].tolist()
  187. }
  188. },
  189. 'dist_distribution': {
  190. 'first_blade': {
  191. 'xdata': dist_distribute[0].iloc[:, 0].tolist(),
  192. 'ydata': dist_distribute[0].iloc[:, 1].tolist()
  193. },
  194. 'second_blade': {
  195. 'xdata': dist_distribute[1].iloc[:, 0].tolist(),
  196. 'ydata': dist_distribute[1].iloc[:, 1].tolist()
  197. },
  198. 'third_blade': {
  199. 'xdata': dist_distribute[2].iloc[:, 0].tolist(),
  200. 'ydata': dist_distribute[2].iloc[:, 1].tolist()
  201. }
  202. },
  203. 'analyse_table': {
  204. 'pitch_angle_diff': {
  205. 'blade_1': pitch_angle_root[0],
  206. 'blade_2': pitch_angle_root[1],
  207. 'blade_3': pitch_angle_root[2],
  208. 'blade_relate': pitch_angle_root[3]
  209. },
  210. 'aero_dist': {
  211. 'blade_1': aero_dist_tip[0],
  212. 'blade_2': aero_dist_tip[1],
  213. 'blade_3': aero_dist_tip[2],
  214. 'blade_avg': aero_dist_tip[3]
  215. },
  216. 'blade_twist': {
  217. 'blade_1': twist_1,
  218. 'blade_2': twist_2,
  219. 'blade_3': twist_3,
  220. 'blade_avg': twist_avg
  221. },
  222. 'tower_vibration': {
  223. 'max_vibration': tower_max,
  224. 'main_vibration_freq': tower_freq
  225. }
  226. }
  227. }
  228. # 获取当前程序的绝对路径
  229. python_interpreter_path = sys.executable
  230. project_directory = os.path.dirname(python_interpreter_path)
  231. data_folder = os.path.join(project_directory, 'data')
  232. # 检查data文件夹是否存在,如果不存在则创建
  233. if not os.path.exists(data_folder):
  234. os.makedirs(data_folder)
  235. # CSV文件路径
  236. csv_file_path = os.path.join(data_folder, 'history_data.csv')
  237. # 检查CSV文件是否存在,如果不存在则创建一个空的CSV文件
  238. if not os.path.exists(csv_file_path):
  239. pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率',
  240. '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
  241. '叶片1净空值', '叶片2净空值', '叶片3净空值', '平均净空值',
  242. '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
  243. '振动幅值', '振动主频']).to_csv(csv_file_path, index=False)
  244. df_new_row.to_csv(csv_file_path, mode='a', header=False, index=False)
  245. return json_output
  246. def process_data(file_path):
  247. """
  248. 打开、解决时间重置、按时间清洗异常值、分列数据
  249. """
  250. # 读取第2、4、9列的数据
  251. data = pd.read_csv(file_path, usecols=[1, 3, 8], header=None, engine='c')
  252. data = data.head(int(len(data) * 0.95))
  253. '''
  254. # 绘制原始数据图
  255. # 只取前1%的数据
  256. # data = data.head(int(len(data)* 0.01))
  257. data.columns = ['time', 'distance1', 'distance2']
  258. plt.figure(figsize=(300, 150))
  259. sns.scatterplot(data=data, x='time', y='distance1', s=50, color='green')
  260. sns.scatterplot(data=data, x='time', y='distance2', s=50, color='red')
  261. abxy = plt.gca() # 获取当前坐标轴对象
  262. plt.grid(linewidth=2) # 设置网格线宽度为2
  263. abxy.xaxis.set_major_locator(MaxNLocator(nbins=100)) # 设置x轴主刻度的最大数量为10
  264. plt.xlabel('时间', fontsize=16, fontweight='bold') # 添加x轴标签
  265. plt.ylabel('距离(m)', fontsize=16, fontweight='bold') # 添加y轴标签
  266. abxy.tick_params(axis='x', labelsize=14, labelcolor='black', width=2) # 设置x轴刻度标签
  267. abxy.tick_params(axis='y', labelsize=14, labelcolor='black', width=2) # 设置y轴刻度标签
  268. plt.savefig(f"{"original"}.png", dpi=100, pil_kwargs={"icc_profile": False})
  269. plt.close()
  270. '''
  271. # 找到第一列中最大值和最小值的位置
  272. max_value = data.iloc[:, 0].max()
  273. max_index = data.iloc[:, 0].idxmax()
  274. min_index = data.iloc[:, 0].idxmin()
  275. # 检查最小值的位置是否是最大值位置的下一个
  276. if min_index == max_index + 1:
  277. # 将最小值及其之后的所有值都加上最大值
  278. data.iloc[min_index:, 0] += max_value
  279. # 按时间列筛选清洗异常值
  280. last_time = data.iloc[-1, 0]
  281. first_time = data.iloc[0, 0]
  282. data = data[data.iloc[:, 0] >= first_time]
  283. data = data[data.iloc[:, 0] <= last_time]
  284. data.reset_index(drop=True, inplace=True)
  285. # 计算最小值
  286. min_time = data.iloc[:, 0].min()
  287. data.iloc[:, 0] -= min_time
  288. # 分为两组数据
  289. data_1 = data.iloc[:, [0, 1]]
  290. data_2 = data.iloc[:, [0, 2]]
  291. # 分别命名列
  292. data_1.columns = ['time', 'distance']
  293. data_2.columns = ['time', 'distance']
  294. return data_1, data_2
  295. def tower_filter(data_group: pd.DataFrame, noise_threshold: float):
  296. """
  297. 对轮毂中心数据进行降噪,和前项填充
  298. :param data_group: process_data计算完成后轮毂中心的数据。
  299. :param noise_threshold: 去掉占比小于noise_threshold的数据。
  300. :return: filtered_data:降噪后的数据
  301. """
  302. time.sleep(1)
  303. # 计算distance的分布
  304. distance_counts = data_group['distance'].value_counts(normalize=True)
  305. noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index
  306. noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index
  307. data_group.loc[noise_indices, 'distance'] = np.nan
  308. # 选择频率最大的5个值
  309. top_5_distances = distance_counts.head(5).index
  310. mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean()
  311. data_group.loc[(data_group['distance'] < mean_values - 20) | (
  312. data_group['distance'] > mean_values * 1.1), 'distance'] = np.nan
  313. # 前向填充
  314. data_group['distance'] = data_group['distance'].fillna(method='ffill')
  315. filtered_data = data_group
  316. return filtered_data
  317. def cycle_calculate(data_group: pd.DataFrame, noise_threshold: float, min_distance: float):
  318. """
  319. 对数据进行降噪,和前项填充;计算数据的周期节点,叶片前缘突变点、后缘突变点
  320. :param data_group: process_data计算完成后的数据。
  321. :param noise_threshold: 去掉占比小于noise_threshold的数据。
  322. :param min_distance: 区分叶片和塔筒的距离差值。
  323. :return: start_points:周期开始点, end_points:周期结束点, filtered_data:降噪后的数据
  324. """
  325. time.sleep(1)
  326. # 计算distance的分布
  327. distance_counts = data_group['distance'].value_counts(normalize=True)
  328. noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index
  329. noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index
  330. data_group.loc[noise_indices, 'distance'] = np.nan
  331. # 选择频率最大的5个值
  332. top_5_distances = distance_counts.head(5).index
  333. mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean()
  334. data_group.loc[(data_group['distance'] < mean_values - 20) | (
  335. data_group['distance'] > mean_values * 1.1), 'distance'] = np.nan
  336. # 前向填充
  337. data_group['distance'] = data_group['distance'].fillna(method='ffill')
  338. filtered_data = data_group
  339. # 计算相邻两行distance的差值
  340. filtered_data['distance_diff'] = filtered_data['distance'].diff()
  341. large_diff_indices = filtered_data[filtered_data['distance_diff'] > min_distance].index
  342. small_diff_indices = filtered_data[filtered_data['distance_diff'] < -min_distance].index
  343. filtered_data = filtered_data.drop(columns=['distance_diff'])
  344. start_points = pd.DataFrame()
  345. end_points = pd.DataFrame()
  346. # 遍历所有差值大于的行
  347. for idx in large_diff_indices:
  348. # 获取当前行的 distance 值
  349. current_distance = filtered_data.loc[idx, 'distance']
  350. next_rows_large = filtered_data.loc[idx - 1000: idx - 1]
  351. # 检查是否任意 distance 的值小于 current_distance - 2
  352. if next_rows_large['distance'].le(current_distance - min_distance).all():
  353. # 如果都小于,则将当前行和下一行添加到 special_points 中
  354. end_points = pd.concat([end_points, filtered_data.loc[[idx - 1]]])
  355. for idx in small_diff_indices:
  356. # 获取当前行的 distance 值
  357. current_distance = filtered_data.loc[idx - 1, 'distance']
  358. next_rows_small = filtered_data.iloc[idx: idx + 1000]
  359. # 检查是否任意 distance 的值小于 current_distance - 2
  360. if next_rows_small['distance'].le(current_distance - min_distance).all():
  361. # 如果都小于,则将当前行和下一行添加到 special_points 中
  362. start_points = pd.concat([start_points, filtered_data.loc[[idx]]])
  363. if end_points.iloc[0, 0] < start_points.iloc[0, 0]:
  364. end_points = end_points.drop(end_points.index[0])
  365. if end_points.iloc[-1, 0] < start_points.iloc[-1, 0]:
  366. start_points = start_points.drop(start_points.index[-1])
  367. else:
  368. pass
  369. return start_points, end_points, filtered_data
  370. def data_normalize(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame, fs) \
  371. -> Tuple[List[pd.DataFrame], List[pd.DataFrame], List[pd.DataFrame], int]:
  372. """
  373. 提取每个叶片的数据并归一化,输出散点图和拟合图
  374. :param data_group: cycle_calculate计算完成后的数据。
  375. :param start_points: 所有每个周期开始点,叶片前缘突变点。
  376. :param end_points: 叶片后缘突变点。
  377. :param fs: 采样频率。
  378. :return: turbines_processed: 每个叶片的拟合数据,
  379. turbines_scattered: 每个叶片的散点数据,
  380. border_rows: 每个叶片的2个边缘数据,
  381. normalize_cycle: 周期长度
  382. """
  383. a = fs
  384. time.sleep(1)
  385. combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
  386. # 检查排序后的数据从start开始,end结束
  387. if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
  388. combined_df_sorted = combined_df_sorted.iloc[1:]
  389. if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
  390. combined_df_sorted = combined_df_sorted.iloc[:-1]
  391. combined_df_sorted.reset_index(drop=True, inplace=True)
  392. # 将 start_points 中的时间点转换为列表
  393. start_times = combined_df_sorted['time'].tolist()
  394. time.sleep(1)
  395. normalize_cycle = start_times[1] - start_times[0]
  396. full_cycle = int((start_times[2] - start_times[0]) * 3)
  397. turbines = [pd.DataFrame() for _ in range(3)]
  398. # 遍历所有起始时间点
  399. for i in range(0, len(start_times), 2):
  400. # 获取当前起始和结束时间点
  401. start_time = start_times[i]
  402. end_time = start_times[i + 1]
  403. # 根据当前起始时间点和结束时间点对数据进行分段
  404. segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)]
  405. if segment is None:
  406. pass
  407. else:
  408. # 周期归一化
  409. ratio = (end_time - start_time) / normalize_cycle
  410. segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio
  411. # 将结果添加到相应的 turbine 数据框中
  412. turbines[i % 3] = pd.concat([turbines[i % 3], segment])
  413. # 数据分组清洗、求平均
  414. turbines_processed = []
  415. turbines_scattered = []
  416. sd_time = [-1, -1]
  417. time_list = list(range(0, normalize_cycle, 1000))
  418. for turbine in turbines:
  419. # 按时间排序
  420. turbine_sorted = turbine.sort_values(by='time').reset_index(drop=True)
  421. # 找到time列的第一个值
  422. first_time = turbine_sorted['time'].iloc[0]
  423. # 分组,时间列每1000为一组(每40个时间点一组)
  424. bins = list(range(int(first_time), int(turbine_sorted['time'].max()), 1000))
  425. # 原始代码
  426. # bins = list(range(int(first_time), int(turbine_sorted['time'].max()) + len(start_times), int(fs / 50)))
  427. grouped = turbine_sorted.groupby(pd.cut(turbine_sorted['time'], bins=bins, right=False))
  428. # 初始化一个空的 DataFrame 用于存储处理后的数据
  429. processed_df = pd.DataFrame()
  430. scattered_df = pd.DataFrame()
  431. mean_points = []
  432. diff_points = []
  433. # 对每个组进行处理
  434. for _, group in grouped:
  435. # 去除 distance 最大和最小的前5%
  436. quantile_5 = group['distance'].quantile(0.05)
  437. quantile_95 = group['distance'].quantile(0.95)
  438. filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)]
  439. # 计算均值
  440. mean_point = filtered_group['distance'].mean()
  441. mean_points.append(mean_point)
  442. # 遍历 mean_points 列表,计算每个元素与其下一个元素的差值
  443. for i in range(len(mean_points) - 1):
  444. diff = abs(mean_points[i + 1] - mean_points[i])
  445. diff_points.append(diff)
  446. start_index = int(len(diff_points) * 0.05)
  447. end_index = int(len(diff_points) * 0.95)
  448. subset1 = diff_points[start_index:end_index]
  449. sdr_diff = np.max(subset1) * 1.1
  450. # 找到第一个和最后一个小于 sdr_diff 的序号
  451. first_index = np.where(diff_points < sdr_diff)[0][0]
  452. last_index = np.where(diff_points < sdr_diff)[0][-1]
  453. for index, (bin, group) in enumerate(grouped):
  454. # 去除 distance 最大和最小的前5%
  455. quantile_5 = group['distance'].quantile(0.05)
  456. quantile_95 = group['distance'].quantile(0.95)
  457. filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)]
  458. if first_index <= index < last_index: # 如果斜率小于,则认为该组数据不是突变点
  459. # 计算中点
  460. mid_point = filtered_group.mean()
  461. # 将中点转换为 DataFrame 并添加到处理后的 DataFrame 中
  462. mid_point_df = pd.DataFrame([mid_point])
  463. mid_point_df.iloc[0, 0] = time_list[index]
  464. processed_df = pd.concat([processed_df, mid_point_df], ignore_index=True)
  465. scattered_df = pd.concat([scattered_df, filtered_group], ignore_index=True)
  466. else:
  467. pass
  468. # 找到time列的最小值和最大值
  469. min_time = processed_df['time'].min()
  470. max_time = processed_df['time'].max()
  471. if sd_time == [-1, -1]:
  472. sd_time = [min_time, max_time]
  473. elif sd_time[0] < min_time:
  474. sd_time[0] = min_time
  475. elif sd_time[1] > max_time:
  476. sd_time[1] = max_time
  477. # 将处理后的 DataFrame 添加到列表中
  478. turbines_processed.append(processed_df)
  479. turbines_scattered.append(scattered_df)
  480. border_rows = []
  481. for i, turbine in enumerate(turbines_processed):
  482. # 找到离 sd_time[0] 最近的行的索引
  483. closest_index_0 = (turbine['time'] - sd_time[0]).abs().idxmin()
  484. turbine.at[closest_index_0, 'time'] = sd_time[0]
  485. sd_time_row_0 = turbine.loc[closest_index_0]
  486. # 找到离 sd_time[1] 最近的行的索引
  487. closest_index_1 = (turbine['time'] - sd_time[1]).abs().idxmin()
  488. turbine.at[closest_index_1, 'time'] = sd_time[1]
  489. sd_time_row_1 = turbine.loc[closest_index_1]
  490. # 切片 turbine,从 closest_index_0 到 closest_index_1
  491. turbines_processed[i] = turbine.iloc[closest_index_0:closest_index_1 + 1].reset_index(drop=True)
  492. sd_time_rows_turbine = pd.concat([pd.DataFrame([sd_time_row_0]), pd.DataFrame([sd_time_row_1])]
  493. , ignore_index=True)
  494. border_rows.append(sd_time_rows_turbine)
  495. time.sleep(1)
  496. return turbines_processed, turbines_scattered, border_rows, full_cycle
  497. def blade_shape(turbines_processed: List[pd.DataFrame]):
  498. """
  499. 计算叶片平均形状、叶片形状偏差。
  500. :param turbines_processed:叶片拟合曲线数据,来自data_normalize
  501. :return: 叶片平均形状、叶片形状偏差
  502. """
  503. row_counts = [df.shape[0] for df in turbines_processed]
  504. num_rows = min(row_counts)
  505. # 创建一个新的data.frame用于保存结果
  506. turbine_avg = pd.DataFrame(index=range(num_rows), columns=['time', 'distance'])
  507. turbine_diff = [pd.DataFrame(index=range(num_rows), columns=['time', 'distance']) for _ in turbines_processed]
  508. # 遍历每一行
  509. for i in range(num_rows):
  510. distances = [df.loc[i, 'distance'] for df in turbines_processed] # 获取每个data.frame的distance列的值
  511. avg_distance = sum(distances) / len(distances) # 计算distance列的平均值
  512. time_value = turbines_processed[0].loc[i, 'time'] # 获取time列的值
  513. turbine_avg.loc[i, 'time'] = time_value
  514. turbine_avg.loc[i, 'distance'] = avg_distance
  515. for j in range(len(distances)):
  516. distances[j] = distances[j] - avg_distance
  517. turbine_diff[j].loc[i, 'time'] = time_value
  518. turbine_diff[j].loc[i, 'distance'] = distances[j]
  519. time.sleep(10)
  520. return turbine_avg, turbine_diff
  521. def coordinate_normalize(tip_border_rows: List[pd.DataFrame], tip_angle):
  522. """
  523. 将叶尖测量数据和叶根、轮毂中心的测量原点归一化。
  524. :param tip_border_rows: 3个叶尖边缘数据
  525. :param tip_angle: 叶尖测量俯仰角
  526. :return: 归一化后叶尖数据,叶尖俯仰角
  527. """
  528. tip_angle1 = np.deg2rad(tip_angle)
  529. tip_angle_list = []
  530. for turbine in tip_border_rows:
  531. tip_angle_cal = np.arctan((np.sin(tip_angle1) * turbine['distance'] - 0.07608) /
  532. np.cos(tip_angle1) * turbine['distance'])
  533. turbine['distance'] = (turbine['distance'] ** 2 + 0.0057881664 -
  534. 0.15216 * turbine['distance'] * np.sin(tip_angle1)) ** 0.5
  535. tip_angle_list.append(tip_angle_cal)
  536. tip_angle_new = float(np.mean(tip_angle_list))
  537. tip_angle_new1 = np.rad2deg(tip_angle_new)
  538. return tip_border_rows, tip_angle
  539. def radius_cal(border_rows, meas_angle, cen_dist, cen_angle, angle_main, angle_rotate):
  540. """
  541. 计算测量点处的旋转半径。
  542. :param border_rows: 三个叶片的边界
  543. :param meas_angle: 回波俯仰角
  544. :param cen_dist: 轮毂中心距离
  545. :param cen_angle: 轮毂中心俯仰角
  546. :param angle_main: 主轴倾角
  547. :param angle_rotate: 锥角
  548. :return: 旋转半径
  549. """
  550. aero_dist = (pd.concat([df['distance'] for df in border_rows]).mean())
  551. cen_x = np.cos(np.deg2rad(cen_angle)) * cen_dist
  552. cen_y = np.sin(np.deg2rad(cen_angle)) * cen_dist
  553. aero_x = np.cos(np.deg2rad(meas_angle)) * aero_dist
  554. aero_y = np.sin(np.deg2rad(meas_angle)) * aero_dist
  555. theta_4 = np.tan(np.pi - np.deg2rad(angle_main))
  556. theta_5 = np.tan(np.pi / 2 - np.deg2rad(angle_main) + np.deg2rad(angle_rotate))
  557. if np.abs(np.deg2rad(angle_main) - np.deg2rad(angle_rotate)) < 0.0001:
  558. radius = np.abs((cen_y - aero_y) - theta_4 * (cen_x - aero_x))
  559. else:
  560. radius = (np.abs((theta_4 * (cen_x - aero_x) - (cen_y - aero_y)) / (theta_4 - theta_5))
  561. * (1 + theta_5 ** 2) ** 0.5)
  562. return radius
  563. def blade_angle_aero_dist(border_rows: List[pd.DataFrame], radius: float, full_cycle: int,
  564. tower_dist: float, v_angle: float):
  565. """
  566. 计算叶片相对桨距角和叶片净空距离。
  567. :param border_rows: 三个叶片的边界
  568. :param radius: 旋转半径
  569. :param full_cycle: 全周期
  570. :param tower_dist: 塔筒距离
  571. :param v_angle: 俯仰角度
  572. :return: 绝对桨距角,净空距离,叶片线速度
  573. """
  574. v_speed = 2 * np.pi * radius / full_cycle # 叶片线速度m/(1计时器单位)
  575. pitch_angle_list = []
  576. aero_dist_list = []
  577. cen_blade = []
  578. for turbine in border_rows:
  579. diff_time = turbine.iloc[1, 0] - turbine.iloc[0, 0]
  580. diff_len = turbine.iloc[1, 1] - turbine.iloc[0, 1]
  581. mean_col2 = (turbine.iloc[1, 1] + turbine.iloc[0, 1]) / 2
  582. aero_dist = abs(mean_col2 - tower_dist) * np.cos(np.deg2rad(v_angle))
  583. pitch_angle = np.degrees(np.arctan(diff_len / (diff_time * v_speed)))
  584. pitch_angle_list.append(pitch_angle)
  585. aero_dist_list.append(aero_dist)
  586. cen_blade.append(mean_col2)
  587. pitch_mean = np.mean(pitch_angle_list)
  588. pitch_angle_list = [angle - pitch_mean for angle in pitch_angle_list]
  589. pitch_angle_list.append(max(pitch_angle_list) - min(pitch_angle_list))
  590. aero_dist_list.append(np.mean(aero_dist_list))
  591. pitch_angle_list = [round(num, 2) for num in pitch_angle_list]
  592. aero_dist_list = [round(num, 2) for num in aero_dist_list]
  593. return pitch_angle_list, aero_dist_list, v_speed, cen_blade
  594. def find_param(path: str):
  595. """
  596. 根据文件路径获取参数
  597. """
  598. path = path.replace('\\', '/')
  599. last_slash_index = path.rfind('/')
  600. result = path[last_slash_index + 1:]
  601. underscore_indices = []
  602. start = 0
  603. while True:
  604. index = result.find('_', start)
  605. if index == -1:
  606. break
  607. underscore_indices.append(index)
  608. start = index + 1
  609. wind_name = result[: underscore_indices[0]]
  610. turbine_code = result[underscore_indices[0] + 1: underscore_indices[1]]
  611. time_code = result[underscore_indices[1] + 1: underscore_indices[2]]
  612. sampling_fq = int(result[underscore_indices[2] + 1: underscore_indices[3]])
  613. tunnel_1 = float(result[underscore_indices[3] + 1: underscore_indices[4]])
  614. tunnel_2 = float(result[underscore_indices[4] + 1: -4])
  615. dt = datetime.strptime(time_code, "%Y%m%d%H%M%S")
  616. standard_time_str = dt.strftime("%Y-%m-%d %H:%M:%S")
  617. return wind_name, turbine_code, standard_time_str, sampling_fq, tunnel_1, tunnel_2
  618. def blade_dist_distribute_cal(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame,
  619. tower_dist: float, v_angle: float, blade_cen_dist: list):
  620. """
  621. 计算每个叶片每个周期的转速和净空距离
  622. :param data_group: cycle_calculate计算完成后的数据。
  623. :param start_points: 所有每个周期开始点,叶片前缘突变点。
  624. :param end_points: 叶片后缘突变点。
  625. :param tower_dist: 塔筒距离。
  626. :param v_angle: 测量俯仰角度。
  627. :param blade_cen_dist: 叶片内部距离。
  628. """
  629. time.sleep(1)
  630. combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
  631. # 检查排序后的数据从start开始,end结束
  632. if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
  633. combined_df_sorted = combined_df_sorted.iloc[1:]
  634. if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
  635. combined_df_sorted = combined_df_sorted.iloc[:-1]
  636. combined_df_sorted.reset_index(drop=True, inplace=True)
  637. # 将 start_points 中的时间点转换为列表
  638. start_times = combined_df_sorted['time'].tolist()
  639. normalize_cycle = start_times[1] - start_times[0]
  640. tower_clearance = [pd.DataFrame() for _ in range(3)]
  641. # 遍历所有起始时间点
  642. for i in range(0, len(start_times), 2):
  643. # 获取当前起始和结束时间点
  644. start_time = start_times[i]
  645. end_time = start_times[i + 1]
  646. # 根据当前起始时间点和结束时间点对数据进行分段
  647. segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)]
  648. min_distance = segment['distance'].min()
  649. clearance = np.abs(tower_dist - min_distance - blade_cen_dist[i % 3]) * np.cos(np.deg2rad(v_angle))
  650. r_speed = (start_times[i + 2] - start_times[i]) * 3 / 5000000
  651. # 周期归一化
  652. ratio = (end_time - start_time) / normalize_cycle
  653. segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio
  654. new_df = pd.DataFrame({
  655. 'clearance': [clearance],
  656. 'r_speed': [r_speed]
  657. })
  658. # 将结果添加到相应的 turbine 数据框中
  659. tower_clearance[i % 3] = pd.concat([tower_clearance[i % 3], new_df])
  660. return tower_clearance