data_analyse_origin.py 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976
  1. import os
  2. import pandas as pd
  3. import numpy as np
  4. import seaborn as sns
  5. import matplotlib.pyplot as plt
  6. from matplotlib.ticker import MaxNLocator
  7. from typing import Tuple, List
  8. import warnings
  9. import time
  10. import sys
  11. import frequency_filter as ff
  12. from datetime import datetime
warnings.filterwarnings("ignore", category=FutureWarning)  # Silence FutureWarning messages only.
plt.rcParams['font.sans-serif'] = ['SimHei']  # Use the SimHei font so Chinese labels render.
plt.rcParams['axes.unicode_minus'] = False  # Keep '-' from showing as a box in saved figures.
# TODO: distribution of the clearance distance of the 3 blades; compute the clearance once every 10 revolutions.
  17. def result_main():
  18. """
  19. 创建data目录,返回历史分析数据存放的文件路径
  20. """
  21. # 获取当前程序的绝对路径
  22. python_interpreter_path = sys.executable
  23. project_directory = os.path.dirname(python_interpreter_path)
  24. data_folder = os.path.join(project_directory, 'data')
  25. # 检查data文件夹是否存在,如果不存在则创建
  26. if not os.path.exists(data_folder):
  27. os.makedirs(data_folder)
  28. # CSV文件路径
  29. csv_file_path = os.path.join(data_folder, 'history_data.csv')
  30. # 检查CSV文件是否存在,如果不存在则创建一个空的CSV文件
  31. if not os.path.exists(csv_file_path):
  32. pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率',
  33. '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
  34. '叶片1净空值', '叶片2净空值', '叶片3净空值', '平均净空值',
  35. '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
  36. '振动幅值', '振动主频']).to_csv(csv_file_path, index=False)
  37. return csv_file_path
  38. def data_analyse(path: List[str]):
  39. """
  40. 创建data目录,把分析数据保存到历史记录中,同时返回全量分析数据
  41. """
  42. locate_file = path[0]
  43. measure_file = path[1]
  44. noise_reduction = 0.000001 # 如果一个距离值的所有样本量小于总样本量的noise_reduction,则被去掉
  45. min_difference = 1.5 # 如果相邻2个点的距离差大于min_difference,则被注意是否是周期节点
  46. angle_cone = float(path[2]) # 锥角
  47. axial_inclination = float(path[3]) # 轴向倾角
  48. return_list = []
  49. wind_name, turbine_code, time_code, sampling_fq, angle_nan, angle_cen = find_param(locate_file)
  50. wind_name_1, turbine_code_1, time_code, sampling_fq_1, angle_tip, angle_root = find_param(measure_file)
  51. sampling_fq_1 = sampling_fq_1 * 1000
  52. sampling_fq = sampling_fq * 1000
  53. print(wind_name, turbine_code, time_code, sampling_fq, angle_nan, angle_cen)
  54. print(wind_name_1, turbine_code_1, time_code, sampling_fq_1, angle_tip, angle_root)
  55. data_nan, data_cen = process_data(locate_file)
  56. data_tip, data_root = process_data(measure_file)
  57. start_tip, end_tip, filtered_data_tip = cycle_calculate(data_tip, noise_reduction, min_difference)
  58. start_root, end_root, filtered_data_root = cycle_calculate(data_root, noise_reduction, min_difference)
  59. # start_nan, end_nan, filtered_data_nan = cycle_calculate(data_nan, noise_reduction, min_difference)
  60. filtered_data_cen = tower_filter(data_cen, noise_reduction)
  61. dist_cen = np.mean(filtered_data_cen.iloc[:, 1].tolist())
  62. if end_tip.iloc[0, 0] < start_root.iloc[0, 0]:
  63. start_tip = start_tip.drop(start_tip.index[0])
  64. end_tip = end_tip.drop(end_tip.index[0])
  65. if start_root.iloc[0, 0] < start_tip.iloc[0, 0] < end_tip.iloc[0, 0] < end_root.iloc[0, 0]:
  66. pass
  67. else:
  68. raise ValueError("The elements are not in the expected order.")
  69. tower_dist_tip = ff.tower_cal(filtered_data_tip, start_tip, end_tip, sampling_fq_1)
  70. tower_dist_root = ff.tower_cal(filtered_data_root, start_root, end_root, sampling_fq_1)
  71. lowpass_data, fft_x, fft_y, tower_freq, tower_max= ff.process_fft(filtered_data_cen, sampling_fq)
  72. result_line_tip, result_scatter_tip, border_rows_tip, cycle_len_tip \
  73. = data_normalize(filtered_data_tip, start_tip, end_tip)
  74. result_line_root, result_scatter_root, border_rows_root, cycle_len_root \
  75. = data_normalize(filtered_data_root, start_root, end_root)
  76. result_avg_tip, result_diff_tip = blade_shape(result_line_tip)
  77. result_avg_root, result_diff_root = blade_shape(result_line_root)
  78. border_rows_tip_new, angle_tip_new = coordinate_normalize(border_rows_tip, angle_tip)
  79. print('新俯仰角' + str(angle_tip_new))
  80. print('轮毂中心距离' + str(dist_cen))
  81. tip_r = radius_cal(border_rows_tip_new, angle_tip_new, dist_cen, angle_cen, axial_inclination, angle_cone)
  82. root_r = radius_cal(border_rows_root, angle_root, dist_cen, angle_cen, axial_inclination, angle_cone)
  83. pitch_angle_tip, aero_dist_tip, v_speed_tip, cen_blade_tip = (
  84. blade_angle_aero_dist(border_rows_tip, tip_r, cycle_len_tip, tower_dist_tip, angle_tip_new))
  85. pitch_angle_root, aero_dist_root, v_speed_root = (
  86. blade_angle_aero_dist(border_rows_root, root_r, cycle_len_root, tower_dist_root, angle_root))
  87. dist_distribute = blade_dist_distribute_cal(filtered_data_tip, start_tip, end_tip,
  88. tower_dist_tip, angle_tip_new, cen_blade_tip)
  89. plot_data(result_line_tip, 'line', 'data1')
  90. # plot_data(result_diff_tip, 'line', 'data_diff_1')
  91. plot_data(result_scatter_tip, 'scatter', 'data1')
  92. plot_data(result_line_root, 'line', 'data2')
  93. # plot_data(result_diff_root, 'line', 'data_diff_2')
  94. plot_data(result_scatter_root, 'scatter', 'data2')
  95. for df in result_line_tip:
  96. first_column = df.iloc[:, 0]
  97. df.iloc[:, 0] = first_column * v_speed_tip
  98. for df in result_line_root:
  99. first_column = df.iloc[:, 0]
  100. df.iloc[:, 0] = first_column * v_speed_root
  101. print(v_speed_tip, v_speed_root)
  102. avg_tip = result_avg_tip.iloc[:, 0]
  103. result_avg_tip.iloc[:, 0] = avg_tip * v_speed_tip
  104. avg_root = result_avg_root.iloc[:, 0]
  105. result_avg_root.iloc[:, 0] = avg_root * v_speed_root
  106. twist_1 = round(np.abs(pitch_angle_root[0] - pitch_angle_tip[0]), 2)
  107. twist_2 = round(np.abs(pitch_angle_root[1] - pitch_angle_tip[1]), 2)
  108. twist_3 = round(np.abs(pitch_angle_root[2] - pitch_angle_tip[2]), 2)
  109. twist_avg = round((twist_1 + twist_2 + twist_3) / 3, 2)
  110. sampling_num = int(0.01 * sampling_fq_1)
  111. data_tip.iloc[:, 0] = data_tip.iloc[:, 0] / 5000000
  112. data_root.iloc[:, 0] = data_root.iloc[:, 0] / 5000000
  113. lowpass_data.iloc[:, 0] = lowpass_data.iloc[:, 0] / 5000000
  114. print('time_length:' + str(data_root.iloc[-1, 0]))
  115. return_list.append(time_code)
  116. return_list.append(wind_name)
  117. return_list.append(turbine_code)
  118. return_list.append(sampling_fq_1)
  119. return_list.append(pitch_angle_root[0])
  120. return_list.append(pitch_angle_root[1])
  121. return_list.append(pitch_angle_root[2])
  122. return_list.append(pitch_angle_root[3])
  123. return_list.append(aero_dist_tip[0])
  124. return_list.append(aero_dist_tip[1])
  125. return_list.append(aero_dist_tip[2])
  126. return_list.append(aero_dist_tip[3])
  127. return_list.append(twist_1)
  128. return_list.append(twist_2)
  129. return_list.append(twist_3)
  130. return_list.append(twist_avg)
  131. return_list.append(tower_max)
  132. return_list.append(tower_freq)
  133. print(result_line_tip[0].iloc[:, 0])
  134. print(result_line_root[0].iloc[:, 0])
  135. print('振动主频' + str(tower_freq))
  136. print('振动幅值' + str(tower_max))
  137. # 将return_list转换为DataFrame并追加到CSV文件
  138. df_new_row = pd.DataFrame([return_list],
  139. columns=['时间', '场站', '风机编号', '采样频率',
  140. '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
  141. '叶片1净空值', '叶片2净空值', '叶片3净空值', '平均净空值',
  142. '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
  143. '振动幅值', '振动主频'])
  144. json_output = {
  145. 'original_plot': {
  146. 'blade_tip': {
  147. 'xdata': data_tip.iloc[:, 0].tolist()[::sampling_num],
  148. 'ydata': data_tip.iloc[:, 1].tolist()[::sampling_num]
  149. },
  150. 'blade_root': {
  151. 'xdata': data_root.iloc[:, 0].tolist()[::sampling_num],
  152. 'ydata': data_root.iloc[:, 1].tolist()[::sampling_num]
  153. }
  154. },
  155. 'fft_plot': {
  156. 'lowpass': {
  157. 'xdata': lowpass_data['time'].tolist()[::sampling_num],
  158. 'ydata': lowpass_data['distance_filtered'].tolist()[::sampling_num],
  159. 'xmax': max(lowpass_data['time'].tolist()),
  160. 'xmin': min(lowpass_data['time'].tolist()),
  161. 'ymax': max(lowpass_data['distance_filtered'].tolist()),
  162. 'ymin': min(lowpass_data['distance_filtered'].tolist())
  163. },
  164. 'fft': {
  165. 'xdata': fft_x,
  166. 'ydata': fft_y,
  167. 'xmax': max(fft_x),
  168. 'xmin': min(fft_x),
  169. 'ymax': max(fft_y),
  170. 'ymin': min(fft_y)
  171. }
  172. },
  173. 'blade_tip': {
  174. 'first_blade': {
  175. 'xdata': result_line_tip[0].iloc[:, 0].tolist(),
  176. 'ydata': result_line_tip[0].iloc[:, 1].tolist()
  177. },
  178. 'second_blade': {
  179. 'xdata': result_line_tip[1].iloc[:, 0].tolist(),
  180. 'ydata': result_line_tip[1].iloc[:, 1].tolist()
  181. },
  182. 'third_blade': {
  183. 'xdata': result_line_tip[2].iloc[:, 0].tolist(),
  184. 'ydata': result_line_tip[2].iloc[:, 1].tolist()
  185. },
  186. 'avg_blade': {
  187. 'xdata': result_avg_tip.iloc[:, 0].tolist(),
  188. 'ydata': result_avg_tip.iloc[:, 1].tolist()
  189. }
  190. },
  191. 'blade_root': {
  192. 'first_blade': {
  193. 'xdata': result_line_root[0].iloc[:, 0].tolist(),
  194. 'ydata': result_line_root[0].iloc[:, 1].tolist()
  195. },
  196. 'second_blade': {
  197. 'xdata': result_line_root[1].iloc[:, 0].tolist(),
  198. 'ydata': result_line_root[1].iloc[:, 1].tolist()
  199. },
  200. 'third_blade': {
  201. 'xdata': result_line_root[2].iloc[:, 0].tolist(),
  202. 'ydata': result_line_root[2].iloc[:, 1].tolist()
  203. },
  204. 'avg_blade': {
  205. 'xdata': result_avg_root.iloc[:, 0].tolist(),
  206. 'ydata': result_avg_root.iloc[:, 1].tolist()
  207. }
  208. },
  209. 'dist_distribution': {
  210. 'first_blade': {
  211. 'xdata': dist_distribute[0].iloc[:, 0].tolist(),
  212. 'ydata': dist_distribute[0].iloc[:, 1].tolist()
  213. },
  214. 'second_blade': {
  215. 'xdata': dist_distribute[1].iloc[:, 0].tolist(),
  216. 'ydata': dist_distribute[1].iloc[:, 1].tolist()
  217. },
  218. 'third_blade': {
  219. 'xdata': dist_distribute[2].iloc[:, 0].tolist(),
  220. 'ydata': dist_distribute[2].iloc[:, 1].tolist()
  221. }
  222. },
  223. 'analyse_table': {
  224. 'pitch_angle_diff': {
  225. 'blade_1': pitch_angle_root[0],
  226. 'blade_2': pitch_angle_root[1],
  227. 'blade_3': pitch_angle_root[2],
  228. 'blade_relate': pitch_angle_root[3]
  229. },
  230. 'aero_dist': {
  231. 'blade_1': aero_dist_tip[0],
  232. 'blade_2': aero_dist_tip[1],
  233. 'blade_3': aero_dist_tip[2],
  234. 'blade_avg': aero_dist_tip[3]
  235. },
  236. 'blade_twist': {
  237. 'blade_1': twist_1,
  238. 'blade_2': twist_2,
  239. 'blade_3': twist_3,
  240. 'blade_avg': twist_avg
  241. },
  242. 'tower_vibration': {
  243. 'max_vibration': tower_max,
  244. 'main_vibration_freq': tower_freq
  245. }
  246. }
  247. }
  248. # 获取当前程序的绝对路径
  249. python_interpreter_path = sys.executable
  250. project_directory = os.path.dirname(python_interpreter_path)
  251. data_folder = os.path.join(project_directory, 'data')
  252. # 检查data文件夹是否存在,如果不存在则创建
  253. if not os.path.exists(data_folder):
  254. os.makedirs(data_folder)
  255. # CSV文件路径
  256. csv_file_path = os.path.join(data_folder, 'history_data.csv')
  257. # 检查CSV文件是否存在,如果不存在则创建一个空的CSV文件
  258. if not os.path.exists(csv_file_path):
  259. pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率',
  260. '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
  261. '叶片1净空值', '叶片2净空值', '叶片3净空值', '平均净空值',
  262. '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
  263. '振动幅值', '振动主频']).to_csv(csv_file_path, index=False)
  264. df_new_row.to_csv(csv_file_path, mode='a', header=False, index=False)
  265. print(csv_file_path)
  266. return json_output
  267. def process_data(file_path):
  268. """
  269. 打开、解决时间重置、按时间清洗异常值、分列数据
  270. """
  271. # 读取第2、4、9列的数据
  272. data = pd.read_csv(file_path, usecols=[1, 3, 8], header=None, engine='c')
  273. data = data.head(int(len(data) * 0.95))
  274. print(len(data))
  275. '''
  276. # 绘制原始数据图
  277. # 只取前1%的数据
  278. # data = data.head(int(len(data)* 0.01))
  279. data.columns = ['time', 'distance1', 'distance2']
  280. plt.figure(figsize=(300, 150))
  281. sns.scatterplot(data=data, x='time', y='distance1', s=50, color='green')
  282. sns.scatterplot(data=data, x='time', y='distance2', s=50, color='red')
  283. abxy = plt.gca() # 获取当前坐标轴对象
  284. plt.grid(linewidth=2) # 设置网格线宽度为2
  285. abxy.xaxis.set_major_locator(MaxNLocator(nbins=100)) # 设置x轴主刻度的最大数量为10
  286. plt.xlabel('时间', fontsize=16, fontweight='bold') # 添加x轴标签
  287. plt.ylabel('距离(m)', fontsize=16, fontweight='bold') # 添加y轴标签
  288. abxy.tick_params(axis='x', labelsize=14, labelcolor='black', width=2) # 设置x轴刻度标签
  289. abxy.tick_params(axis='y', labelsize=14, labelcolor='black', width=2) # 设置y轴刻度标签
  290. plt.savefig(f"{"original"}.png", dpi=100, pil_kwargs={"icc_profile": False})
  291. plt.close()
  292. '''
  293. # 找到第一列中最大值和最小值的位置
  294. max_value = data.iloc[:, 0].max()
  295. max_index = data.iloc[:, 0].idxmax()
  296. min_index = data.iloc[:, 0].idxmin()
  297. # 检查最小值的位置是否是最大值位置的下一个
  298. if min_index == max_index + 1:
  299. # 将最小值及其之后的所有值都加上最大值
  300. data.iloc[min_index:, 0] += max_value
  301. # 按时间列筛选清洗异常值
  302. last_time = data.iloc[-1, 0]
  303. first_time = data.iloc[0, 0]
  304. filtered_data = data[(data.iloc[:, 0] > last_time) & (data.iloc[:, 0] < first_time)]
  305. print(f'时间列异常数据: {filtered_data}')
  306. print(f'起止时间: {first_time}, {last_time}')
  307. data = data[data.iloc[:, 0] >= first_time]
  308. data = data[data.iloc[:, 0] <= last_time]
  309. data.reset_index(drop=True, inplace=True)
  310. # 计算最小值
  311. min_time = data.iloc[:, 0].min()
  312. data.iloc[:, 0] -= min_time
  313. # 分为两组数据
  314. data_1 = data.iloc[:, [0, 1]]
  315. data_2 = data.iloc[:, [0, 2]]
  316. # 分别命名列
  317. data_1.columns = ['time', 'distance']
  318. data_2.columns = ['time', 'distance']
  319. return data_1, data_2
  320. def tower_filter(data_group: pd.DataFrame, noise_threshold: float):
  321. """
  322. 对轮毂中心数据进行降噪,和前项填充
  323. :param data_group: process_data计算完成后轮毂中心的数据。
  324. :param noise_threshold: 去掉占比小于noise_threshold的数据。
  325. :return: filtered_data:降噪后的数据
  326. """
  327. print('正在进行数据清洗......')
  328. time.sleep(1)
  329. # 计算distance的分布
  330. distance_counts = data_group['distance'].value_counts(normalize=True)
  331. noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index
  332. noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index
  333. data_group.loc[noise_indices, 'distance'] = np.nan
  334. # 选择频率最大的5个值
  335. top_5_distances = distance_counts.head(5).index
  336. mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean()
  337. data_group.loc[(data_group['distance'] < mean_values-20) | (
  338. data_group['distance'] > mean_values*1.1), 'distance'] = np.nan
  339. nan_count = data_group['distance'].isna().sum()
  340. all_count = data_group.shape[0]
  341. print(f"中值是:{mean_values},替换为NaN的distance值的数量是: {nan_count}, 总数量是: {all_count},"
  342. f"占比: {nan_count / all_count * 100:.2f}%")
  343. # 前向填充
  344. data_group['distance'] = data_group['distance'].fillna(method='ffill')
  345. filtered_data = data_group
  346. return filtered_data
  347. def cycle_calculate(data_group: pd.DataFrame, noise_threshold: float, min_distance: float):
  348. """
  349. 对数据进行降噪,和前项填充;计算数据的周期节点,叶片前缘突变点、后缘突变点
  350. :param data_group: process_data计算完成后的数据。
  351. :param noise_threshold: 去掉占比小于noise_threshold的数据。
  352. :param min_distance: 区分叶片和塔筒的距离差值。
  353. :return: start_points:周期开始点, end_points:周期结束点, filtered_data:降噪后的数据
  354. """
  355. print('正在计算周期节点......')
  356. time.sleep(1)
  357. # 计算distance的分布
  358. distance_counts = data_group['distance'].value_counts(normalize=True)
  359. noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index
  360. noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index
  361. data_group.loc[noise_indices, 'distance'] = np.nan
  362. # 选择频率最大的5个值
  363. top_5_distances = distance_counts.head(5).index
  364. mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean()
  365. data_group.loc[(data_group['distance'] < mean_values-20) | (
  366. data_group['distance'] > mean_values*1.1), 'distance'] = np.nan
  367. nan_count = data_group['distance'].isna().sum()
  368. all_count = data_group.shape[0]
  369. print(f"中值是:{mean_values},替换为NaN的distance值的数量是: {nan_count}, 总数量是: {all_count},"
  370. f"占比: {nan_count / all_count * 100:.2f}%")
  371. # 前向填充
  372. data_group['distance'] = data_group['distance'].fillna(method='ffill')
  373. filtered_data = data_group
  374. # 计算相邻两行distance的差值
  375. filtered_data['distance_diff'] = filtered_data['distance'].diff()
  376. large_diff_indices = filtered_data[filtered_data['distance_diff'] > min_distance].index
  377. small_diff_indices = filtered_data[filtered_data['distance_diff'] < -min_distance].index
  378. filtered_data = filtered_data.drop(columns=['distance_diff'])
  379. start_points = pd.DataFrame()
  380. end_points = pd.DataFrame()
  381. # 遍历所有差值大于的行
  382. for idx in large_diff_indices:
  383. # 获取当前行的 distance 值
  384. current_distance = filtered_data.loc[idx, 'distance']
  385. next_rows_large = filtered_data.loc[idx - 1000: idx - 1]
  386. # 检查是否任意 distance 的值小于 current_distance - 2
  387. if next_rows_large['distance'].le(current_distance - min_distance).all():
  388. # 如果都小于,则将当前行和下一行添加到 special_points 中
  389. end_points = pd.concat([end_points, filtered_data.loc[[idx - 1]]])
  390. for idx in small_diff_indices:
  391. # 获取当前行的 distance 值
  392. current_distance = filtered_data.loc[idx - 1, 'distance']
  393. next_rows_small = filtered_data.iloc[idx: idx + 1000]
  394. # 检查是否任意 distance 的值小于 current_distance - 2
  395. if next_rows_small['distance'].le(current_distance - min_distance).all():
  396. # 如果都小于,则将当前行和下一行添加到 special_points 中
  397. start_points = pd.concat([start_points, filtered_data.loc[[idx]]])
  398. if end_points.iloc[0, 0] < start_points.iloc[0, 0]:
  399. end_points = end_points.drop(end_points.index[0])
  400. if end_points.iloc[-1, 0] < start_points.iloc[-1, 0]:
  401. start_points = start_points.drop(start_points.index[-1])
  402. else:
  403. pass
  404. return start_points, end_points, filtered_data
  405. def data_normalize(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame) \
  406. -> Tuple[List[pd.DataFrame], List[pd.DataFrame], List[pd.DataFrame], int]:
  407. """
  408. 提取每个叶片的数据并归一化,输出散点图和拟合图
  409. :param data_group: cycle_calculate计算完成后的数据。
  410. :param start_points: 所有每个周期开始点,叶片前缘突变点。
  411. :param end_points: 叶片后缘突变点。
  412. :return: turbines_processed: 每个叶片的拟合数据,
  413. turbines_scattered: 每个叶片的散点数据,
  414. border_rows: 每个叶片的2个边缘数据,
  415. normalize_cycle: 周期长度
  416. """
  417. print('正在进行各周期归一化......')
  418. time.sleep(1)
  419. combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
  420. # 检查排序后的数据从start开始,end结束
  421. if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
  422. combined_df_sorted = combined_df_sorted.iloc[1:]
  423. if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
  424. combined_df_sorted = combined_df_sorted.iloc[:-1]
  425. combined_df_sorted.reset_index(drop=True, inplace=True)
  426. # 将 start_points 中的时间点转换为列表
  427. start_times = combined_df_sorted['time'].tolist()
  428. print('本次测量风机完整旋转圈数:'+ str(len(start_times) / 2))
  429. time.sleep(1)
  430. normalize_cycle = start_times[1] - start_times[0]
  431. full_cycle = int((start_times[2] - start_times[0]) * 3)
  432. turbines = [pd.DataFrame() for _ in range(3)]
  433. # 遍历所有起始时间点
  434. for i in range(0, len(start_times), 2):
  435. # 获取当前起始和结束时间点
  436. start_time = start_times[i]
  437. end_time = start_times[i + 1]
  438. # 根据当前起始时间点和结束时间点对数据进行分段
  439. segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)]
  440. if segment is None:
  441. pass
  442. else:
  443. # 周期归一化
  444. ratio = (end_time - start_time) / normalize_cycle
  445. segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio
  446. # segment.loc[:, 'distance'] = ff.butter_lowpass_filter(segment['distance'], cutoff_low, fs)
  447. # 将结果添加到相应的 turbine 数据框中
  448. turbines[i % 3] = pd.concat([turbines[i % 3], segment])
  449. # 数据分组清洗、求平均
  450. turbines_processed = []
  451. turbines_scattered = []
  452. sd_time = [-1, -1]
  453. time_list = list(range(0, normalize_cycle, 1000))
  454. # time_list = [(i + 1) * normalize_cycle / fs * 100 for i in range(fs * 100)] # 生成时间序列
  455. for turbine in turbines:
  456. # 按时间排序
  457. turbine_sorted = turbine.sort_values(by='time').reset_index(drop=True)
  458. # 找到time列的第一个值
  459. first_time = turbine_sorted['time'].iloc[0]
  460. # 分组,时间列每1000为一组(每40个时间点一组)
  461. bins = list(range(int(first_time), int(turbine_sorted['time'].max()), 1000))
  462. # 原始代码
  463. # bins = list(range(int(first_time), int(turbine_sorted['time'].max()) + len(start_times), int(fs / 50)))
  464. grouped = turbine_sorted.groupby(pd.cut(turbine_sorted['time'], bins=bins, right=False))
  465. # 初始化一个空的 DataFrame 用于存储处理后的数据
  466. processed_df = pd.DataFrame()
  467. scattered_df = pd.DataFrame()
  468. mean_points = []
  469. diff_points = []
  470. # 对每个组进行处理
  471. for _, group in grouped:
  472. # 去除 distance 最大和最小的前5%
  473. quantile_5 = group['distance'].quantile(0.05)
  474. quantile_95 = group['distance'].quantile(0.95)
  475. filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)]
  476. # 计算均值
  477. mean_point = filtered_group['distance'].mean()
  478. mean_points.append(mean_point)
  479. # 遍历 mean_points 列表,计算每个元素与其下一个元素的差值
  480. for i in range(len(mean_points) - 1):
  481. diff = abs(mean_points[i + 1] - mean_points[i])
  482. diff_points.append(diff)
  483. start_index = int(len(diff_points) * 0.05)
  484. end_index = int(len(diff_points) * 0.95)
  485. subset1 = diff_points[start_index:end_index]
  486. sdr_diff = np.max(subset1) * 1.1
  487. # 找到第一个和最后一个小于 sdr_diff 的序号
  488. first_index = np.where(diff_points < sdr_diff)[0][0]
  489. last_index = np.where(diff_points < sdr_diff)[0][-1]
  490. '''
  491. # 创建一个总图中有3个分图的形式
  492. fig, axs = plt.subplots(1, 1, figsize=(15, 9))
  493. plt.subplots_adjust(hspace=2)
  494. # 绘制 diff_points 的折线图
  495. axs.plot(diff_points, label='Diff Points', color='red', marker='x', markersize=2)
  496. axs.axhline(y=sdr_diff, color='red', linestyle='--')
  497. axs.legend()
  498. axs.set_title('Diff Points')
  499. axs.set_xlabel('Index')
  500. axs.set_ylabel('Value')
  501. # 显示图形
  502. plt.tight_layout()
  503. plt.show()
  504. '''
  505. for index, (bin, group) in enumerate(grouped):
  506. # 去除 distance 最大和最小的前5%
  507. quantile_5 = group['distance'].quantile(0.05)
  508. quantile_95 = group['distance'].quantile(0.95)
  509. filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)]
  510. if first_index <= index < last_index: # 如果斜率小于,则认为该组数据不是突变点
  511. # 计算中点
  512. mid_point = filtered_group.mean()
  513. # 将中点转换为 DataFrame 并添加到处理后的 DataFrame 中
  514. mid_point_df = pd.DataFrame([mid_point])
  515. mid_point_df.iloc[0, 0] = time_list[index]
  516. processed_df = pd.concat([processed_df, mid_point_df], ignore_index=True)
  517. scattered_df = pd.concat([scattered_df, filtered_group], ignore_index=True)
  518. else: pass
  519. # 找到time列的最小值和最大值
  520. min_time = processed_df['time'].min()
  521. max_time = processed_df['time'].max()
  522. if sd_time == [-1, -1]:
  523. sd_time = [min_time, max_time]
  524. elif sd_time[0] < min_time:
  525. sd_time[0] = min_time
  526. elif sd_time[1] > max_time:
  527. sd_time[1] = max_time
  528. # 将处理后的 DataFrame 添加到列表中
  529. turbines_processed.append(processed_df)
  530. turbines_scattered.append(scattered_df)
  531. border_rows = []
  532. for i, turbine in enumerate(turbines_processed):
  533. # 找到离 sd_time[0] 最近的行的索引
  534. closest_index_0 = (turbine['time'] - sd_time[0]).abs().idxmin()
  535. turbine.at[closest_index_0, 'time'] = sd_time[0]
  536. sd_time_row_0 = turbine.loc[closest_index_0]
  537. # 找到离 sd_time[1] 最近的行的索引
  538. closest_index_1 = (turbine['time'] - sd_time[1]).abs().idxmin()
  539. turbine.at[closest_index_1, 'time'] = sd_time[1]
  540. sd_time_row_1 = turbine.loc[closest_index_1]
  541. # 切片 turbine,从 closest_index_0 到 closest_index_1
  542. turbines_processed[i] = turbine.iloc[closest_index_0:closest_index_1 + 1].reset_index(drop=True)
  543. sd_time_rows_turbine = pd.concat([pd.DataFrame([sd_time_row_0]), pd.DataFrame([sd_time_row_1])]
  544. , ignore_index=True)
  545. border_rows.append(sd_time_rows_turbine)
  546. time.sleep(1)
  547. return turbines_processed, turbines_scattered, border_rows, full_cycle
  548. def blade_shape(turbines_processed: List[pd.DataFrame]):
  549. """
  550. 计算叶片平均形状、叶片形状偏差。
  551. :param turbines_processed:叶片拟合曲线数据,来自data_normalize
  552. :return: 叶片平均形状、叶片形状偏差
  553. """
  554. print('正在进行叶片外形偏差计算......')
  555. row_counts = [df.shape[0] for df in turbines_processed]
  556. num_rows = min(row_counts)
  557. # 创建一个新的data.frame用于保存结果
  558. turbine_avg = pd.DataFrame(index=range(num_rows), columns=['time', 'distance'])
  559. turbine_diff = [pd.DataFrame(index=range(num_rows), columns=['time', 'distance']) for _ in turbines_processed]
  560. # 遍历每一行
  561. for i in range(num_rows):
  562. distances = [df.loc[i, 'distance'] for df in turbines_processed] # 获取每个data.frame的distance列的值
  563. avg_distance = sum(distances) / len(distances) # 计算distance列的平均值
  564. time_value = turbines_processed[0].loc[i, 'time'] # 获取time列的值
  565. turbine_avg.loc[i, 'time'] = time_value
  566. turbine_avg.loc[i, 'distance'] = avg_distance
  567. for j in range(len(distances)):
  568. distances[j] = distances[j] - avg_distance
  569. turbine_diff[j].loc[i, 'time'] = time_value
  570. turbine_diff[j].loc[i, 'distance'] = distances[j]
  571. time.sleep(10)
  572. return turbine_avg, turbine_diff
  573. def coordinate_normalize(tip_border_rows: List[pd.DataFrame], tip_angle):
  574. """
  575. 将叶尖测量数据和叶根、轮毂中心的测量原点归一化。
  576. :param tip_border_rows: 3个叶尖边缘数据
  577. :param tip_angle: 叶尖测量俯仰角
  578. :return: 归一化后叶尖数据,叶尖俯仰角
  579. """
  580. tip_angle1 = np.deg2rad(tip_angle)
  581. tip_angle_list = []
  582. for turbine in tip_border_rows:
  583. tip_angle_cal = np.arctan((np.sin(tip_angle1) * turbine['distance'] - 0.07608) /
  584. np.cos(tip_angle1) * turbine['distance'])
  585. turbine['distance'] = (turbine['distance']**2 + 0.0057881664 -
  586. 0.15216*turbine['distance']*np.sin(tip_angle1)) ** 0.5
  587. tip_angle_list.append(tip_angle_cal)
  588. tip_angle_new = float(np.mean(tip_angle_list))
  589. tip_angle_new1 = np.rad2deg(tip_angle_new)
  590. print('叶尖俯仰角: ' + str(tip_angle_new1))
  591. return tip_border_rows, tip_angle
  592. def radius_cal(border_rows, meas_angle, cen_dist, cen_angle, angle_main, angle_rotate):
  593. """
  594. 计算测量点处的旋转半径。
  595. :param border_rows: 三个叶片的边界
  596. :param meas_angle: 回波俯仰角
  597. :param cen_dist: 轮毂中心距离
  598. :param cen_angle: 轮毂中心俯仰角
  599. :param angle_main: 主轴倾角
  600. :param angle_rotate: 锥角
  601. :return: 旋转半径
  602. """
  603. aero_dist = (pd.concat([df['distance'] for df in border_rows]).mean())
  604. cen_x = np.cos(np.deg2rad(cen_angle)) * cen_dist
  605. cen_y = np.sin(np.deg2rad(cen_angle)) * cen_dist
  606. aero_x = np.cos(np.deg2rad(meas_angle)) * aero_dist
  607. aero_y = np.sin(np.deg2rad(meas_angle)) * aero_dist
  608. theta_4 = np.tan(np.pi - np.deg2rad(angle_main))
  609. theta_5 = np.tan(np.pi/2 - np.deg2rad(angle_main) + np.deg2rad(angle_rotate))
  610. if np.abs(np.deg2rad(angle_main) - np.deg2rad(angle_rotate)) < 0.0001:
  611. radius = np.abs((cen_y - aero_y) - theta_4 * (cen_x - aero_x))
  612. else:
  613. radius = (np.abs((theta_4 * (cen_x - aero_x) - (cen_y - aero_y))/(theta_4 - theta_5))
  614. * (1 + theta_5 ** 2) ** 0.5)
  615. print('半径:' + str(radius))
  616. return radius
  617. def blade_angle_aero_dist(border_rows: List[pd.DataFrame], radius: float, full_cycle: int,
  618. tower_dist: float, v_angle: float):
  619. """
  620. 计算叶片相对桨距角和叶片净空距离。
  621. :param border_rows: 三个叶片的边界
  622. :param radius: 旋转半径
  623. :param full_cycle: 全周期
  624. :param tower_dist: 塔筒距离
  625. :param v_angle: 俯仰角度
  626. :return: 绝对桨距角,净空距离,叶片线速度
  627. """
  628. print('正在进行相对桨距角和叶片净空距离计算......')
  629. v_speed = 2 * np.pi * radius / full_cycle # 叶片线速度m/(1计时器单位)
  630. pitch_angle_list = []
  631. aero_dist_list = []
  632. cen_blade = []
  633. for turbine in border_rows:
  634. diff_time = turbine.iloc[1, 0] - turbine.iloc[0, 0]
  635. diff_len = turbine.iloc[1, 1] - turbine.iloc[0, 1]
  636. mean_col2 = (turbine.iloc[1, 1] + turbine.iloc[0, 1]) / 2
  637. aero_dist = abs(mean_col2 - tower_dist) * np.cos(np.deg2rad(v_angle))
  638. pitch_angle = np.degrees(np.arctan(diff_len / (diff_time * v_speed)))
  639. print('单个叶片绝对桨距角' + str(pitch_angle))
  640. pitch_angle_list.append(pitch_angle)
  641. aero_dist_list.append(aero_dist)
  642. cen_blade.append(mean_col2)
  643. pitch_mean = np.mean(pitch_angle_list)
  644. pitch_angle_list = [angle - pitch_mean for angle in pitch_angle_list]
  645. pitch_angle_list.append(max(pitch_angle_list) - min(pitch_angle_list))
  646. aero_dist_list.append(np.mean(aero_dist_list))
  647. pitch_angle_list = [round(num, 2) for num in pitch_angle_list]
  648. aero_dist_list = [round(num, 2) for num in aero_dist_list]
  649. print('叶片相对角度偏差:' + '\n' + '叶片1:' + str(pitch_angle_list[0]) + '\n'
  650. + '叶片2:' + str(pitch_angle_list[1]) + '\n' + '叶片3:' + str(pitch_angle_list[2])
  651. + '\n' + '相对偏差范围:' + str(pitch_angle_list[3]))
  652. print('叶片净空距离:' + '\n' + '叶片1:' + str(aero_dist_list[0]) + '\n'
  653. + '叶片2:' + str(aero_dist_list[1]) + '\n' + '叶片3:' + str(aero_dist_list[2])
  654. + '\n' + '平均净空距离:' + str(aero_dist_list[3]))
  655. return pitch_angle_list, aero_dist_list, v_speed, cen_blade
  656. def plot_data(data, plot_type: str, data_name: str):
  657. """
  658. 绘制数据图表并保存为文件。
  659. :param data: 数据列表,每个元素是一个 DataFrame。
  660. :param plot_type: 图表类型,'line' 或 'scatter'。
  661. :param data_name: 数据名称,用于生成文件名。
  662. """
  663. print('正在画图......')
  664. time.sleep(1)
  665. save_name = fr"{data_name}_{plot_type}.png" # 生成文件名
  666. plt.figure(figsize=(300, 150))
  667. if plot_type == 'line':
  668. for df, color in zip(data, ['blue', 'green', 'red']):
  669. sns.lineplot(data=df, x='time', y='distance', color=color)
  670. elif plot_type == 'scatter':
  671. for df, (size, color) in zip(data, [(50, 'blue'), (25, 'green'), (10, 'red')]):
  672. sns.scatterplot(data=df, x='time', y='distance', s=size, color=color)
  673. else:
  674. raise ValueError("plot_type must be either 'line' or 'scatter'")
  675. axy = plt.gca() # 获取当前坐标轴对象
  676. plt.grid(which='both', linewidth=2) # 设置网格线宽度为2
  677. axy.xaxis.set_major_locator(MaxNLocator(nbins=200)) # 设置x轴主刻度的最大数量为10
  678. axy.yaxis.set_major_locator(MaxNLocator(nbins=100)) # 设置y轴主刻度的最大数量为10
  679. plt.xlabel('时间', fontsize=100, fontweight='bold') # 添加x轴标签
  680. plt.ylabel('距离(m)', fontsize=100, fontweight='bold') # 添加y轴标签
  681. axy.tick_params(axis='x', labelsize=10, labelcolor='black', width=2) # 设置x轴刻度标签
  682. axy.tick_params(axis='y', labelsize=60, labelcolor='black', width=10) # 设置y轴刻度标签
  683. plt.savefig(save_name)
  684. plt.close()
  685. abs_path = os.path.abspath(save_name)
  686. print(f" {save_name} 已完成")
  687. return abs_path
  688. def find_param(path: str):
  689. """
  690. 根据文件路径获取参数
  691. """
  692. path = path.replace('\\', '/')
  693. last_slash_index = path.rfind('/')
  694. result = path[last_slash_index + 1:]
  695. underscore_indices = []
  696. start = 0
  697. while True:
  698. index = result.find('_', start)
  699. if index == -1:
  700. break
  701. underscore_indices.append(index)
  702. start = index + 1
  703. wind_name = result[: underscore_indices[0]]
  704. turbine_code = result[underscore_indices[0] + 1: underscore_indices[1]]
  705. time_code = result[underscore_indices[1] + 1: underscore_indices[2]]
  706. sampling_fq = int(result[underscore_indices[2] + 1: underscore_indices[3]])
  707. tunnel_1 = float(result[underscore_indices[3] + 1: underscore_indices[4]])
  708. tunnel_2 = float(result[underscore_indices[4] + 1: -4])
  709. dt = datetime.strptime(time_code, "%Y%m%d%H%M%S")
  710. standard_time_str = dt.strftime("%Y-%m-%d %H:%M:%S")
  711. return wind_name, turbine_code, standard_time_str, sampling_fq, tunnel_1, tunnel_2
  712. def blade_dist_distribute_cal(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame,
  713. tower_dist: float, v_angle: float, blade_cen_dist: list):
  714. """
  715. 计算每个叶片每个周期的转速和净空距离
  716. :param data_group: cycle_calculate计算完成后的数据。
  717. :param start_points: 所有每个周期开始点,叶片前缘突变点。
  718. :param end_points: 叶片后缘突变点。
  719. :param tower_dist: 塔筒距离。
  720. :param v_angle: 测量俯仰角度。
  721. :param blade_cen_dist: 叶片内部距离。
  722. """
  723. print('正在进行各周期净空距离计算......')
  724. time.sleep(1)
  725. combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
  726. # 检查排序后的数据从start开始,end结束
  727. if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
  728. combined_df_sorted = combined_df_sorted.iloc[1:]
  729. if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
  730. combined_df_sorted = combined_df_sorted.iloc[:-1]
  731. combined_df_sorted.reset_index(drop=True, inplace=True)
  732. # 将 start_points 中的时间点转换为列表
  733. start_times = combined_df_sorted['time'].tolist()
  734. normalize_cycle = start_times[1] - start_times[0]
  735. tower_clearance = [pd.DataFrame() for _ in range(3)]
  736. # 遍历所有起始时间点
  737. for i in range(0, len(start_times), 2):
  738. # 获取当前起始和结束时间点
  739. start_time = start_times[i]
  740. end_time = start_times[i + 1]
  741. # 根据当前起始时间点和结束时间点对数据进行分段
  742. segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)]
  743. min_distance = segment['distance'].min()
  744. clearance = np.abs(tower_dist - min_distance - blade_cen_dist[i % 3]) * np.cos(np.deg2rad(v_angle))
  745. r_speed = (start_times[i + 2] - start_times[i]) * 3 / 5000000
  746. # 周期归一化
  747. ratio = (end_time - start_time) / normalize_cycle
  748. segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio
  749. new_df = pd.DataFrame({
  750. 'clearance': [clearance],
  751. 'r_speed': [r_speed]
  752. })
  753. # 将结果添加到相应的 turbine 数据框中
  754. tower_clearance[i % 3] = pd.concat([tower_clearance[i % 3], new_df])
  755. return tower_clearance
# Ad-hoc driver: runs data_analyse on one hard-coded locate/measure file pair and
# prints the elapsed wall-clock time. The commented-out pairs below are
# alternative data sets; exactly one locate_path/measure_path pair is active.
# NOTE(review): these statements run at import time and use machine-specific
# paths - consider moving them under an `if __name__ == "__main__":` guard.
# measure_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/pl_9_20250514083317_100_24.8_30.06.csv"
# locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/pl_9_20250514083134_100_33.53_33.53.csv"
# measure_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/p'l_9p_20250514084957_100_27.48_29.47.csv"
# locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/p'l_9p_20250514084814_100_27.75_32.93.csv"
locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/pl_63_20250513160502_100_41.38_41.38.csv"
measure_path= "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/pl_63_20250513160655_100_28.10_35.43.csv"
# locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/平陆_4_20250514073658_100_16.00_20.56.csv"
# measure_path= "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/平陆_4_20250514073248_100_12.72_17.3.csv"
# locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250416/__20250416134815_50_14.55_17.00.csv"
# measure_path= "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250416/__20250416135017_50_11.85_14.31.csv"
start_t = time.time() # record the start time
# Argument list handed to data_analyse: the two file paths followed by two numeric
# settings (5 and 3.5). NOTE(review): the meaning of the two numbers is not
# visible in this part of the file - confirm against data_analyse.
data_path = [locate_path, measure_path, 5, 3.5]
list_1 = data_analyse(data_path)
# print(list_1)
# Report the elapsed time in seconds with two decimals.
print(f"耗时: {time.time() - start_t:.2f} 秒")