data_clean.py 36 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913
  1. import os
  2. import json
  3. import pandas as pd
  4. import numpy as np
  5. import matplotlib.pyplot as plt
  6. from typing import Tuple, List
  7. import warnings
  8. import time
  9. import sys
  10. import frequency_filter as ff
  11. from datetime import datetime
  12. warnings.filterwarnings("ignore", category=FutureWarning) # 忽略特定警告
  13. plt.rcParams['font.sans-serif'] = ['SimHei'] # 使用黑体
  14. plt.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题
  15. def result_main():
  16. """
  17. 创建data目录,返回历史分析数据存放的文件路径
  18. """
  19. # 获取当前程序的绝对路径
  20. python_interpreter_path = sys.executable
  21. project_directory = os.path.dirname(python_interpreter_path)
  22. data_folder = os.path.join(project_directory, 'data')
  23. # 检查data文件夹是否存在,如果不存在则创建
  24. if not os.path.exists(data_folder):
  25. os.makedirs(data_folder)
  26. # CSV文件路径
  27. csv_file_path = os.path.join(data_folder, 'history_data.csv')
  28. # 检查CSV文件是否存在,如果不存在则创建一个空的CSV文件
  29. if not os.path.exists(csv_file_path):
  30. pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率',
  31. '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
  32. '叶片1净空值', '叶片2净空值', '叶片3净空值',
  33. '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
  34. '振动幅值', '振动主频']).to_csv(csv_file_path, index=False)
  35. return csv_file_path
  36. def delete_data(names):
  37. """
  38. 删除历史分析数据
  39. :param names: 删除条件
  40. :return: csv文件路径
  41. """
  42. # 获取当前程序的绝对路径
  43. python_interpreter_path = sys.executable
  44. project_directory = os.path.dirname(python_interpreter_path)
  45. data_folder = os.path.join(project_directory, 'data')
  46. # CSV文件路径
  47. csv_file_path = os.path.join(data_folder, 'history_data.csv')
  48. df = pd.read_csv(csv_file_path)
  49. for name in names:
  50. # 检查条件
  51. condition = ((df['时间'].str.contains(name[0])) &
  52. (df['场站'].str.contains(name[1])) &
  53. (df['风机编号'].str.contains(name[2])))
  54. # 删除满足条件的行
  55. df = df[~condition]
  56. # 如果需要,可以将修改后的 DataFrame 保存回 CSV 文件
  57. df.to_csv(csv_file_path, index=False)
  58. return csv_file_path
  59. def history_data(name):
  60. """
  61. 读取历史分析数据
  62. :param name: 接口返回列表
  63. :return:
  64. """
  65. wind_name, turbine_code, time_code = name[1], name[2], name[0]
  66. # 获取当前程序的绝对路径
  67. python_interpreter_path = sys.executable
  68. project_directory = os.path.dirname(python_interpreter_path)
  69. data_folder = os.path.join(project_directory, 'data')
  70. time_code_cleaned = time_code.replace("-", "").replace(":", "").replace(" ", "")
  71. json_filename = f"{wind_name}_{turbine_code}_{time_code_cleaned}.json"
  72. json_file_path = os.path.join(data_folder, json_filename)
  73. if not os.path.exists(json_file_path):
  74. raise ValueError("文件不存在")
  75. with open(json_file_path, 'r') as f:
  76. data = json.load(f)
  77. return data
  78. def data_analyse(path: List[str]):
  79. """
  80. 创建data目录,把分析数据保存到历史记录中,同时返回全量分析数据
  81. """
  82. locate_file = path[0]
  83. measure_file = path[1]
  84. noise_reduction = 0.000001 # 如果一个距离值的所有样本量小于总样本量的noise_reduction,则被去掉
  85. min_difference = 1.5 # 如果相邻2个点的距离差大于min_difference,则被注意是否是周期节点
  86. angle_cone = float(path[2]) # 锥角
  87. axial_inclination = float(path[3]) # 轴向倾角
  88. return_list = []
  89. wind_name, turbine_code, time_code, sampling_fq, angle_nan, angle_cen = find_param(locate_file)
  90. wind_name_1, turbine_code_1, time_code, sampling_fq_1, angle_tip, angle_root = find_param(measure_file)
  91. sampling_fq_1 = sampling_fq_1 * 1000
  92. sampling_fq = sampling_fq * 1000
  93. data_nan, data_cen = process_data(locate_file)
  94. data_tip, data_root = process_data(measure_file)
  95. start_tip, end_tip, filtered_data_tip = cycle_calculate(data_tip, noise_reduction, min_difference)
  96. start_root, end_root, filtered_data_root = cycle_calculate(data_root, noise_reduction, min_difference)
  97. filtered_data_cen = tower_filter(data_cen, noise_reduction)
  98. dist_cen = np.mean(filtered_data_cen.iloc[:, 1].tolist())
  99. if end_tip.iloc[0, 0] < start_root.iloc[0, 0]:
  100. start_tip = start_tip.drop(start_tip.index[0])
  101. end_tip = end_tip.drop(end_tip.index[0])
  102. if start_root.iloc[0, 0] < start_tip.iloc[0, 0] < end_tip.iloc[0, 0] < end_root.iloc[0, 0]:
  103. pass
  104. else:
  105. raise ValueError("The elements are not in the expected order.")
  106. tower_dist_tip = ff.tower_cal(filtered_data_tip, start_tip, end_tip, sampling_fq_1)
  107. tower_dist_root = ff.tower_cal(filtered_data_root, start_root, end_root, sampling_fq_1)
  108. lowpass_data, fft_x, fft_y, tower_freq, tower_max = ff.process_fft(filtered_data_cen, sampling_fq)
  109. result_line_tip, result_scatter_tip, border_rows_tip, cycle_len_tip, min_tip \
  110. = data_normalize(filtered_data_tip, start_tip, end_tip)
  111. result_line_root, result_scatter_root, border_rows_root, cycle_len_root, min_root \
  112. = data_normalize(filtered_data_root, start_root, end_root)
  113. result_avg_tip, result_diff_tip = blade_shape(result_line_tip)
  114. result_avg_root, result_diff_root = blade_shape(result_line_root)
  115. border_rows_tip_new, angle_tip_new = coordinate_normalize(border_rows_tip, angle_tip)
  116. tip_r = radius_cal(border_rows_tip_new, angle_tip_new, dist_cen, angle_cen, axial_inclination, angle_cone)
  117. root_r = radius_cal(border_rows_root, angle_root, dist_cen, angle_cen, axial_inclination, angle_cone)
  118. pitch_angle_tip, aero_dist_tip, v_speed_tip, cen_blade_tip = (
  119. blade_angle_aero_dist(border_rows_tip, tip_r, cycle_len_tip, tower_dist_tip, angle_tip_new))
  120. pitch_angle_root, aero_dist_root, v_speed_root, cen_blade_root = (
  121. blade_angle_aero_dist(border_rows_root, root_r, cycle_len_root, tower_dist_root, angle_root))
  122. # 将列表转换为 numpy 数组
  123. cen_blade_tip_array = np.array(cen_blade_tip)
  124. min_tip_array = np.array(min_tip)
  125. abs_diff = np.abs(cen_blade_tip_array - min_tip_array) # 计算差值的绝对值
  126. blade_dist_tip = abs_diff * np.cos(np.deg2rad(angle_tip_new))
  127. blade_dist_tip.tolist() # 如果需要将结果转换回列表
  128. dist_distribute = blade_dist_distribute_cal(filtered_data_tip, start_tip, end_tip,
  129. tower_dist_tip, angle_tip_new, blade_dist_tip)
  130. dist_distribute = [df.round(5) for df in dist_distribute]
  131. # 获取每个 DataFrame 第二列的最小值和最大值,以及它们对应的第一列的值,并分别保存在列表中
  132. min_values = []
  133. min_keys = []
  134. max_values = []
  135. max_keys = []
  136. mean_values = []
  137. for df in dist_distribute:
  138. second_col_min = df[df.columns[1]].min()
  139. second_col_max = df[df.columns[1]].max()
  140. min_row = df[df[df.columns[1]] == second_col_min]
  141. max_row = df[df[df.columns[1]] == second_col_max]
  142. min_values.append(second_col_min)
  143. min_keys.append(min_row.iloc[0][df.columns[0]])
  144. max_values.append(second_col_max)
  145. max_keys.append(max_row.iloc[0][df.columns[0]])
  146. for i in range(3):
  147. mean_values.append(round((max_values[i] + min_values[i]) / 2, 2))
  148. for df in result_line_tip:
  149. first_column = df.iloc[:, 0]
  150. sec_column = df.iloc[:, 1]
  151. df.iloc[:, 0] = first_column * v_speed_tip
  152. df.iloc[:, 1] = sec_column * np.cos(np.deg2rad(angle_tip_new))
  153. for df in result_line_root:
  154. first_column = df.iloc[:, 0]
  155. sec_column = df.iloc[:, 1]
  156. df.iloc[:, 0] = first_column * v_speed_root
  157. df.iloc[:, 1] = sec_column * np.cos(np.deg2rad(angle_root))
  158. avg_tip = result_avg_tip.iloc[:, 0]
  159. result_avg_tip.iloc[:, 0] = avg_tip * v_speed_tip
  160. avg_root = result_avg_root.iloc[:, 0]
  161. result_avg_root.iloc[:, 0] = avg_root * v_speed_root
  162. twist_1 = round(np.abs(pitch_angle_root[0] - pitch_angle_tip[0]), 2)
  163. twist_2 = round(np.abs(pitch_angle_root[1] - pitch_angle_tip[1]), 2)
  164. twist_3 = round(np.abs(pitch_angle_root[2] - pitch_angle_tip[2]), 2)
  165. twist_avg = round((twist_1 + twist_2 + twist_3) / 3, 2)
  166. sampling_num = int(0.01 * sampling_fq_1)
  167. data_tip.iloc[:, 0] = data_tip.iloc[:, 0] / 5000000
  168. data_root.iloc[:, 0] = data_root.iloc[:, 0] / 5000000
  169. lowpass_data.iloc[:, 0] = lowpass_data.iloc[:, 0] / 5000000
  170. return_list.append(time_code)
  171. return_list.append(wind_name)
  172. return_list.append(turbine_code)
  173. return_list.append(sampling_fq_1)
  174. return_list.append(pitch_angle_root[0])
  175. return_list.append(pitch_angle_root[1])
  176. return_list.append(pitch_angle_root[2])
  177. return_list.append(pitch_angle_root[3])
  178. return_list.append(mean_values[0])
  179. return_list.append(mean_values[1])
  180. return_list.append(mean_values[2])
  181. return_list.append(twist_1)
  182. return_list.append(twist_2)
  183. return_list.append(twist_3)
  184. return_list.append(twist_avg)
  185. return_list.append(tower_max)
  186. return_list.append(tower_freq)
  187. # 将return_list转换为DataFrame并追加到CSV文件
  188. df_new_row = pd.DataFrame([return_list],
  189. columns=['时间', '场站', '风机编号', '采样频率',
  190. '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
  191. '叶片1净空值', '叶片2净空值', '叶片3净空值',
  192. '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
  193. '振动幅值', '振动主频'])
  194. json_output = {
  195. 'original_plot': {
  196. 'blade_tip': {
  197. 'xdata': data_tip.iloc[:, 0].tolist()[::sampling_num],
  198. 'ydata': data_tip.iloc[:, 1].tolist()[::sampling_num]
  199. },
  200. 'blade_root': {
  201. 'xdata': data_root.iloc[:, 0].tolist()[::sampling_num],
  202. 'ydata': data_root.iloc[:, 1].tolist()[::sampling_num]
  203. }
  204. },
  205. 'fft_plot': {
  206. 'lowpass': {
  207. 'xdata': lowpass_data['time'].tolist()[::sampling_num],
  208. 'ydata': lowpass_data['distance_filtered'].tolist()[::sampling_num],
  209. 'xmax': max(lowpass_data['time'].tolist()),
  210. 'xmin': min(lowpass_data['time'].tolist()),
  211. 'ymax': max(lowpass_data['distance_filtered'].tolist()),
  212. 'ymin': min(lowpass_data['distance_filtered'].tolist())
  213. },
  214. 'fft': {
  215. 'xdata': fft_x,
  216. 'ydata': fft_y,
  217. 'xmax': max(fft_x),
  218. 'xmin': min(fft_x),
  219. 'ymax': max(fft_y),
  220. 'ymin': min(fft_y)
  221. }
  222. },
  223. 'blade_tip': {
  224. 'first_blade': {
  225. 'xdata': result_line_tip[0].iloc[:, 0].tolist(),
  226. 'ydata': result_line_tip[0].iloc[:, 1].tolist()
  227. },
  228. 'second_blade': {
  229. 'xdata': result_line_tip[1].iloc[:, 0].tolist(),
  230. 'ydata': result_line_tip[1].iloc[:, 1].tolist()
  231. },
  232. 'third_blade': {
  233. 'xdata': result_line_tip[2].iloc[:, 0].tolist(),
  234. 'ydata': result_line_tip[2].iloc[:, 1].tolist()
  235. },
  236. 'avg_blade': {
  237. 'xdata': result_avg_tip.iloc[:, 0].tolist(),
  238. 'ydata': result_avg_tip.iloc[:, 1].tolist()
  239. }
  240. },
  241. 'blade_root': {
  242. 'first_blade': {
  243. 'xdata': result_line_root[0].iloc[:, 0].tolist(),
  244. 'ydata': result_line_root[0].iloc[:, 1].tolist()
  245. },
  246. 'second_blade': {
  247. 'xdata': result_line_root[1].iloc[:, 0].tolist(),
  248. 'ydata': result_line_root[1].iloc[:, 1].tolist()
  249. },
  250. 'third_blade': {
  251. 'xdata': result_line_root[2].iloc[:, 0].tolist(),
  252. 'ydata': result_line_root[2].iloc[:, 1].tolist()
  253. },
  254. 'avg_blade': {
  255. 'xdata': result_avg_root.iloc[:, 0].tolist(),
  256. 'ydata': result_avg_root.iloc[:, 1].tolist()
  257. }
  258. },
  259. 'dist_distribution': {
  260. 'first_blade': {
  261. 'xdata': dist_distribute[0].iloc[:, 0].tolist(),
  262. 'ydata': dist_distribute[0].iloc[:, 1].tolist()
  263. },
  264. 'second_blade': {
  265. 'xdata': dist_distribute[1].iloc[:, 0].tolist(),
  266. 'ydata': dist_distribute[1].iloc[:, 1].tolist()
  267. },
  268. 'third_blade': {
  269. 'xdata': dist_distribute[2].iloc[:, 0].tolist(),
  270. 'ydata': dist_distribute[2].iloc[:, 1].tolist()
  271. }
  272. },
  273. 'analyse_table': {
  274. 'pitch_angle_diff': {
  275. 'blade_1': pitch_angle_root[0],
  276. 'blade_2': pitch_angle_root[1],
  277. 'blade_3': pitch_angle_root[2],
  278. 'blade_relate': pitch_angle_root[3]
  279. },
  280. 'aero_dist': {
  281. 'first_blade': {
  282. 'x_min': min_keys[0],
  283. 'y_min': min_values[0],
  284. 'x_max': max_keys[0],
  285. 'y_max': max_values[0],
  286. 'y_diff': np.abs(max_values[0] - min_values[0]),
  287. 'y_ava': mean_values[0]
  288. },
  289. 'second_blade': {
  290. 'x_min': min_keys[1],
  291. 'y_min': min_values[1],
  292. 'x_max': max_keys[1],
  293. 'y_max': max_values[1],
  294. 'y_diff': np.abs(max_values[1] - min_values[1]),
  295. 'y_ava': mean_values[1]
  296. },
  297. 'third_blade': {
  298. 'x_min': min_keys[2],
  299. 'y_min': min_values[2],
  300. 'x_max': max_keys[2],
  301. 'y_max': max_values[2],
  302. 'y_diff': np.abs(max_values[2] - min_values[2]),
  303. 'y_ava': mean_values[2]
  304. }
  305. },
  306. 'blade_twist': {
  307. 'blade_1': twist_1,
  308. 'blade_2': twist_2,
  309. 'blade_3': twist_3,
  310. 'blade_avg': twist_avg
  311. },
  312. 'tower_vibration': {
  313. 'max_vibration': tower_max,
  314. 'main_vibration_freq': tower_freq
  315. }
  316. }
  317. }
  318. # 获取当前程序的绝对路径
  319. python_interpreter_path = sys.executable
  320. project_directory = os.path.dirname(python_interpreter_path)
  321. data_folder = os.path.join(project_directory, 'data')
  322. # 检查data文件夹是否存在,如果不存在则创建
  323. if not os.path.exists(data_folder):
  324. os.makedirs(data_folder)
  325. # CSV文件路径
  326. csv_file_path = os.path.join(data_folder, 'history_data.csv')
  327. # 检查CSV文件是否存在,如果不存在则创建一个空的CSV文件
  328. if not os.path.exists(csv_file_path):
  329. pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率',
  330. '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
  331. '叶片1净空值', '叶片2净空值', '叶片3净空值',
  332. '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
  333. '振动幅值', '振动主频']).to_csv(csv_file_path, index=False)
  334. df_new_row.to_csv(csv_file_path, mode='a', header=False, index=False)
  335. time_code_cleaned = time_code.replace("-", "").replace(":", "").replace(" ", "")
  336. json_filename = f"{wind_name}_{turbine_code}_{time_code_cleaned}.json"
  337. json_file_path = os.path.join(data_folder, json_filename)
  338. with open(json_file_path, 'w') as json_file:
  339. json.dump(json_output, json_file, indent=4)
  340. return json_output
  341. def process_data(file_path):
  342. """
  343. 打开、解决时间重置、按时间清洗异常值、分列数据
  344. """
  345. # 读取第2、4、9列的数据
  346. data = pd.read_csv(file_path, usecols=[1, 3, 8], header=None, engine='c')
  347. data = data.head(int(len(data) * 0.95))
  348. # 找到第一列中最大值和最小值的位置
  349. max_value = data.iloc[:, 0].max()
  350. max_index = data.iloc[:, 0].idxmax()
  351. min_index = data.iloc[:, 0].idxmin()
  352. # 检查最小值的位置是否是最大值位置的下一个
  353. if min_index == max_index + 1:
  354. # 将最小值及其之后的所有值都加上最大值
  355. data.iloc[min_index:, 0] += max_value
  356. # 按时间列筛选清洗异常值
  357. last_time = data.iloc[-1, 0]
  358. first_time = data.iloc[0, 0]
  359. data = data[data.iloc[:, 0] >= first_time]
  360. data = data[data.iloc[:, 0] <= last_time]
  361. data.reset_index(drop=True, inplace=True)
  362. # 计算最小值
  363. min_time = data.iloc[:, 0].min()
  364. data.iloc[:, 0] -= min_time
  365. # 分为两组数据
  366. data_1 = data.iloc[:, [0, 1]]
  367. data_2 = data.iloc[:, [0, 2]]
  368. # 分别命名列
  369. data_1.columns = ['time', 'distance']
  370. data_2.columns = ['time', 'distance']
  371. return data_1, data_2
  372. def tower_filter(data_group: pd.DataFrame, noise_threshold: float):
  373. """
  374. 对轮毂中心数据进行降噪,和前项填充
  375. :param data_group: process_data计算完成后轮毂中心的数据。
  376. :param noise_threshold: 去掉占比小于noise_threshold的数据。
  377. :return: filtered_data:降噪后的数据
  378. """
  379. time.sleep(1)
  380. # 计算distance的分布
  381. distance_counts = data_group['distance'].value_counts(normalize=True)
  382. noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index
  383. noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index
  384. data_group.loc[noise_indices, 'distance'] = np.nan
  385. # 选择频率最大的5个值
  386. top_5_distances = distance_counts.head(5).index
  387. mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean()
  388. data_group.loc[(data_group['distance'] < mean_values - 20) | (
  389. data_group['distance'] > mean_values * 1.1), 'distance'] = np.nan
  390. # 前向填充
  391. data_group['distance'] = data_group['distance'].fillna(method='ffill')
  392. filtered_data = data_group
  393. return filtered_data
  394. def cycle_calculate(data_group: pd.DataFrame, noise_threshold: float, min_distance: float):
  395. """
  396. 对数据进行降噪,和前项填充;计算数据的周期节点,叶片前缘突变点、后缘突变点
  397. :param data_group: process_data计算完成后的数据。
  398. :param noise_threshold: 去掉占比小于noise_threshold的数据。
  399. :param min_distance: 区分叶片和塔筒的距离差值。
  400. :return: start_points:周期开始点, end_points:周期结束点, filtered_data:降噪后的数据
  401. """
  402. time.sleep(1)
  403. # 计算distance的分布
  404. distance_counts = data_group['distance'].value_counts(normalize=True)
  405. noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index
  406. noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index
  407. data_group.loc[noise_indices, 'distance'] = np.nan
  408. # 选择频率最大的5个值
  409. top_5_distances = distance_counts.head(5).index
  410. mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean()
  411. data_group.loc[(data_group['distance'] < mean_values - 20) | (
  412. data_group['distance'] > mean_values * 1.1), 'distance'] = np.nan
  413. # 前向填充
  414. data_group['distance'] = data_group['distance'].fillna(method='ffill')
  415. filtered_data = data_group
  416. # 计算相邻两行distance的差值
  417. filtered_data['distance_diff'] = filtered_data['distance'].diff()
  418. large_diff_indices = filtered_data[filtered_data['distance_diff'] > min_distance].index
  419. small_diff_indices = filtered_data[filtered_data['distance_diff'] < -min_distance].index
  420. filtered_data = filtered_data.drop(columns=['distance_diff'])
  421. start_points = pd.DataFrame()
  422. end_points = pd.DataFrame()
  423. # 遍历所有差值大于的行
  424. for idx in large_diff_indices:
  425. # 获取当前行的 distance 值
  426. current_distance = filtered_data.loc[idx, 'distance']
  427. next_rows_large = filtered_data.loc[idx - 1000: idx - 1]
  428. # 检查是否任意 distance 的值小于 current_distance - 2
  429. if next_rows_large['distance'].le(current_distance - min_distance).all():
  430. # 如果都小于,则将当前行和下一行添加到 special_points 中
  431. end_points = pd.concat([end_points, filtered_data.loc[[idx - 1]]])
  432. for idx in small_diff_indices:
  433. # 获取当前行的 distance 值
  434. current_distance = filtered_data.loc[idx - 1, 'distance']
  435. next_rows_small = filtered_data.iloc[idx: idx + 1000]
  436. # 检查是否任意 distance 的值小于 current_distance - 2
  437. if next_rows_small['distance'].le(current_distance - min_distance).all():
  438. # 如果都小于,则将当前行和下一行添加到 special_points 中
  439. start_points = pd.concat([start_points, filtered_data.loc[[idx]]])
  440. if end_points.iloc[0, 0] < start_points.iloc[0, 0]:
  441. end_points = end_points.drop(end_points.index[0])
  442. if end_points.iloc[-1, 0] < start_points.iloc[-1, 0]:
  443. start_points = start_points.drop(start_points.index[-1])
  444. else:
  445. pass
  446. return start_points, end_points, filtered_data
  447. def data_normalize(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame) \
  448. -> Tuple[List[pd.DataFrame], List[pd.DataFrame], List[pd.DataFrame], int, list]:
  449. """
  450. 提取每个叶片的数据并归一化,输出散点图和拟合图
  451. :param data_group: cycle_calculate计算完成后的数据。
  452. :param start_points: 所有每个周期开始点,叶片前缘突变点。
  453. :param end_points: 叶片后缘突变点。
  454. :return: turbines_processed: 每个叶片的拟合数据,
  455. turbines_scattered: 每个叶片的散点数据,
  456. border_rows: 每个叶片的2个边缘数据,
  457. normalize_cycle: 周期长度
  458. """
  459. time.sleep(1)
  460. combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
  461. # 检查排序后的数据从start开始,end结束
  462. if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
  463. combined_df_sorted = combined_df_sorted.iloc[1:]
  464. if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
  465. combined_df_sorted = combined_df_sorted.iloc[:-1]
  466. combined_df_sorted.reset_index(drop=True, inplace=True)
  467. # 将 start_points 中的时间点转换为列表
  468. start_times = combined_df_sorted['time'].tolist()
  469. time.sleep(1)
  470. normalize_cycle = start_times[1] - start_times[0]
  471. full_cycle = int((start_times[2] - start_times[0]) * 3)
  472. turbines = [pd.DataFrame() for _ in range(3)]
  473. # 遍历所有起始时间点
  474. for i in range(0, len(start_times), 2):
  475. # 获取当前起始和结束时间点
  476. start_time = start_times[i]
  477. end_time = start_times[i + 1]
  478. # 根据当前起始时间点和结束时间点对数据进行分段
  479. segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)]
  480. if segment is None:
  481. pass
  482. else:
  483. # 周期归一化
  484. ratio = (end_time - start_time) / normalize_cycle
  485. segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio
  486. # 将结果添加到相应的 turbine 数据框中
  487. turbines[i % 3] = pd.concat([turbines[i % 3], segment])
  488. # 数据分组清洗、求平均
  489. turbines_processed = []
  490. turbines_scattered = []
  491. min_list = []
  492. sd_time = [-1, -1]
  493. time_list = list(range(0, normalize_cycle, 1000))
  494. for turbine in turbines:
  495. # 按时间排序
  496. turbine_sorted = turbine.sort_values(by='time').reset_index(drop=True)
  497. # 找到time列的第一个值
  498. first_time = turbine_sorted['time'].iloc[0]
  499. # 分组,时间列每1000为一组(每40个时间点一组)
  500. bins = list(range(int(first_time), int(turbine_sorted['time'].max()), 1000))
  501. # 原始代码
  502. # bins = list(range(int(first_time), int(turbine_sorted['time'].max()) + len(start_times), int(fs / 50)))
  503. grouped = turbine_sorted.groupby(pd.cut(turbine_sorted['time'], bins=bins, right=False))
  504. # 初始化一个空的 DataFrame 用于存储处理后的数据
  505. processed_df = pd.DataFrame()
  506. scattered_df = pd.DataFrame()
  507. mean_points = []
  508. diff_points = []
  509. # 对每个组进行处理
  510. for _, group in grouped:
  511. # 去除 distance 最大和最小的前5%
  512. quantile_5 = group['distance'].quantile(0.05)
  513. quantile_95 = group['distance'].quantile(0.95)
  514. filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)]
  515. # 计算均值
  516. mean_point = filtered_group['distance'].mean()
  517. mean_points.append(mean_point)
  518. # 遍历 mean_points 列表,计算每个元素与其下一个元素的差值
  519. for i in range(len(mean_points) - 1):
  520. diff = abs(mean_points[i + 1] - mean_points[i])
  521. diff_points.append(diff)
  522. start_index = int(len(diff_points) * 0.05)
  523. end_index = int(len(diff_points) * 0.95)
  524. subset1 = diff_points[start_index:end_index]
  525. sdr_diff = np.max(subset1) * 1.1
  526. min_list.append(min(mean_points))
  527. # 找到第一个和最后一个小于 sdr_diff 的序号
  528. first_index = np.where(diff_points < sdr_diff)[0][0]
  529. last_index = np.where(diff_points < sdr_diff)[0][-1]
  530. for index, (bin, group) in enumerate(grouped):
  531. # 去除 distance 最大和最小的前5%
  532. quantile_5 = group['distance'].quantile(0.05)
  533. quantile_95 = group['distance'].quantile(0.95)
  534. filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)]
  535. if first_index <= index < last_index: # 如果斜率小于,则认为该组数据不是突变点
  536. # 计算中点
  537. mid_point = filtered_group.mean()
  538. # 将中点转换为 DataFrame 并添加到处理后的 DataFrame 中
  539. mid_point_df = pd.DataFrame([mid_point])
  540. mid_point_df.iloc[0, 0] = time_list[index]
  541. processed_df = pd.concat([processed_df, mid_point_df], ignore_index=True)
  542. scattered_df = pd.concat([scattered_df, filtered_group], ignore_index=True)
  543. else:
  544. pass
  545. # 找到time列的最小值和最大值
  546. min_time = processed_df['time'].min()
  547. max_time = processed_df['time'].max()
  548. if sd_time == [-1, -1]:
  549. sd_time = [min_time, max_time]
  550. elif sd_time[0] < min_time:
  551. sd_time[0] = min_time
  552. elif sd_time[1] > max_time:
  553. sd_time[1] = max_time
  554. # 将处理后的 DataFrame 添加到列表中
  555. turbines_processed.append(processed_df)
  556. turbines_scattered.append(scattered_df)
  557. border_rows = []
  558. for i, turbine in enumerate(turbines_processed):
  559. # 找到离 sd_time[0] 最近的行的索引
  560. closest_index_0 = (turbine['time'] - sd_time[0]).abs().idxmin()
  561. turbine.at[closest_index_0, 'time'] = sd_time[0]
  562. sd_time_row_0 = turbine.loc[closest_index_0]
  563. # 找到离 sd_time[1] 最近的行的索引
  564. closest_index_1 = (turbine['time'] - sd_time[1]).abs().idxmin()
  565. turbine.at[closest_index_1, 'time'] = sd_time[1]
  566. sd_time_row_1 = turbine.loc[closest_index_1]
  567. # 切片 turbine,从 closest_index_0 到 closest_index_1
  568. turbines_processed[i] = turbine.iloc[closest_index_0:closest_index_1 + 1].reset_index(drop=True)
  569. sd_time_rows_turbine = pd.concat([pd.DataFrame([sd_time_row_0]), pd.DataFrame([sd_time_row_1])]
  570. , ignore_index=True)
  571. border_rows.append(sd_time_rows_turbine)
  572. time.sleep(1)
  573. return turbines_processed, turbines_scattered, border_rows, full_cycle, min_list
  574. def blade_shape(turbines_processed: List[pd.DataFrame]):
  575. """
  576. 计算叶片平均形状、叶片形状偏差。
  577. :param turbines_processed:叶片拟合曲线数据,来自data_normalize
  578. :return: 叶片平均形状、叶片形状偏差
  579. """
  580. row_counts = [df.shape[0] for df in turbines_processed]
  581. num_rows = min(row_counts)
  582. # 创建一个新的data.frame用于保存结果
  583. turbine_avg = pd.DataFrame(index=range(num_rows), columns=['time', 'distance'])
  584. turbine_diff = [pd.DataFrame(index=range(num_rows), columns=['time', 'distance']) for _ in turbines_processed]
  585. # 遍历每一行
  586. for i in range(num_rows):
  587. distances = [df.loc[i, 'distance'] for df in turbines_processed] # 获取每个data.frame的distance列的值
  588. avg_distance = sum(distances) / len(distances) # 计算distance列的平均值
  589. time_value = turbines_processed[0].loc[i, 'time'] # 获取time列的值
  590. turbine_avg.loc[i, 'time'] = time_value
  591. turbine_avg.loc[i, 'distance'] = avg_distance
  592. for j in range(len(distances)):
  593. distances[j] = distances[j] - avg_distance
  594. turbine_diff[j].loc[i, 'time'] = time_value
  595. turbine_diff[j].loc[i, 'distance'] = distances[j]
  596. time.sleep(10)
  597. return turbine_avg, turbine_diff
  598. def coordinate_normalize(tip_border_rows: List[pd.DataFrame], tip_angle):
  599. """
  600. 将叶尖测量数据和叶根、轮毂中心的测量原点归一化。
  601. :param tip_border_rows: 3个叶尖边缘数据
  602. :param tip_angle: 叶尖测量俯仰角
  603. :return: 归一化后叶尖数据,叶尖俯仰角
  604. """
  605. tip_angle1 = np.deg2rad(tip_angle)
  606. tip_angle_list = []
  607. for turbine in tip_border_rows:
  608. tip_angle_cal = np.arctan((np.sin(tip_angle1) * turbine['distance'] - 0.07608) /
  609. np.cos(tip_angle1) * turbine['distance'])
  610. turbine['distance'] = (turbine['distance'] ** 2 + 0.0057881664 -
  611. 0.15216 * turbine['distance'] * np.sin(tip_angle1)) ** 0.5
  612. tip_angle_list.append(tip_angle_cal)
  613. tip_angle_new = float(np.mean(tip_angle_list))
  614. tip_angle_new1 = np.rad2deg(tip_angle_new)
  615. return tip_border_rows, tip_angle
  616. def radius_cal(border_rows, meas_angle, cen_dist, cen_angle, angle_main, angle_rotate):
  617. """
  618. 计算测量点处的旋转半径。
  619. :param border_rows: 三个叶片的边界
  620. :param meas_angle: 回波俯仰角
  621. :param cen_dist: 轮毂中心距离
  622. :param cen_angle: 轮毂中心俯仰角
  623. :param angle_main: 主轴倾角
  624. :param angle_rotate: 锥角
  625. :return: 旋转半径
  626. """
  627. aero_dist = (pd.concat([df['distance'] for df in border_rows]).mean())
  628. cen_x = np.cos(np.deg2rad(cen_angle)) * cen_dist
  629. cen_y = np.sin(np.deg2rad(cen_angle)) * cen_dist
  630. aero_x = np.cos(np.deg2rad(meas_angle)) * aero_dist
  631. aero_y = np.sin(np.deg2rad(meas_angle)) * aero_dist
  632. theta_4 = np.tan(np.pi - np.deg2rad(angle_main))
  633. theta_5 = np.tan(np.pi / 2 - np.deg2rad(angle_main) + np.deg2rad(angle_rotate))
  634. if np.abs(np.deg2rad(angle_main) - np.deg2rad(angle_rotate)) < 0.0001:
  635. radius = np.abs((cen_y - aero_y) - theta_4 * (cen_x - aero_x))
  636. else:
  637. radius = (np.abs((theta_4 * (cen_x - aero_x) - (cen_y - aero_y)) / (theta_4 - theta_5))
  638. * (1 + theta_5 ** 2) ** 0.5)
  639. return radius
  640. def blade_angle_aero_dist(border_rows: List[pd.DataFrame], radius: float, full_cycle: int,
  641. tower_dist: float, v_angle: float):
  642. """
  643. 计算叶片相对桨距角和叶片净空距离。
  644. :param border_rows: 三个叶片的边界
  645. :param radius: 旋转半径
  646. :param full_cycle: 全周期
  647. :param tower_dist: 塔筒距离
  648. :param v_angle: 俯仰角度
  649. :return: 绝对桨距角,净空距离,叶片线速度
  650. """
  651. v_speed = 2 * np.pi * radius / full_cycle # 叶片线速度m/(1计时器单位)
  652. pitch_angle_list = []
  653. aero_dist_list = []
  654. cen_blade = []
  655. for turbine in border_rows:
  656. diff_time = turbine.iloc[1, 0] - turbine.iloc[0, 0]
  657. diff_len = turbine.iloc[1, 1] - turbine.iloc[0, 1]
  658. mean_col2 = (turbine.iloc[1, 1] + turbine.iloc[0, 1]) / 2
  659. aero_dist = abs(mean_col2 - tower_dist) * np.cos(np.deg2rad(v_angle))
  660. pitch_angle = np.degrees(np.arctan(diff_len / (diff_time * v_speed)))
  661. pitch_angle_list.append(pitch_angle)
  662. aero_dist_list.append(aero_dist)
  663. cen_blade.append(mean_col2)
  664. pitch_mean = np.mean(pitch_angle_list)
  665. pitch_angle_list = [angle - pitch_mean for angle in pitch_angle_list]
  666. pitch_angle_list.append(max(pitch_angle_list) - min(pitch_angle_list))
  667. aero_dist_list.append(np.mean(aero_dist_list))
  668. pitch_angle_list = [round(num, 2) for num in pitch_angle_list]
  669. aero_dist_list = [round(num, 2) for num in aero_dist_list]
  670. return pitch_angle_list, aero_dist_list, v_speed, cen_blade
  671. def find_param(path: str):
  672. """
  673. 根据文件路径获取参数
  674. """
  675. path = path.replace('\\', '/')
  676. last_slash_index = path.rfind('/')
  677. result = path[last_slash_index + 1:]
  678. underscore_indices = []
  679. start = 0
  680. while True:
  681. index = result.find('_', start)
  682. if index == -1:
  683. break
  684. underscore_indices.append(index)
  685. start = index + 1
  686. wind_name = result[: underscore_indices[0]]
  687. turbine_code = result[underscore_indices[0] + 1: underscore_indices[1]]
  688. time_code = result[underscore_indices[1] + 1: underscore_indices[2]]
  689. sampling_fq = int(result[underscore_indices[2] + 1: underscore_indices[3]])
  690. tunnel_1 = float(result[underscore_indices[3] + 1: underscore_indices[4]])
  691. tunnel_2 = float(result[underscore_indices[4] + 1: -4])
  692. dt = datetime.strptime(time_code, "%Y%m%d%H%M%S")
  693. standard_time_str = dt.strftime("%Y-%m-%d %H:%M:%S")
  694. return wind_name, turbine_code, standard_time_str, sampling_fq, tunnel_1, tunnel_2
  695. def blade_dist_distribute_cal(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame,
  696. tower_dist: float, v_angle: float, blade_cen_dist: list):
  697. """
  698. 计算每个叶片每个周期的转速和净空距离
  699. :param data_group: cycle_calculate计算完成后的数据。
  700. :param start_points: 所有每个周期开始点,叶片前缘突变点。
  701. :param end_points: 叶片后缘突变点。
  702. :param tower_dist: 塔筒距离。
  703. :param v_angle: 测量俯仰角度。
  704. :param blade_cen_dist: 叶片内部距离。
  705. """
  706. time.sleep(1)
  707. combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
  708. # 检查排序后的数据从start开始,end结束
  709. if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
  710. combined_df_sorted = combined_df_sorted.iloc[1:]
  711. if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
  712. combined_df_sorted = combined_df_sorted.iloc[:-1]
  713. combined_df_sorted.reset_index(drop=True, inplace=True)
  714. # 将 start_points 中的时间点转换为列表
  715. start_times = combined_df_sorted['time'].tolist()
  716. normalize_cycle = start_times[1] - start_times[0]
  717. tower_clearance = [pd.DataFrame() for _ in range(3)]
  718. # 遍历所有起始时间点
  719. for i in range(0, len(start_times) - 2, 2):
  720. # 获取当前起始和结束时间点
  721. start_time = start_times[i]
  722. end_time = start_times[i + 1]
  723. # 根据当前起始时间点和结束时间点对数据进行分段
  724. segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)]
  725. min_distance = segment['distance'].min()
  726. clearance = np.abs(tower_dist - min_distance - blade_cen_dist[i % 3]) * np.cos(np.deg2rad(v_angle))
  727. r_speed = (start_times[i + 2] - start_times[i]) * 3 / 5000000
  728. # 周期归一化
  729. ratio = (end_time - start_time) / normalize_cycle
  730. segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio
  731. new_df = pd.DataFrame({
  732. 'r_speed': [r_speed],
  733. 'clearance': [clearance]
  734. })
  735. # 将结果添加到相应的 turbine 数据框中
  736. tower_clearance[i % 3] = pd.concat([tower_clearance[i % 3], new_df])
  737. return tower_clearance