il y a 1 an · 57b5a9357d
--- a/data_analyse_origin.py
+++ b/data_analyse_origin.py
@@ -109,6 +109,7 @@ def data_analyse(path: List[str]):
 
				     创建data目录，把分析数据保存到历史记录中，同时返回全量分析数据
			
 
				     """
			
 
				 
			
 
				+    # 基础配置参数
			
 
				     locate_file = path[0]
			
 
				     measure_file = path[1]
			
 
				     noise_reduction = 0.000001  # 如果一个距离值的所有样本量小于总样本量的noise_reduction，则被去掉
			
@@ -117,24 +118,26 @@ def data_analyse(path: List[str]):
 
				     axial_inclination = float(path[3])  # 轴向倾角
			
 
				     return_list = []
			
 
				 
			
 
				+    # 读取文件信息，包括风场名、风机编号、时间、采样频率、2个通道俯仰角
			
 
				     wind_name, turbine_code, time_code, sampling_fq, angle_nan, angle_cen = find_param(locate_file)
			
 
				     wind_name_1, turbine_code_1, time_code, sampling_fq_1, angle_tip, angle_root = find_param(measure_file)
			
 
				-
			
 
				-
			
 
				     sampling_fq_1 = sampling_fq_1 * 1000
			
 
				     sampling_fq = sampling_fq * 1000
			
 
				     print(wind_name, turbine_code, time_code, sampling_fq, angle_nan, angle_cen)
			
 
				     print(wind_name_1, turbine_code_1, time_code, sampling_fq_1, angle_tip, angle_root)
			
 
				 
			
 
				+    # 读取数据，并检查是否有时间序列异常，分离2通道数据
			
 
				     data_nan, data_cen = process_data(locate_file)
			
 
				     data_tip, data_root = process_data(measure_file)
			
 
				 
			
 
				+    # 全部数据进行降噪、去除异常点处理，叶根叶尖数据计算叶片扫掠起始、结束点，轮毂中心数据计算距离均值
			
 
				     start_tip, end_tip, filtered_data_tip = cycle_calculate(data_tip, noise_reduction, min_difference)
			
 
				     start_root, end_root, filtered_data_root = cycle_calculate(data_root, noise_reduction, min_difference)
			
 
				     # start_nan, end_nan, filtered_data_nan = cycle_calculate(data_nan, noise_reduction, min_difference)
			
 
				     filtered_data_cen = tower_filter(data_cen, noise_reduction)
			
 
				     dist_cen = np.mean(filtered_data_cen.iloc[:, 1].tolist())
			
 
				 
			
 
				+    # 检查起始结束点顺序，确保叶根叶尖测点同步开始、结束
			
 
				     if end_tip.iloc[0, 0] < start_root.iloc[0, 0]:
			
 
				         start_tip = start_tip.drop(start_tip.index[0])
			
 
				         end_tip = end_tip.drop(end_tip.index[0])
			
@@ -143,23 +146,29 @@ def data_analyse(path: List[str]):
 
				     else:
			
 
				         raise ValueError("The elements are not in the expected order.")
			
 
				 
			
 
				+    # 计算叶根、叶尖处的塔筒距离，对轮毂中心做FFT分析
			
 
				     tower_dist_tip = ff.tower_cal(filtered_data_tip, start_tip, end_tip, sampling_fq_1)
			
 
				     tower_dist_root = ff.tower_cal(filtered_data_root, start_root, end_root, sampling_fq_1)
			
 
				     lowpass_data, fft_x, fft_y, tower_freq, tower_max= ff.process_fft(filtered_data_cen, sampling_fq)
			
 
				 
			
 
				+    # 根据起始结束点，对叶尖、叶根数据进行归一化处理，计算每个叶片的散点表、线表、边界点表、标准循环周期长度、每个叶片平均最小值
			
 
				     result_line_tip, result_scatter_tip, border_rows_tip, cycle_len_tip, min_tip \
			
 
				         = data_normalize(filtered_data_tip, start_tip, end_tip)
			
 
				     result_line_root, result_scatter_root, border_rows_root, cycle_len_root, min_root \
			
 
				         = data_normalize(filtered_data_root, start_root, end_root)
			
 
				 
			
 
				+    # 计算3个叶片的平均轮廓，3个叶片的形状差
			
 
				     result_avg_tip, result_diff_tip = blade_shape(result_line_tip)
			
 
				     result_avg_root, result_diff_root = blade_shape(result_line_root)
			
 
				 
			
 
				+    # 对叶尖的边界点表和俯仰角做坐标归一化处理
			
 
				     border_rows_tip_new, angle_tip_new = coordinate_normalize(border_rows_tip, angle_tip)
			
 
				 
			
 
				+    # 对叶片的边界点表做半径计算
			
 
				     tip_r = radius_cal(border_rows_tip_new, angle_tip_new, dist_cen, angle_cen, axial_inclination, angle_cone)
			
 
				     root_r = radius_cal(border_rows_root, angle_root, dist_cen, angle_cen, axial_inclination, angle_cone)
			
 
				 
			
 
				+    # 计算叶片测量位置处的绝对桨距角、相对桨距角、线速度、叶片内部中心点距离
			
 
				     pitch_angle_tip, aero_dist_tip, v_speed_tip, cen_blade_tip = (
			
 
				         blade_angle_aero_dist(border_rows_tip, tip_r, cycle_len_tip, tower_dist_tip, angle_tip_new))
			
 
				     pitch_angle_root, aero_dist_root, v_speed_root, cen_blade_root = (
			
@@ -172,29 +181,31 @@ def data_analyse(path: List[str]):
 
				     blade_dist_tip = abs_diff * np.cos(np.deg2rad(angle_tip_new))
			
 
				     blade_dist_tip.tolist()  # 如果需要将结果转换回列表
			
 
				 
			
 
				+    # 计算叶片转速-净空散点表
			
 
				     dist_distribute = blade_dist_distribute_cal(filtered_data_tip, start_tip, end_tip,
			
 
				                                                 tower_dist_tip, angle_tip_new, blade_dist_tip)
			
 
				     dist_distribute = [df.round(5) for df in dist_distribute]
			
 
				 
			
 
				-    # 获取每个 DataFrame 第二列的最小值和最大值，以及它们对应的第一列的值，并分别保存在列表中
			
 
				+    # 获取净空距离的最小值和最大值，以及它们对应的转速值，并分别保存在列表中
			
 
				     min_values = []
			
 
				     min_keys = []
			
 
				     max_values = []
			
 
				     max_keys = []
			
 
				     mean_values = []
			
 
				     for df in dist_distribute:
			
 
				-        second_col_min = df[df.columns[1]].min()
			
 
				-        second_col_max = df[df.columns[1]].max()
			
 
				+        second_col_min = round(df[df.columns[1]].min(), 2)
			
 
				+        second_col_max = round(df[df.columns[1]].max(), 2)
			
 
				         min_row = df[df[df.columns[1]] == second_col_min]
			
 
				         max_row = df[df[df.columns[1]] == second_col_max]
			
 
				         min_values.append(second_col_min)
			
 
				-        min_keys.append(min_row.iloc[0][df.columns[0]])
			
 
				+        min_keys.append(round(min_row.iloc[0][df.columns[0]], 2))
			
 
				         max_values.append(second_col_max)
			
 
				-        max_keys.append(max_row.iloc[0][df.columns[0]])
			
 
				+        max_keys.append(round(max_row.iloc[0][df.columns[0]], 2))
			
 
				 
			
 
				     for i in range(3):
			
 
				         mean_values.append(round((max_values[i] + min_values[i]) / 2, 2))
			
 
				 
			
 
				+    # 将叶片线表的时间列乘以线速度、距离列乘以俯仰角余弦，得到叶片横截面的真实轮廓
			
 
				     for df in result_line_tip:
			
 
				         first_column = df.iloc[:, 0]
			
 
				         sec_column = df.iloc[:, 1]
			
@@ -207,7 +218,7 @@ def data_analyse(path: List[str]):
 
				         df.iloc[:, 0] = first_column * v_speed_root
			
 
				         df.iloc[:, 1] = sec_column * np.cos(np.deg2rad(angle_root))
			
 
				 
			
 
				-
			
 
				+    # 将叶片平均轮廓数据乘以线速度，得到实际叶片长度
			
 
				     avg_tip = result_avg_tip.iloc[:, 0]
			
 
				     result_avg_tip.iloc[:, 0] = avg_tip * v_speed_tip
			
 
				     avg_root = result_avg_root.iloc[:, 0]
			
@@ -218,12 +229,15 @@ def data_analyse(path: List[str]):
 
				     twist_3 = round(np.abs(pitch_angle_root[2] - pitch_angle_tip[2]), 2)
			
 
				     twist_avg = round((twist_1 + twist_2 + twist_3) / 3, 2)
			
 
				 
			
 
				+    # 降低数据采样频率，减轻接口负担
			
 
				     sampling_num = int(0.01 * sampling_fq_1)
			
 
				+
			
 
				+    # 将原始数据的时间列由计时时钟转换为实际时间
			
 
				     data_tip.iloc[:, 0] = data_tip.iloc[:, 0] / 5000000
			
 
				     data_root.iloc[:, 0] = data_root.iloc[:, 0] / 5000000
			
 
				     lowpass_data.iloc[:, 0] = lowpass_data.iloc[:, 0] / 5000000
			
 
				 
			
 
				-
			
 
				+    # 将需要保存到CSV的数据添加到return_list中
			
 
				     return_list.append(time_code)
			
 
				     return_list.append(wind_name)
			
 
				     return_list.append(turbine_code)
			
@@ -843,8 +857,9 @@ def coordinate_normalize(tip_border_rows: List[pd.DataFrame], tip_angle):
 
				     tip_angle_list = []
			
 
				     for turbine in tip_border_rows:
			
 
				 
			
 
				-        tip_angle_cal = np.arctan((np.sin(tip_angle1) * turbine['distance'] - 0.07608) /
			
 
				-                                  np.cos(tip_angle1) * turbine['distance'])
			
 
				+        tip_angle_cal0 = ((np.sin(tip_angle1) * turbine['distance'] - 0.07608) /
			
 
				+                          (np.cos(tip_angle1) * turbine['distance']))
			
 
				+        tip_angle_cal = np.arctan(tip_angle_cal0)
			
 
				         turbine['distance'] = (turbine['distance']**2 + 0.0057881664 -
			
 
				                                0.15216*turbine['distance']*np.sin(tip_angle1)) ** 0.5
			
 
				 
			
@@ -854,7 +869,7 @@ def coordinate_normalize(tip_border_rows: List[pd.DataFrame], tip_angle):
 
				     tip_angle_new1 = np.rad2deg(tip_angle_new)
			
 
				     print('叶尖俯仰角: ' + str(tip_angle_new1))
			
 
				     
			
 
				-    return tip_border_rows, tip_angle
			
 
				+    return tip_border_rows, tip_angle_new1
			
 
				 
			
 
				 
			
 
				 
			
--- a/data_clean.py
+++ b/data_clean.py
@@ -5,33 +5,27 @@ import numpy as np
 
				 import matplotlib.pyplot as plt
			
 
				 from typing import Tuple, List
			
 
				 import warnings
			
 
				-import time
			
 
				 import sys
			
 
				 import frequency_filter as ff
			
 
				 from datetime import datetime
			
 
				 
			
 
				-warnings.filterwarnings("ignore", category=FutureWarning)  # 忽略特定警告
			
 
				-plt.rcParams['font.sans-serif'] = ['SimHei']  # 使用黑体
			
 
				-plt.rcParams['axes.unicode_minus'] = False  # 解决保存图像是负号'-'显示为方块的问题
			
 
				+warnings.filterwarnings("ignore", category=FutureWarning)  
			
 
				+plt.rcParams['font.sans-serif'] = ['SimHei']  
			
 
				+plt.rcParams['axes.unicode_minus'] = False  
			
 
				 
			
 
				 
			
 
				 def result_main():
			
 
				-
			
 
				-    """
			
 
				-    创建data目录，返回历史分析数据存放的文件路径
			
 
				-    """
			
 
				-
			
 
				-    # 获取当前程序的绝对路径
			
 
				+    
			
 
				     python_interpreter_path = sys.executable
			
 
				     project_directory = os.path.dirname(python_interpreter_path)
			
 
				     data_folder = os.path.join(project_directory, 'data')
			
 
				-    # 检查data文件夹是否存在，如果不存在则创建
			
 
				+    
			
 
				     if not os.path.exists(data_folder):
			
 
				         os.makedirs(data_folder)
			
 
				 
			
 
				-    # CSV文件路径
			
 
				+    
			
 
				     csv_file_path = os.path.join(data_folder, 'history_data.csv')
			
 
				-    # 检查CSV文件是否存在，如果不存在则创建一个空的CSV文件
			
 
				+    
			
 
				     if not os.path.exists(csv_file_path):
			
 
				         pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率',
			
 
				                               '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
			
@@ -44,44 +38,33 @@ def result_main():
 
				 
			
 
				 def delete_data(names):
			
 
				 
			
 
				-    """
			
 
				-    删除历史分析数据
			
 
				-    :param names: 删除条件
			
 
				-    :return: csv文件路径
			
 
				-    """
			
 
				 
			
 
				-    # 获取当前程序的绝对路径
			
 
				     python_interpreter_path = sys.executable
			
 
				     project_directory = os.path.dirname(python_interpreter_path)
			
 
				     data_folder = os.path.join(project_directory, 'data')
			
 
				 
			
 
				-    # CSV文件路径
			
 
				+    
			
 
				     csv_file_path = os.path.join(data_folder, 'history_data.csv')
			
 
				     df = pd.read_csv(csv_file_path)
			
 
				 
			
 
				     for name in names:
			
 
				-        # 检查条件
			
 
				+        
			
 
				         condition = ((df['时间'].str.contains(name[0])) &
			
 
				                      (df['场站'].str.contains(name[1])) &
			
 
				                      (df['风机编号'].str.contains(name[2])))
			
 
				 
			
 
				-        # 删除满足条件的行
			
 
				+        
			
 
				         df = df[~condition]
			
 
				-    # 如果需要，可以将修改后的 DataFrame 保存回 CSV 文件
			
 
				+    
			
 
				     df.to_csv(csv_file_path, index=False)
			
 
				 
			
 
				     return csv_file_path
			
 
				 
			
 
				 
			
 
				 def history_data(name):
			
 
				-    """
			
 
				-    读取历史分析数据
			
 
				-    :param name: 接口返回列表
			
 
				-    :return:
			
 
				-    """
			
 
				 
			
 
				     wind_name, turbine_code, time_code = name[1], name[2], name[0]
			
 
				-    # 获取当前程序的绝对路径
			
 
				+    
			
 
				     python_interpreter_path = sys.executable
			
 
				     project_directory = os.path.dirname(python_interpreter_path)
			
 
				     data_folder = os.path.join(project_directory, 'data')
			
@@ -100,16 +83,13 @@ def history_data(name):
 
				 
			
 
				 def data_analyse(path: List[str]):
			
 
				 
			
 
				-    """
			
 
				-    创建data目录，把分析数据保存到历史记录中，同时返回全量分析数据
			
 
				-    """
			
 
				 
			
 
				     locate_file = path[0]
			
 
				     measure_file = path[1]
			
 
				-    noise_reduction = 0.000001  # 如果一个距离值的所有样本量小于总样本量的noise_reduction，则被去掉
			
 
				-    min_difference = 1.5  # 如果相邻2个点的距离差大于min_difference，则被注意是否是周期节点
			
 
				-    angle_cone = float(path[2])  # 锥角
			
 
				-    axial_inclination = float(path[3])  # 轴向倾角
			
 
				+    noise_reduction = 0.000001  
			
 
				+    min_difference = 1.5  
			
 
				+    angle_cone = float(path[2])  
			
 
				+    axial_inclination = float(path[3])  
			
 
				     return_list = []
			
 
				 
			
 
				     wind_name, turbine_code, time_code, sampling_fq, angle_nan, angle_cen = find_param(locate_file)
			
@@ -156,32 +136,32 @@ def data_analyse(path: List[str]):
 
				     pitch_angle_root, aero_dist_root, v_speed_root, cen_blade_root = (
			
 
				         blade_angle_aero_dist(border_rows_root, root_r, cycle_len_root, tower_dist_root, angle_root))
			
 
				 
			
 
				-    # 将列表转换为 numpy 数组
			
 
				+    
			
 
				     cen_blade_tip_array = np.array(cen_blade_tip)
			
 
				     min_tip_array = np.array(min_tip)
			
 
				-    abs_diff = np.abs(cen_blade_tip_array - min_tip_array)  # 计算差值的绝对值
			
 
				+    abs_diff = np.abs(cen_blade_tip_array - min_tip_array)  
			
 
				     blade_dist_tip = abs_diff * np.cos(np.deg2rad(angle_tip_new))
			
 
				-    blade_dist_tip.tolist()  # 如果需要将结果转换回列表
			
 
				+    blade_dist_tip.tolist()  
			
 
				 
			
 
				     dist_distribute = blade_dist_distribute_cal(filtered_data_tip, start_tip, end_tip,
			
 
				                                                 tower_dist_tip, angle_tip_new, blade_dist_tip)
			
 
				     dist_distribute = [df.round(5) for df in dist_distribute]
			
 
				 
			
 
				-    # 获取每个 DataFrame 第二列的最小值和最大值，以及它们对应的第一列的值，并分别保存在列表中
			
 
				+    
			
 
				     min_values = []
			
 
				     min_keys = []
			
 
				     max_values = []
			
 
				     max_keys = []
			
 
				     mean_values = []
			
 
				     for df in dist_distribute:
			
 
				-        second_col_min = df[df.columns[1]].min()
			
 
				-        second_col_max = df[df.columns[1]].max()
			
 
				+        second_col_min = round(df[df.columns[1]].min(), 2)
			
 
				+        second_col_max = round(df[df.columns[1]].max(), 2)
			
 
				         min_row = df[df[df.columns[1]] == second_col_min]
			
 
				         max_row = df[df[df.columns[1]] == second_col_max]
			
 
				         min_values.append(second_col_min)
			
 
				-        min_keys.append(min_row.iloc[0][df.columns[0]])
			
 
				+        min_keys.append(round(min_row.iloc[0][df.columns[0]], 2))
			
 
				         max_values.append(second_col_max)
			
 
				-        max_keys.append(max_row.iloc[0][df.columns[0]])
			
 
				+        max_keys.append(round(max_row.iloc[0][df.columns[0]], 2))
			
 
				 
			
 
				     for i in range(3):
			
 
				         mean_values.append(round((max_values[i] + min_values[i]) / 2, 2))
			
@@ -233,7 +213,7 @@ def data_analyse(path: List[str]):
 
				     return_list.append(tower_freq)
			
 
				 
			
 
				 
			
 
				-    # 将return_list转换为DataFrame并追加到CSV文件
			
 
				+    
			
 
				     df_new_row = pd.DataFrame([return_list],
			
 
				                               columns=['时间', '场站', '风机编号', '采样频率',
			
 
				                                        '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
			
@@ -366,17 +346,17 @@ def data_analyse(path: List[str]):
 
				         }
			
 
				     }
			
 
				 
			
 
				-    # 获取当前程序的绝对路径
			
 
				+    
			
 
				     python_interpreter_path = sys.executable
			
 
				     project_directory = os.path.dirname(python_interpreter_path)
			
 
				     data_folder = os.path.join(project_directory, 'data')
			
 
				-    # 检查data文件夹是否存在，如果不存在则创建
			
 
				+    
			
 
				     if not os.path.exists(data_folder):
			
 
				         os.makedirs(data_folder)
			
 
				 
			
 
				-    # CSV文件路径
			
 
				+    
			
 
				     csv_file_path = os.path.join(data_folder, 'history_data.csv')
			
 
				-    # 检查CSV文件是否存在，如果不存在则创建一个空的CSV文件
			
 
				+    
			
 
				     if not os.path.exists(csv_file_path):
			
 
				         pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率',
			
 
				                               '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
			
@@ -397,39 +377,35 @@ def data_analyse(path: List[str]):
 
				 
			
 
				 def process_data(file_path):
			
 
				 
			
 
				-    """
			
 
				-    打开、解决时间重置、按时间清洗异常值、分列数据
			
 
				-    """
			
 
				-
			
 
				-    # 读取第2、4、9列的数据
			
 
				+    
			
 
				     data = pd.read_csv(file_path, usecols=[1, 3, 8], header=None, engine='c')
			
 
				     data = data.head(int(len(data) * 0.95))
			
 
				 
			
 
				-    # 找到第一列中最大值和最小值的位置
			
 
				+    
			
 
				     max_value = data.iloc[:, 0].max()
			
 
				     max_index = data.iloc[:, 0].idxmax()
			
 
				     min_index = data.iloc[:, 0].idxmin()
			
 
				 
			
 
				-    # 检查最小值的位置是否是最大值位置的下一个
			
 
				+    
			
 
				     if min_index == max_index + 1:
			
 
				-        # 将最小值及其之后的所有值都加上最大值
			
 
				+        
			
 
				         data.iloc[min_index:, 0] += max_value
			
 
				 
			
 
				-    # 按时间列筛选清洗异常值
			
 
				+    
			
 
				     last_time = data.iloc[-1, 0]
			
 
				     first_time = data.iloc[0, 0]
			
 
				     data = data[data.iloc[:, 0] >= first_time]
			
 
				     data = data[data.iloc[:, 0] <= last_time]
			
 
				     data.reset_index(drop=True, inplace=True)
			
 
				-    # 计算最小值
			
 
				+    
			
 
				     min_time = data.iloc[:, 0].min()
			
 
				     data.iloc[:, 0] -= min_time
			
 
				 
			
 
				-    # 分为两组数据
			
 
				+    
			
 
				     data_1 = data.iloc[:, [0, 1]]
			
 
				     data_2 = data.iloc[:, [0, 2]]
			
 
				 
			
 
				-    # 分别命名列
			
 
				+    
			
 
				     data_1.columns = ['time', 'distance']
			
 
				     data_2.columns = ['time', 'distance']
			
 
				 
			
@@ -438,28 +414,19 @@ def process_data(file_path):
 
				 
			
 
				 def tower_filter(data_group: pd.DataFrame, noise_threshold: float):
			
 
				 
			
 
				-    """
			
 
				-    对轮毂中心数据进行降噪，和前项填充
			
 
				-    :param data_group: process_data计算完成后轮毂中心的数据。
			
 
				-    :param noise_threshold: 去掉占比小于noise_threshold的数据。
			
 
				-    :return: filtered_data：降噪后的数据
			
 
				-    """
			
 
				 
			
 
				-    time.sleep(1)
			
 
				-
			
 
				-    # 计算distance的分布
			
 
				     distance_counts = data_group['distance'].value_counts(normalize=True)
			
 
				     noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index
			
 
				     noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index
			
 
				     data_group.loc[noise_indices, 'distance'] = np.nan
			
 
				 
			
 
				-    # 选择频率最大的5个值
			
 
				+    
			
 
				     top_5_distances = distance_counts.head(5).index
			
 
				     mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean()
			
 
				     data_group.loc[(data_group['distance'] < mean_values - 20) | (
			
 
				             data_group['distance'] > mean_values * 1.1), 'distance'] = np.nan
			
 
				 
			
 
				-    # 前向填充
			
 
				+    
			
 
				     data_group['distance'] = data_group['distance'].fillna(method='ffill')
			
 
				     filtered_data = data_group
			
 
				 
			
@@ -468,34 +435,24 @@ def tower_filter(data_group: pd.DataFrame, noise_threshold: float):
 
				 
			
 
				 def cycle_calculate(data_group: pd.DataFrame, noise_threshold: float, min_distance: float):
			
 
				 
			
 
				-    """
			
 
				-    对数据进行降噪，和前项填充；计算数据的周期节点，叶片前缘突变点、后缘突变点
			
 
				-    :param data_group: process_data计算完成后的数据。
			
 
				-    :param noise_threshold: 去掉占比小于noise_threshold的数据。
			
 
				-    :param min_distance: 区分叶片和塔筒的距离差值。
			
 
				-    :return: start_points：周期开始点, end_points：周期结束点, filtered_data：降噪后的数据
			
 
				-    """
			
 
				-
			
 
				-    time.sleep(1)
			
 
				-
			
 
				-    # 计算distance的分布
			
 
				+    
			
 
				     distance_counts = data_group['distance'].value_counts(normalize=True)
			
 
				     noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index
			
 
				     noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index
			
 
				     data_group.loc[noise_indices, 'distance'] = np.nan
			
 
				 
			
 
				-    # 选择频率最大的5个值
			
 
				+    
			
 
				     top_5_distances = distance_counts.head(5).index
			
 
				     mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean()
			
 
				     data_group.loc[(data_group['distance'] < mean_values - 20) | (
			
 
				             data_group['distance'] > mean_values * 1.1), 'distance'] = np.nan
			
 
				 
			
 
				 
			
 
				-    # 前向填充
			
 
				+    
			
 
				     data_group['distance'] = data_group['distance'].fillna(method='ffill')
			
 
				     filtered_data = data_group
			
 
				 
			
 
				-    # 计算相邻两行distance的差值
			
 
				+    
			
 
				     filtered_data['distance_diff'] = filtered_data['distance'].diff()
			
 
				     large_diff_indices = filtered_data[filtered_data['distance_diff'] > min_distance].index
			
 
				     small_diff_indices = filtered_data[filtered_data['distance_diff'] < -min_distance].index
			
@@ -504,27 +461,27 @@ def cycle_calculate(data_group: pd.DataFrame, noise_threshold: float, min_distan
 
				     start_points = pd.DataFrame()
			
 
				     end_points = pd.DataFrame()
			
 
				 
			
 
				-    # 遍历所有差值大于的行
			
 
				+    
			
 
				     for idx in large_diff_indices:
			
 
				-        # 获取当前行的 distance 值
			
 
				+        
			
 
				         current_distance = filtered_data.loc[idx, 'distance']
			
 
				 
			
 
				         next_rows_large = filtered_data.loc[idx - 1000: idx - 1]
			
 
				 
			
 
				-        # 检查是否任意 distance 的值小于 current_distance - 2
			
 
				+        
			
 
				         if next_rows_large['distance'].le(current_distance - min_distance).all():
			
 
				-            # 如果都小于，则将当前行和下一行添加到 special_points 中
			
 
				+            
			
 
				             end_points = pd.concat([end_points, filtered_data.loc[[idx - 1]]])
			
 
				 
			
 
				     for idx in small_diff_indices:
			
 
				-        # 获取当前行的 distance 值
			
 
				+        
			
 
				         current_distance = filtered_data.loc[idx - 1, 'distance']
			
 
				 
			
 
				         next_rows_small = filtered_data.iloc[idx: idx + 1000]
			
 
				 
			
 
				-        # 检查是否任意 distance 的值小于 current_distance - 2
			
 
				+        
			
 
				         if next_rows_small['distance'].le(current_distance - min_distance).all():
			
 
				-            # 如果都小于，则将当前行和下一行添加到 special_points 中
			
 
				+            
			
 
				             start_points = pd.concat([start_points, filtered_data.loc[[idx]]])
			
 
				 
			
 
				     if end_points.iloc[0, 0] < start_points.iloc[0, 0]:
			
@@ -540,56 +497,43 @@ def cycle_calculate(data_group: pd.DataFrame, noise_threshold: float, min_distan
 
				 def data_normalize(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame) \
			
 
				         -> Tuple[List[pd.DataFrame], List[pd.DataFrame], List[pd.DataFrame], int, list]:
			
 
				 
			
 
				-    """
			
 
				-    提取每个叶片的数据并归一化，输出散点图和拟合图
			
 
				-    :param data_group: cycle_calculate计算完成后的数据。
			
 
				-    :param start_points: 所有每个周期开始点，叶片前缘突变点。
			
 
				-    :param end_points: 叶片后缘突变点。
			
 
				-    :return: turbines_processed: 每个叶片的拟合数据，
			
 
				-             turbines_scattered: 每个叶片的散点数据，
			
 
				-             border_rows: 每个叶片的2个边缘数据，
			
 
				-             normalize_cycle: 周期长度
			
 
				-    """
			
 
				-
			
 
				-    time.sleep(1)
			
 
				 
			
 
				     combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
			
 
				-    # 检查排序后的数据从start开始，end结束
			
 
				+    
			
 
				     if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
			
 
				         combined_df_sorted = combined_df_sorted.iloc[1:]
			
 
				     if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
			
 
				         combined_df_sorted = combined_df_sorted.iloc[:-1]
			
 
				     combined_df_sorted.reset_index(drop=True, inplace=True)
			
 
				 
			
 
				-    # 将 start_points 中的时间点转换为列表
			
 
				+    
			
 
				     start_times = combined_df_sorted['time'].tolist()
			
 
				-    time.sleep(1)
			
 
				 
			
 
				     normalize_cycle = start_times[1] - start_times[0]
			
 
				     full_cycle = int((start_times[2] - start_times[0]) * 3)
			
 
				     turbines = [pd.DataFrame() for _ in range(3)]
			
 
				 
			
 
				-    # 遍历所有起始时间点
			
 
				+    
			
 
				     for i in range(0, len(start_times), 2):
			
 
				 
			
 
				-        # 获取当前起始和结束时间点
			
 
				+        
			
 
				         start_time = start_times[i]
			
 
				         end_time = start_times[i + 1]
			
 
				 
			
 
				-        # 根据当前起始时间点和结束时间点对数据进行分段
			
 
				+        
			
 
				         segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)]
			
 
				 
			
 
				         if segment is None:
			
 
				             pass
			
 
				         else:
			
 
				-            # 周期归一化
			
 
				+            
			
 
				             ratio = (end_time - start_time) / normalize_cycle
			
 
				             segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio
			
 
				 
			
 
				-            # 将结果添加到相应的 turbine 数据框中
			
 
				+            
			
 
				             turbines[i % 3] = pd.concat([turbines[i % 3], segment])
			
 
				 
			
 
				-    # 数据分组清洗、求平均
			
 
				+    
			
 
				     turbines_processed = []
			
 
				     turbines_scattered = []
			
 
				     min_list = []
			
@@ -597,36 +541,36 @@ def data_normalize(data_group: pd.DataFrame, start_points: pd.DataFrame, end_poi
 
				     time_list = list(range(0, normalize_cycle, 1000))
			
 
				 
			
 
				     for turbine in turbines:
			
 
				-        # 按时间排序
			
 
				+        
			
 
				         turbine_sorted = turbine.sort_values(by='time').reset_index(drop=True)
			
 
				 
			
 
				-        # 找到time列的第一个值
			
 
				+        
			
 
				         first_time = turbine_sorted['time'].iloc[0]
			
 
				 
			
 
				-        # 分组，时间列每1000为一组（每40个时间点一组）
			
 
				+        
			
 
				         bins = list(range(int(first_time), int(turbine_sorted['time'].max()), 1000))
			
 
				-        # 原始代码
			
 
				-        # bins = list(range(int(first_time), int(turbine_sorted['time'].max()) + len(start_times), int(fs / 50)))
			
 
				+        
			
 
				+        
			
 
				         grouped = turbine_sorted.groupby(pd.cut(turbine_sorted['time'], bins=bins, right=False))
			
 
				 
			
 
				-        # 初始化一个空的 DataFrame 用于存储处理后的数据
			
 
				+        
			
 
				         processed_df = pd.DataFrame()
			
 
				         scattered_df = pd.DataFrame()
			
 
				         mean_points = []
			
 
				         diff_points = []
			
 
				 
			
 
				-        # 对每个组进行处理
			
 
				+        
			
 
				         for _, group in grouped:
			
 
				-            # 去除 distance 最大和最小的前5%
			
 
				+            
			
 
				             quantile_5 = group['distance'].quantile(0.05)
			
 
				             quantile_95 = group['distance'].quantile(0.95)
			
 
				             filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)]
			
 
				 
			
 
				-            # 计算均值
			
 
				+            
			
 
				             mean_point = filtered_group['distance'].mean()
			
 
				             mean_points.append(mean_point)
			
 
				 
			
 
				-        # 遍历 mean_points 列表，计算每个元素与其下一个元素的差值
			
 
				+        
			
 
				         for i in range(len(mean_points) - 1):
			
 
				             diff = abs(mean_points[i + 1] - mean_points[i])
			
 
				             diff_points.append(diff)
			
@@ -637,22 +581,22 @@ def data_normalize(data_group: pd.DataFrame, start_points: pd.DataFrame, end_poi
 
				         sdr_diff = np.max(subset1) * 1.1
			
 
				         min_list.append(min(mean_points))
			
 
				 
			
 
				-        # 找到第一个和最后一个小于 sdr_diff 的序号
			
 
				+        
			
 
				         first_index = np.where(diff_points < sdr_diff)[0][0]
			
 
				         last_index = np.where(diff_points < sdr_diff)[0][-1]
			
 
				 
			
 
				         for index, (bin, group) in enumerate(grouped):
			
 
				 
			
 
				-            # 去除 distance 最大和最小的前5%
			
 
				+            
			
 
				             quantile_5 = group['distance'].quantile(0.05)
			
 
				             quantile_95 = group['distance'].quantile(0.95)
			
 
				             filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)]
			
 
				 
			
 
				-            if first_index <= index < last_index:  # 如果斜率小于，则认为该组数据不是突变点
			
 
				+            if first_index <= index < last_index:  
			
 
				 
			
 
				-                # 计算中点
			
 
				+                
			
 
				                 mid_point = filtered_group.mean()
			
 
				-                # 将中点转换为 DataFrame 并添加到处理后的 DataFrame 中
			
 
				+                
			
 
				                 mid_point_df = pd.DataFrame([mid_point])
			
 
				                 mid_point_df.iloc[0, 0] = time_list[index]
			
 
				                 processed_df = pd.concat([processed_df, mid_point_df], ignore_index=True)
			
@@ -660,7 +604,7 @@ def data_normalize(data_group: pd.DataFrame, start_points: pd.DataFrame, end_poi
 
				             else:
			
 
				                 pass
			
 
				 
			
 
				-        # 找到time列的最小值和最大值
			
 
				+        
			
 
				         min_time = processed_df['time'].min()
			
 
				         max_time = processed_df['time'].max()
			
 
				 
			
@@ -671,54 +615,48 @@ def data_normalize(data_group: pd.DataFrame, start_points: pd.DataFrame, end_poi
 
				         elif sd_time[1] > max_time:
			
 
				             sd_time[1] = max_time
			
 
				 
			
 
				-        # 将处理后的 DataFrame 添加到列表中
			
 
				+        
			
 
				         turbines_processed.append(processed_df)
			
 
				         turbines_scattered.append(scattered_df)
			
 
				 
			
 
				     border_rows = []
			
 
				     for i, turbine in enumerate(turbines_processed):
			
 
				-        # 找到离 sd_time[0] 最近的行的索引
			
 
				+        
			
 
				         closest_index_0 = (turbine['time'] - sd_time[0]).abs().idxmin()
			
 
				         turbine.at[closest_index_0, 'time'] = sd_time[0]
			
 
				         sd_time_row_0 = turbine.loc[closest_index_0]
			
 
				 
			
 
				-        # 找到离 sd_time[1] 最近的行的索引
			
 
				+        
			
 
				         closest_index_1 = (turbine['time'] - sd_time[1]).abs().idxmin()
			
 
				         turbine.at[closest_index_1, 'time'] = sd_time[1]
			
 
				         sd_time_row_1 = turbine.loc[closest_index_1]
			
 
				 
			
 
				-        # 切片 turbine，从 closest_index_0 到 closest_index_1
			
 
				+        
			
 
				         turbines_processed[i] = turbine.iloc[closest_index_0:closest_index_1 + 1].reset_index(drop=True)
			
 
				 
			
 
				         sd_time_rows_turbine = pd.concat([pd.DataFrame([sd_time_row_0]), pd.DataFrame([sd_time_row_1])]
			
 
				                                          , ignore_index=True)
			
 
				         border_rows.append(sd_time_rows_turbine)
			
 
				 
			
 
				-    time.sleep(1)
			
 
				 
			
 
				     return turbines_processed, turbines_scattered, border_rows, full_cycle, min_list
			
 
				 
			
 
				 
			
 
				 def blade_shape(turbines_processed: List[pd.DataFrame]):
			
 
				 
			
 
				-    """
			
 
				-    计算叶片平均形状、叶片形状偏差。
			
 
				-    :param turbines_processed:叶片拟合曲线数据，来自data_normalize
			
 
				-    :return: 叶片平均形状、叶片形状偏差
			
 
				-    """
			
 
				 
			
 
				     row_counts = [df.shape[0] for df in turbines_processed]
			
 
				     num_rows = min(row_counts)
			
 
				 
			
 
				-    # 创建一个新的data.frame用于保存结果
			
 
				+    
			
 
				     turbine_avg = pd.DataFrame(index=range(num_rows), columns=['time', 'distance'])
			
 
				     turbine_diff = [pd.DataFrame(index=range(num_rows), columns=['time', 'distance']) for _ in turbines_processed]
			
 
				 
			
 
				-    # 遍历每一行
			
 
				+    
			
 
				     for i in range(num_rows):
			
 
				-        distances = [df.loc[i, 'distance'] for df in turbines_processed]  # 获取每个data.frame的distance列的值
			
 
				-        avg_distance = sum(distances) / len(distances)  # 计算distance列的平均值
			
 
				-        time_value = turbines_processed[0].loc[i, 'time']  # 获取time列的值
			
 
				+        distances = [df.loc[i, 'distance'] for df in turbines_processed]  
			
 
				+        avg_distance = sum(distances) / len(distances)  
			
 
				+        time_value = turbines_processed[0].loc[i, 'time']  
			
 
				         turbine_avg.loc[i, 'time'] = time_value
			
 
				         turbine_avg.loc[i, 'distance'] = avg_distance
			
 
				 
			
@@ -727,25 +665,17 @@ def blade_shape(turbines_processed: List[pd.DataFrame]):
 
				             turbine_diff[j].loc[i, 'time'] = time_value
			
 
				             turbine_diff[j].loc[i, 'distance'] = distances[j]
			
 
				 
			
 
				-    time.sleep(10)
			
 
				-
			
 
				     return turbine_avg, turbine_diff
			
 
				 
			
 
				 
			
 
				 def coordinate_normalize(tip_border_rows: List[pd.DataFrame], tip_angle):
			
 
				 
			
 
				-    """
			
 
				-    将叶尖测量数据和叶根、轮毂中心的测量原点归一化。
			
 
				-    :param tip_border_rows: 3个叶尖边缘数据
			
 
				-    :param tip_angle: 叶尖测量俯仰角
			
 
				-    :return: 归一化后叶尖数据，叶尖俯仰角
			
 
				-    """
			
 
				-
			
 
				     tip_angle1 = np.deg2rad(tip_angle)
			
 
				     tip_angle_list = []
			
 
				     for turbine in tip_border_rows:
			
 
				-        tip_angle_cal = np.arctan((np.sin(tip_angle1) * turbine['distance'] - 0.07608) /
			
 
				-                                  np.cos(tip_angle1) * turbine['distance'])
			
 
				+        tip_angle_cal0 = ((np.sin(tip_angle1) * turbine['distance'] - 0.07608) /
			
 
				+                          (np.cos(tip_angle1) * turbine['distance']))
			
 
				+        tip_angle_cal = np.arctan(tip_angle_cal0)
			
 
				         turbine['distance'] = (turbine['distance'] ** 2 + 0.0057881664 -
			
 
				                                0.15216 * turbine['distance'] * np.sin(tip_angle1)) ** 0.5
			
 
				 
			
@@ -754,22 +684,11 @@ def coordinate_normalize(tip_border_rows: List[pd.DataFrame], tip_angle):
 
				     tip_angle_new = float(np.mean(tip_angle_list))
			
 
				     tip_angle_new1 = np.rad2deg(tip_angle_new)
			
 
				 
			
 
				-    return tip_border_rows, tip_angle
			
 
				+    return tip_border_rows, tip_angle_new1
			
 
				 
			
 
				 
			
 
				 def radius_cal(border_rows, meas_angle, cen_dist, cen_angle, angle_main, angle_rotate):
			
 
				 
			
 
				-    """
			
 
				-    计算测量点处的旋转半径。
			
 
				-    :param border_rows: 三个叶片的边界
			
 
				-    :param meas_angle: 回波俯仰角
			
 
				-    :param cen_dist: 轮毂中心距离
			
 
				-    :param cen_angle: 轮毂中心俯仰角
			
 
				-    :param angle_main: 主轴倾角
			
 
				-    :param angle_rotate: 锥角
			
 
				-    :return: 旋转半径
			
 
				-    """
			
 
				-
			
 
				     aero_dist = (pd.concat([df['distance'] for df in border_rows]).mean())
			
 
				     cen_x = np.cos(np.deg2rad(cen_angle)) * cen_dist
			
 
				     cen_y = np.sin(np.deg2rad(cen_angle)) * cen_dist
			
@@ -790,17 +709,8 @@ def radius_cal(border_rows, meas_angle, cen_dist, cen_angle, angle_main, angle_r
 
				 def blade_angle_aero_dist(border_rows: List[pd.DataFrame], radius: float, full_cycle: int,
			
 
				                           tower_dist: float, v_angle: float):
			
 
				 
			
 
				-    """
			
 
				-    计算叶片相对桨距角和叶片净空距离。
			
 
				-    :param border_rows: 三个叶片的边界
			
 
				-    :param radius: 旋转半径
			
 
				-    :param full_cycle: 全周期
			
 
				-    :param tower_dist: 塔筒距离
			
 
				-    :param v_angle: 俯仰角度
			
 
				-    :return: 绝对桨距角，净空距离，叶片线速度
			
 
				-    """
			
 
				-
			
 
				-    v_speed = 2 * np.pi * radius / full_cycle  # 叶片线速度m/(1计时器单位）
			
 
				+
			
 
				+    v_speed = 2 * np.pi * radius / full_cycle  
			
 
				     pitch_angle_list = []
			
 
				     aero_dist_list = []
			
 
				     cen_blade = []
			
@@ -827,9 +737,6 @@ def blade_angle_aero_dist(border_rows: List[pd.DataFrame], radius: float, full_c
 
				 
			
 
				 def find_param(path: str):
			
 
				 
			
 
				-    """
			
 
				-    根据文件路径获取参数
			
 
				-    """
			
 
				 
			
 
				     path = path.replace('\\', '/')
			
 
				     last_slash_index = path.rfind('/')
			
@@ -860,45 +767,34 @@ def find_param(path: str):
 
				 def blade_dist_distribute_cal(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame,
			
 
				                               tower_dist: float, v_angle: float, blade_cen_dist: list):
			
 
				 
			
 
				-    """
			
 
				-    计算每个叶片每个周期的转速和净空距离
			
 
				-    :param data_group: cycle_calculate计算完成后的数据。
			
 
				-    :param start_points: 所有每个周期开始点，叶片前缘突变点。
			
 
				-    :param end_points: 叶片后缘突变点。
			
 
				-    :param tower_dist: 塔筒距离。
			
 
				-    :param v_angle: 测量俯仰角度。
			
 
				-    :param blade_cen_dist: 叶片内部距离。
			
 
				-    """
			
 
				-
			
 
				-    time.sleep(1)
			
 
				 
			
 
				     combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
			
 
				-    # 检查排序后的数据从start开始，end结束
			
 
				+    
			
 
				     if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
			
 
				         combined_df_sorted = combined_df_sorted.iloc[1:]
			
 
				     if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
			
 
				         combined_df_sorted = combined_df_sorted.iloc[:-1]
			
 
				     combined_df_sorted.reset_index(drop=True, inplace=True)
			
 
				 
			
 
				-    # 将 start_points 中的时间点转换为列表
			
 
				+    
			
 
				     start_times = combined_df_sorted['time'].tolist()
			
 
				 
			
 
				     normalize_cycle = start_times[1] - start_times[0]
			
 
				     tower_clearance = [pd.DataFrame() for _ in range(3)]
			
 
				 
			
 
				-    # 遍历所有起始时间点
			
 
				+    
			
 
				     for i in range(0, len(start_times) - 2, 2):
			
 
				-        # 获取当前起始和结束时间点
			
 
				+        
			
 
				         start_time = start_times[i]
			
 
				         end_time = start_times[i + 1]
			
 
				 
			
 
				-        # 根据当前起始时间点和结束时间点对数据进行分段
			
 
				+        
			
 
				         segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)]
			
 
				         min_distance = segment['distance'].min()
			
 
				         clearance = np.abs(tower_dist - min_distance - blade_cen_dist[i % 3]) * np.cos(np.deg2rad(v_angle))
			
 
				         r_speed = (start_times[i + 2] - start_times[i]) * 3 / 5000000
			
 
				 
			
 
				-        # 周期归一化
			
 
				+        
			
 
				         ratio = (end_time - start_time) / normalize_cycle
			
 
				         segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio
			
 
				 
			
@@ -907,7 +803,7 @@ def blade_dist_distribute_cal(data_group: pd.DataFrame, start_points: pd.DataFra
 
				             'clearance': [clearance]
			
 
				         })
			
 
				 
			
 
				-        # 将结果添加到相应的 turbine 数据框中
			
 
				+        
			
 
				         tower_clearance[i % 3] = pd.concat([tower_clearance[i % 3], new_df])
			
 
				 
			
 
				     return tower_clearance
			
--- a/frequency_filter.py
+++ b/frequency_filter.py
@@ -8,9 +8,9 @@ from scipy.fft import fft, fftfreq
 
				 import numpy as np
			
 
				 
			
 
				 
			
 
				-warnings.filterwarnings("ignore", category=SettingWithCopyWarning)  # 忽略特定警告
			
 
				-plt.rcParams['font.sans-serif'] = ['SimHei']  # 使用黑体
			
 
				-plt.rcParams['axes.unicode_minus'] = False  # 解决保存图像是负号'-'显示为方块的问题
			
 
				+warnings.filterwarnings("ignore", category=SettingWithCopyWarning)
			
 
				+plt.rcParams['font.sans-serif'] = ['SimHei']
			
 
				+plt.rcParams['axes.unicode_minus'] = False
			
 
				 
			
 
				 
			
 
				 def butter_lowpass_filter(data, cutoff, fs, order=5):