wei_lai пре 6 месеци
комит
884959f5d7
4 измењених фајлова са 1978 додато и 0 уклоњено
  1. 36 0
      api_test.py
  2. 976 0
      data_analyse_origin.py
  3. 817 0
      data_clean.py
  4. 149 0
      frequency_filter.py

+ 36 - 0
api_test.py

@@ -0,0 +1,36 @@
+import sys
+import json
+import data_clean as dc
+import base64
+import io
+import locale
+
+# Re-wrap the standard streams as UTF-8 so non-ASCII JSON is emitted/read
+# the same way regardless of the console's default encoding.
+sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
+# Keep the standard signature (optional ``do_setlocale``) so library code
+# that calls ``locale.getpreferredencoding(False)`` does not raise TypeError.
+locale.getpreferredencoding = lambda do_setlocale=True: 'UTF-8'
+
+
+if __name__ == "__main__":
+    # Command-line contract: argv[1] is the API name, argv[2] (for
+    # "loaddata") is a base64-encoded UTF-8 JSON document.
+    # Every failure prints a JSON object with an "error" key and exits 1.
+    if len(sys.argv) < 2:
+        print(json.dumps({"error": "No API specified"}))
+        sys.exit(1)
+
+    api_name = sys.argv[1]
+
+    if api_name == "getpath":
+        return_path = str(dc.result_main())
+        print(json.dumps({'obj': return_path}, ensure_ascii=False))
+
+    elif api_name == "loaddata":
+        if len(sys.argv) < 3:
+            print(json.dumps({"error": "No data"}))
+            sys.exit(1)
+        try:
+            # binascii.Error, UnicodeDecodeError and json.JSONDecodeError
+            # are all ValueError subclasses.
+            data = json.loads(base64.b64decode(sys.argv[2]).decode("utf-8"))
+        except ValueError:
+            print(json.dumps({"error": "Invalid data"}))
+            sys.exit(1)
+        return_list = dc.data_analyse(data)
+        print(json.dumps(return_list, ensure_ascii=False))
+
+    else:
+        print(json.dumps({"error": "Invalid API"}))
+        sys.exit(1)
+

+ 976 - 0
data_analyse_origin.py

@@ -0,0 +1,976 @@
+import os
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+from matplotlib.ticker import MaxNLocator
+from typing import Tuple, List
+import warnings
+import time
+import sys
+import frequency_filter as ff
+from datetime import datetime
+
+
+warnings.filterwarnings("ignore", category=FutureWarning) # silence FutureWarning messages
+plt.rcParams['font.sans-serif'] = ['SimHei']  # use the SimHei font so Chinese labels render
+plt.rcParams['axes.unicode_minus'] = False  # avoid the minus sign '-' being drawn as a box in saved figures
+
+
+# TODO: distribution of the clearance distance for the 3 blades, computing the clearance once every 10 revolutions
+
+def result_main():
+
+    """
+    Make sure the history store exists and return its path.
+
+    A ``data`` directory is created next to the running Python interpreter
+    (``sys.executable``); inside it ``history_data.csv`` is created with
+    only the header row when it does not exist yet.
+
+    :return: path of ``history_data.csv``
+    """
+
+    project_directory = os.path.dirname(sys.executable)
+    data_folder = os.path.join(project_directory, 'data')
+    # exist_ok avoids the check-then-create race of a separate exists() test.
+    os.makedirs(data_folder, exist_ok=True)
+
+    csv_file_path = os.path.join(data_folder, 'history_data.csv')
+    # Only write the header when the file is missing so existing history
+    # rows are never overwritten.
+    if not os.path.exists(csv_file_path):
+        pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率',
+                              '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
+                              '叶片1净空值', '叶片2净空值', '叶片3净空值', '平均净空值',
+                              '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
+                              '振动幅值', '振动主频']).to_csv(csv_file_path, index=False)
+
+    return csv_file_path
+
+
+def _xy_lists(frame, step=1):
+    """
+    Return the first two columns of ``frame`` as an ``xdata``/``ydata`` dict.
+    :param frame: DataFrame whose column 0 is the x axis and column 1 the y axis.
+    :param step: keep every ``step``-th sample.
+    :return: ``{'xdata': [...], 'ydata': [...]}``
+    """
+    return {
+        'xdata': frame.iloc[:, 0].tolist()[::step],
+        'ydata': frame.iloc[:, 1].tolist()[::step],
+    }
+
+
+def data_analyse(path: List[str]):
+
+    """
+    Run the full analysis, append a summary row to the history CSV and
+    return the complete result as a JSON-serialisable dict.
+
+    :param path: ``[locate_file, measure_file, cone_angle, axial_inclination]``;
+                 the two angles are converted with ``float``.
+    :return: dict with the keys ``original_plot``, ``fft_plot``, ``blade_tip``,
+             ``blade_root``, ``dist_distribution`` and ``analyse_table``.
+    :raises ValueError: when the first tip pass is not enclosed by the first
+                        root pass.
+    """
+    locate_file = path[0]
+    measure_file = path[1]
+    noise_reduction = 0.000001  # distance values rarer than this share of all samples are dropped
+    min_difference = 1.5  # jump between neighbouring samples that marks a candidate cycle edge
+    angle_cone = float(path[2])  # cone angle
+    axial_inclination = float(path[3])  # axial inclination
+
+    wind_name, turbine_code, time_code, sampling_fq, angle_nan, angle_cen = find_param(locate_file)
+    # time_code is deliberately taken from the measurement file (it overwrites
+    # the value read from the locate file, as before).
+    wind_name_1, turbine_code_1, time_code, sampling_fq_1, angle_tip, angle_root = find_param(measure_file)
+
+    sampling_fq_1 = sampling_fq_1 * 1000
+    sampling_fq = sampling_fq * 1000
+    print(wind_name, turbine_code, time_code, sampling_fq, angle_nan, angle_cen)
+    print(wind_name_1, turbine_code_1, time_code, sampling_fq_1, angle_tip, angle_root)
+
+    _, data_cen = process_data(locate_file)
+    data_tip, data_root = process_data(measure_file)
+
+    start_tip, end_tip, filtered_data_tip = cycle_calculate(data_tip, noise_reduction, min_difference)
+    start_root, end_root, filtered_data_root = cycle_calculate(data_root, noise_reduction, min_difference)
+    filtered_data_cen = tower_filter(data_cen, noise_reduction)
+    dist_cen = np.mean(filtered_data_cen.iloc[:, 1].tolist())
+
+    # Drop a leading tip pass that finished before the first root pass began,
+    # then insist that the first tip pass lies inside the first root pass.
+    if end_tip.iloc[0, 0] < start_root.iloc[0, 0]:
+        start_tip = start_tip.drop(start_tip.index[0])
+        end_tip = end_tip.drop(end_tip.index[0])
+    if not (start_root.iloc[0, 0] < start_tip.iloc[0, 0] < end_tip.iloc[0, 0] < end_root.iloc[0, 0]):
+        raise ValueError("The elements are not in the expected order.")
+
+    tower_dist_tip = ff.tower_cal(filtered_data_tip, start_tip, end_tip, sampling_fq_1)
+    tower_dist_root = ff.tower_cal(filtered_data_root, start_root, end_root, sampling_fq_1)
+    lowpass_data, fft_x, fft_y, tower_freq, tower_max = ff.process_fft(filtered_data_cen, sampling_fq)
+
+    result_line_tip, result_scatter_tip, border_rows_tip, cycle_len_tip \
+        = data_normalize(filtered_data_tip, start_tip, end_tip)
+    result_line_root, result_scatter_root, border_rows_root, cycle_len_root \
+        = data_normalize(filtered_data_root, start_root, end_root)
+
+    result_avg_tip, _ = blade_shape(result_line_tip)
+    result_avg_root, _ = blade_shape(result_line_root)
+
+    border_rows_tip_new, angle_tip_new = coordinate_normalize(border_rows_tip, angle_tip)
+    print('新俯仰角' + str(angle_tip_new))
+    print('轮毂中心距离' + str(dist_cen))
+
+    tip_r = radius_cal(border_rows_tip_new, angle_tip_new, dist_cen, angle_cen, axial_inclination, angle_cone)
+    root_r = radius_cal(border_rows_root, angle_root, dist_cen, angle_cen, axial_inclination, angle_cone)
+
+    pitch_angle_tip, aero_dist_tip, v_speed_tip, cen_blade_tip = (
+        blade_angle_aero_dist(border_rows_tip, tip_r, cycle_len_tip, tower_dist_tip, angle_tip_new))
+    # blade_angle_aero_dist returns four values; the per-blade centre
+    # distances of the root are not needed here.
+    pitch_angle_root, _, v_speed_root, _ = (
+        blade_angle_aero_dist(border_rows_root, root_r, cycle_len_root, tower_dist_root, angle_root))
+
+    dist_distribute = blade_dist_distribute_cal(filtered_data_tip, start_tip, end_tip,
+                                                tower_dist_tip, angle_tip_new, cen_blade_tip)
+
+    # Plots are drawn before the time axis is rescaled below.
+    plot_data(result_line_tip, 'line', 'data1')
+    plot_data(result_scatter_tip, 'scatter', 'data1')
+    plot_data(result_line_root, 'line', 'data2')
+    plot_data(result_scatter_root, 'scatter', 'data2')
+
+    # Multiply the time column by the blade speed returned by
+    # blade_angle_aero_dist.
+    for df in result_line_tip:
+        df.iloc[:, 0] = df.iloc[:, 0] * v_speed_tip
+    for df in result_line_root:
+        df.iloc[:, 0] = df.iloc[:, 0] * v_speed_root
+    print(v_speed_tip, v_speed_root)
+
+    result_avg_tip.iloc[:, 0] = result_avg_tip.iloc[:, 0] * v_speed_tip
+    result_avg_root.iloc[:, 0] = result_avg_root.iloc[:, 0] * v_speed_root
+
+    # Per-blade twist: absolute difference between root and tip pitch values.
+    twist_1 = round(np.abs(pitch_angle_root[0] - pitch_angle_tip[0]), 2)
+    twist_2 = round(np.abs(pitch_angle_root[1] - pitch_angle_tip[1]), 2)
+    twist_3 = round(np.abs(pitch_angle_root[2] - pitch_angle_tip[2]), 2)
+    twist_avg = round((twist_1 + twist_2 + twist_3) / 3, 2)
+
+    # Down-sampling step for the raw plots; never below 1 because a slice
+    # step of 0 raises ValueError.
+    sampling_num = max(1, int(0.01 * sampling_fq_1))
+    # NOTE(review): 5000000 presumably converts timer ticks to seconds — confirm.
+    time_scale = 5000000
+    data_tip.iloc[:, 0] = data_tip.iloc[:, 0] / time_scale
+    data_root.iloc[:, 0] = data_root.iloc[:, 0] / time_scale
+    lowpass_data.iloc[:, 0] = lowpass_data.iloc[:, 0] / time_scale
+
+    print('time_length:' + str(data_root.iloc[-1, 0]))
+
+    return_list = [
+        time_code, wind_name, turbine_code, sampling_fq_1,
+        pitch_angle_root[0], pitch_angle_root[1], pitch_angle_root[2], pitch_angle_root[3],
+        aero_dist_tip[0], aero_dist_tip[1], aero_dist_tip[2], aero_dist_tip[3],
+        twist_1, twist_2, twist_3, twist_avg,
+        tower_max, tower_freq,
+    ]
+
+    print(result_line_tip[0].iloc[:, 0])
+    print(result_line_root[0].iloc[:, 0])
+    print('振动主频' + str(tower_freq))
+    print('振动幅值' + str(tower_max))
+
+    # One history row, in the same column order as the CSV header.
+    df_new_row = pd.DataFrame([return_list],
+                              columns=['时间', '场站', '风机编号', '采样频率',
+                              '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
+                              '叶片1净空值', '叶片2净空值', '叶片3净空值', '平均净空值',
+                              '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
+                              '振动幅值', '振动主频'])
+
+    blade_keys = ('first_blade', 'second_blade', 'third_blade')
+    lowpass_time = lowpass_data['time'].tolist()
+    lowpass_distance = lowpass_data['distance_filtered'].tolist()
+
+    blade_tip_plot = {key: _xy_lists(result_line_tip[i]) for i, key in enumerate(blade_keys)}
+    blade_tip_plot['avg_blade'] = _xy_lists(result_avg_tip)
+    blade_root_plot = {key: _xy_lists(result_line_root[i]) for i, key in enumerate(blade_keys)}
+    blade_root_plot['avg_blade'] = _xy_lists(result_avg_root)
+
+    json_output = {
+        'original_plot': {
+            'blade_tip': _xy_lists(data_tip, sampling_num),
+            'blade_root': _xy_lists(data_root, sampling_num)
+        },
+        'fft_plot': {
+            'lowpass': {
+                'xdata': lowpass_time[::sampling_num],
+                'ydata': lowpass_distance[::sampling_num],
+                'xmax': max(lowpass_time),
+                'xmin': min(lowpass_time),
+                'ymax': max(lowpass_distance),
+                'ymin': min(lowpass_distance)
+            },
+            'fft': {
+                'xdata': fft_x,
+                'ydata': fft_y,
+                'xmax': max(fft_x),
+                'xmin': min(fft_x),
+                'ymax': max(fft_y),
+                'ymin': min(fft_y)
+            }
+        },
+        'blade_tip': blade_tip_plot,
+        'blade_root': blade_root_plot,
+        'dist_distribution': {key: _xy_lists(dist_distribute[i]) for i, key in enumerate(blade_keys)},
+        'analyse_table': {
+            'pitch_angle_diff': {
+                'blade_1': pitch_angle_root[0],
+                'blade_2': pitch_angle_root[1],
+                'blade_3': pitch_angle_root[2],
+                'blade_relate': pitch_angle_root[3]
+            },
+            'aero_dist': {
+                'blade_1': aero_dist_tip[0],
+                'blade_2': aero_dist_tip[1],
+                'blade_3': aero_dist_tip[2],
+                'blade_avg': aero_dist_tip[3]
+            },
+            'blade_twist': {
+                'blade_1': twist_1,
+                'blade_2': twist_2,
+                'blade_3': twist_3,
+                'blade_avg': twist_avg
+            },
+            'tower_vibration': {
+                'max_vibration': tower_max,
+                'main_vibration_freq': tower_freq
+            }
+        }
+    }
+
+    # result_main() creates the data directory and the header-only CSV when
+    # they are missing and returns the CSV path.
+    csv_file_path = result_main()
+    df_new_row.to_csv(csv_file_path, mode='a', header=False, index=False)
+    print(csv_file_path)
+
+    return json_output
+
+
+def process_data(file_path):
+
+    """
+    Load a measurement file, repair a single timer reset, drop rows whose
+    timestamp lies outside the first/last timestamp, and split the result
+    into two ``time``/``distance`` frames.
+
+    :param file_path: header-less CSV; columns 2, 4 and 9 (0-based 1, 3, 8)
+                      are read as time, distance 1 and distance 2.
+    :return: ``(data_1, data_2)`` — independent DataFrames with the columns
+             ``['time', 'distance']``; time starts at 0.
+    :raises ValueError: when no rows are left after truncation.
+    """
+
+    data = pd.read_csv(file_path, usecols=[1, 3, 8], header=None, engine='c')
+    # Only the first 95% of the rows are analysed.
+    data = data.head(int(len(data) * 0.95)).copy()
+    print(len(data))
+    if data.empty:
+        raise ValueError(f"No usable rows in {file_path!r}")
+
+    # Locate the extreme values of the time column.
+    max_value = data.iloc[:, 0].max()
+    max_index = data.iloc[:, 0].idxmax()
+    min_index = data.iloc[:, 0].idxmin()
+
+    # A minimum directly after the maximum is treated as a timer reset:
+    # shift everything from the minimum onwards by the maximum.
+    if min_index == max_index + 1:
+        data.iloc[min_index:, 0] = data.iloc[min_index:, 0] + max_value
+
+    # Rows with a timestamp outside [first_time, last_time] are anomalies.
+    last_time = data.iloc[-1, 0]
+    first_time = data.iloc[0, 0]
+    timestamps = data.iloc[:, 0]
+    out_of_range = (timestamps < first_time) | (timestamps > last_time)
+    filtered_data = data[out_of_range]
+    print(f'时间列异常数据: {filtered_data}')
+    print(f'起止时间: {first_time}, {last_time}')
+    data = data[~out_of_range].reset_index(drop=True)
+
+    # Re-base the time axis so it starts at zero.
+    min_time = data.iloc[:, 0].min()
+    data.iloc[:, 0] = data.iloc[:, 0] - min_time
+
+    # Explicit copies: later stages modify these frames in place.
+    data_1 = data.iloc[:, [0, 1]].copy()
+    data_2 = data.iloc[:, [0, 2]].copy()
+    data_1.columns = ['time', 'distance']
+    data_2.columns = ['time', 'distance']
+
+    return data_1, data_2
+
+
+def tower_filter(data_group: pd.DataFrame, noise_threshold: float):
+
+    """
+    De-noise the hub-centre samples and forward-fill the removed values.
+    The ``distance`` column of ``data_group`` is modified in place.
+    :param data_group: hub-centre data produced by process_data.
+    :param noise_threshold: distance values whose relative frequency is below
+                            this threshold are removed.
+    :return: filtered_data: the de-noised data (the same object as ``data_group``)
+    """
+
+    print('正在进行数据清洗......')
+
+    # Relative frequency of every distinct distance value; rare ones are noise.
+    distance_counts = data_group['distance'].value_counts(normalize=True)
+    rare_values = distance_counts[distance_counts < noise_threshold].index
+    data_group.loc[data_group['distance'].isin(rare_values), 'distance'] = np.nan
+
+    # Mean over the samples that carry one of the 5 most frequent values;
+    # anything more than 20 below it or above 110% of it is discarded.
+    top_5_distances = distance_counts.head(5).index
+    mean_values = data_group.loc[data_group['distance'].isin(top_5_distances), 'distance'].mean()
+    out_of_band = (data_group['distance'] < mean_values-20) | (data_group['distance'] > mean_values*1.1)
+    data_group.loc[out_of_band, 'distance'] = np.nan
+
+    nan_count = data_group['distance'].isna().sum()
+    all_count = data_group.shape[0]
+    print(f"中值是:{mean_values},替换为NaN的distance值的数量是: {nan_count}, 总数量是: {all_count},"
+          f"占比: {nan_count / all_count * 100:.2f}%")
+
+    # Forward fill; leading NaN values (nothing before them) stay NaN.
+    data_group['distance'] = data_group['distance'].ffill()
+    filtered_data = data_group
+
+    return filtered_data
+
+
+
+def cycle_calculate(data_group: pd.DataFrame, noise_threshold: float, min_distance: float):
+
+    """
+    De-noise and forward-fill the samples, then locate the cycle edges:
+    the leading-edge and trailing-edge jumps of every blade pass.
+    The ``distance`` column of ``data_group`` is modified in place.
+    :param data_group: data produced by process_data.
+    :param noise_threshold: distance values whose relative frequency is below
+                            this threshold are removed.
+    :param min_distance: distance jump that separates blade from tower.
+    :return: start_points: rows where a pass starts, end_points: rows where a
+             pass ends, filtered_data: a de-noised copy of the data
+    :raises ValueError: when no start or no end edge can be found.
+    """
+
+    print('正在计算周期节点......')
+
+    # Relative frequency of every distinct distance value; rare ones are noise.
+    distance_counts = data_group['distance'].value_counts(normalize=True)
+    rare_values = distance_counts[distance_counts < noise_threshold].index
+    data_group.loc[data_group['distance'].isin(rare_values), 'distance'] = np.nan
+
+    # Mean over the samples that carry one of the 5 most frequent values;
+    # anything more than 20 below it or above 110% of it is discarded.
+    top_5_distances = distance_counts.head(5).index
+    mean_values = data_group.loc[data_group['distance'].isin(top_5_distances), 'distance'].mean()
+    out_of_band = (data_group['distance'] < mean_values-20) | (data_group['distance'] > mean_values*1.1)
+    data_group.loc[out_of_band, 'distance'] = np.nan
+
+    nan_count = data_group['distance'].isna().sum()
+    all_count = data_group.shape[0]
+    print(f"中值是:{mean_values},替换为NaN的distance值的数量是: {nan_count}, 总数量是: {all_count},"
+          f"占比: {nan_count / all_count * 100:.2f}%")
+
+    data_group['distance'] = data_group['distance'].ffill()
+    filtered_data = data_group.copy()
+
+    # Sample-to-sample change, kept local so no helper column leaks into the
+    # caller's frame.
+    distance_diff = filtered_data['distance'].diff()
+    large_diff_indices = distance_diff.index[distance_diff > min_distance]
+    small_diff_indices = distance_diff.index[distance_diff < -min_distance]
+
+    # Upward jump: accept it as a pass end only if all of the (up to) 1000
+    # preceding samples are at least min_distance below the new level.
+    end_labels = []
+    for idx in large_diff_indices:
+        current_distance = filtered_data.loc[idx, 'distance']
+        preceding = filtered_data.loc[idx - 1000: idx - 1]
+        if preceding['distance'].le(current_distance - min_distance).all():
+            end_labels.append(idx - 1)
+
+    # Downward jump: accept it as a pass start only if all of the (up to)
+    # 1000 following samples are at least min_distance below the old level.
+    start_labels = []
+    for idx in small_diff_indices:
+        current_distance = filtered_data.loc[idx - 1, 'distance']
+        following = filtered_data.iloc[idx: idx + 1000]
+        if following['distance'].le(current_distance - min_distance).all():
+            start_labels.append(idx)
+
+    end_points = filtered_data.loc[end_labels]
+    start_points = filtered_data.loc[start_labels]
+    if start_points.empty or end_points.empty:
+        raise ValueError("No complete blade pass found: missing start or end edge.")
+
+    # Make the sequence begin with a start and finish with an end.
+    if end_points.iloc[0, 0] < start_points.iloc[0, 0]:
+        end_points = end_points.drop(end_points.index[0])
+        if end_points.empty:
+            raise ValueError("No complete blade pass found: missing start or end edge.")
+    if end_points.iloc[-1, 0] < start_points.iloc[-1, 0]:
+        start_points = start_points.drop(start_points.index[-1])
+
+    return start_points, end_points, filtered_data
+
+
+def data_normalize(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame) \
+        -> Tuple[List[pd.DataFrame], List[pd.DataFrame], List[pd.DataFrame], int]:
+
+    """
+    Extract the samples of every blade pass, normalise each pass to the
+    duration of the first one, and build a fitted curve and a scatter set
+    per blade.
+    :param data_group: data produced by cycle_calculate.
+    :param start_points: rows where a pass starts (leading-edge jump).
+    :param end_points: rows where a pass ends (trailing-edge jump).
+    :return: turbines_processed: fitted curve of every blade,
+             turbines_scattered: scatter samples of every blade,
+             border_rows: the two boundary rows of every blade,
+             full_cycle: 3 x (second pass start - first pass start)
+    :raises ValueError: when fewer than two passes are available, a blade
+                        slot received no pass, or no fitted point survives.
+    """
+
+    print('正在进行各周期归一化......')
+
+    combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time')
+    # The sorted sequence must begin with a start and finish with an end.
+    if combined_df_sorted.iloc[0].equals(end_points.iloc[0]):
+        combined_df_sorted = combined_df_sorted.iloc[1:]
+    if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]):
+        combined_df_sorted = combined_df_sorted.iloc[:-1]
+    combined_df_sorted.reset_index(drop=True, inplace=True)
+
+    # Alternating start/end timestamps: [start0, end0, start1, end1, ...]
+    start_times = combined_df_sorted['time'].tolist()
+    print('本次测量风机完整旋转圈数:'+ str(len(start_times) / 2))
+    if len(start_times) < 3:
+        raise ValueError("At least two blade passes are required for normalisation.")
+
+    normalize_cycle = start_times[1] - start_times[0]
+    full_cycle = int((start_times[2] - start_times[0]) * 3)
+    segments_per_blade = [[] for _ in range(3)]
+
+    # Walk over the (start, end) pairs; a trailing unmatched entry is ignored.
+    for i in range(0, len(start_times) - 1, 2):
+        start_time = start_times[i]
+        end_time = start_times[i + 1]
+
+        in_pass = (data_group['time'] > start_time) & (data_group['time'] <= end_time)
+        # Work on a copy so the rescaled (float) time never touches data_group.
+        segment = data_group[in_pass].copy()
+
+        # Stretch/compress the pass to the duration of the first pass.
+        ratio = (end_time - start_time) / normalize_cycle
+        segment['time'] = (segment['time'] - start_time) / ratio
+
+        # NOTE(review): i advances by 2, so passes land in slots 0, 2, 1, 0, 2, 1, ...
+        # Each blade still gets its own slot, but slots 1 and 2 are not in
+        # chronological order — confirm this labelling is intended.
+        segments_per_blade[i % 3].append(segment)
+
+    if any(not segments for segments in segments_per_blade):
+        raise ValueError("Not every blade slot received a pass.")
+    turbines = [pd.concat(segments) for segments in segments_per_blade]
+
+    turbines_processed = []
+    turbines_scattered = []
+    sd_time = None  # time window shared by all blades: [latest start, earliest end]
+    time_list = list(range(0, int(normalize_cycle), 1000))
+
+    for turbine in turbines:
+        turbine_sorted = turbine.sort_values(by='time').reset_index(drop=True)
+        first_time = turbine_sorted['time'].iloc[0]
+
+        # Group the samples into bins of 1000 time units.
+        bins = list(range(int(first_time), int(turbine_sorted['time'].max()), 1000))
+        grouped = turbine_sorted.groupby(pd.cut(turbine_sorted['time'], bins=bins, right=False),
+                                         observed=False)
+
+        # Per bin: drop everything at or outside the 5%/95% distance
+        # quantiles and remember the trimmed group and its mean distance.
+        filtered_groups = []
+        mean_points = []
+        for _, group in grouped:
+            quantile_5 = group['distance'].quantile(0.05)
+            quantile_95 = group['distance'].quantile(0.95)
+            filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)]
+            filtered_groups.append(filtered_group)
+            mean_points.append(filtered_group['distance'].mean())
+
+        # Absolute change between neighbouring bin means.
+        diff_points = [abs(later - earlier) for earlier, later in zip(mean_points, mean_points[1:])]
+
+        # Threshold: 110% of the largest change within the central 90% of bins.
+        start_index = int(len(diff_points) * 0.05)
+        end_index = int(len(diff_points) * 0.95)
+        sdr_diff = np.max(diff_points[start_index:end_index]) * 1.1
+
+        # First and last bin whose change stays below the threshold.
+        below_threshold = np.where(np.asarray(diff_points) < sdr_diff)[0]
+        first_index = below_threshold[0]
+        last_index = below_threshold[-1]
+
+        mid_rows = []
+        kept_groups = []
+        for index, filtered_group in enumerate(filtered_groups):
+            # Bins outside [first_index, last_index) are treated as edge jumps.
+            if not first_index <= index < last_index:
+                continue
+            mid_point_df = pd.DataFrame([filtered_group.mean()])
+            mid_point_df.iloc[0, 0] = time_list[index]
+            mid_rows.append(mid_point_df)
+            kept_groups.append(filtered_group)
+
+        if not mid_rows:
+            raise ValueError("No fitted point survived the edge filtering.")
+        processed_df = pd.concat(mid_rows, ignore_index=True)
+        scattered_df = pd.concat(kept_groups, ignore_index=True)
+
+        min_time = processed_df['time'].min()
+        max_time = processed_df['time'].max()
+
+        # Both bounds are tightened independently so the window is the
+        # intersection of all blades' ranges.
+        if sd_time is None:
+            sd_time = [min_time, max_time]
+        else:
+            if sd_time[0] < min_time:
+                sd_time[0] = min_time
+            if sd_time[1] > max_time:
+                sd_time[1] = max_time
+
+        turbines_processed.append(processed_df)
+        turbines_scattered.append(scattered_df)
+
+    border_rows = []
+    for i, turbine in enumerate(turbines_processed):
+        # Snap the row nearest to the window start onto the window start.
+        closest_index_0 = (turbine['time'] - sd_time[0]).abs().idxmin()
+        turbine.at[closest_index_0, 'time'] = sd_time[0]
+        sd_time_row_0 = turbine.loc[closest_index_0]
+
+        # Snap the row nearest to the window end onto the window end.
+        closest_index_1 = (turbine['time'] - sd_time[1]).abs().idxmin()
+        turbine.at[closest_index_1, 'time'] = sd_time[1]
+        sd_time_row_1 = turbine.loc[closest_index_1]
+
+        # Keep only the rows inside the shared window.
+        turbines_processed[i] = turbine.iloc[closest_index_0:closest_index_1 + 1].reset_index(drop=True)
+
+        sd_time_rows_turbine = pd.concat([pd.DataFrame([sd_time_row_0]), pd.DataFrame([sd_time_row_1])]
+                                         , ignore_index=True)
+        border_rows.append(sd_time_rows_turbine)
+
+    return turbines_processed, turbines_scattered, border_rows, full_cycle
+
+
+
+def blade_shape(turbines_processed: List[pd.DataFrame]):
+
+    """
+    Compute the average blade shape and every blade's deviation from it.
+    Only the first ``min(len(df))`` rows (labels 0..n-1) of each frame are used.
+    :param turbines_processed: fitted blade curves from data_normalize
+    :return: turbine_avg: frame with the ``time`` of the first blade and the
+             mean ``distance`` over all blades,
+             turbine_diff: one frame per blade holding ``distance - mean``
+    """
+
+    print('正在进行叶片外形偏差计算......')
+
+    num_rows = min(df.shape[0] for df in turbines_processed)
+    row_labels = list(range(num_rows))
+
+    # The time axis is taken from the first blade.
+    time_values = turbines_processed[0].loc[row_labels, 'time'].to_numpy()
+    distances = [df.loc[row_labels, 'distance'].to_numpy(dtype=float) for df in turbines_processed]
+
+    # Element-wise mean over the blades.
+    avg_distance = sum(distances) / len(distances)
+
+    turbine_avg = pd.DataFrame({'time': time_values, 'distance': avg_distance},
+                               index=range(num_rows))
+    turbine_diff = [pd.DataFrame({'time': time_values, 'distance': blade - avg_distance},
+                                 index=range(num_rows))
+                    for blade in distances]
+
+    return turbine_avg, turbine_diff
+
+
+def coordinate_normalize(tip_border_rows: List[pd.DataFrame], tip_angle):
+
+    """
+    Re-reference the blade-tip measurements to the measuring origin used for
+    the blade root and hub centre.
+    The ``distance`` column of every frame in ``tip_border_rows`` is
+    replaced in place.
+    :param tip_border_rows: boundary rows of the 3 blade tips
+    :param tip_angle: measured tip elevation angle in degrees
+    :return: the (modified) tip rows and ``tip_angle`` as passed in
+    """
+
+    tip_angle1 = np.deg2rad(tip_angle)
+    tip_angle_list = []
+    for turbine in tip_border_rows:
+
+        # Elevation angle seen from the shifted origin:
+        # atan((d*sin(a) - 0.07608) / (d*cos(a))).  The denominator must be
+        # parenthesised, otherwise the quotient is multiplied by d instead.
+        tip_angle_cal = np.arctan((np.sin(tip_angle1) * turbine['distance'] - 0.07608) /
+                                  (np.cos(tip_angle1) * turbine['distance']))
+        # Law of cosines with the same offset:
+        # 0.0057881664 == 0.07608 ** 2 and 0.15216 == 2 * 0.07608.
+        turbine['distance'] = (turbine['distance']**2 + 0.0057881664 -
+                               0.15216*turbine['distance']*np.sin(tip_angle1)) ** 0.5
+
+        tip_angle_list.append(tip_angle_cal)
+
+    tip_angle_new = float(np.mean(tip_angle_list))
+    tip_angle_new1 = np.rad2deg(tip_angle_new)
+    print('叶尖俯仰角: ' + str(tip_angle_new1))
+
+    # NOTE(review): the recomputed angle is only printed; the input angle is
+    # what gets returned.  Confirm whether tip_angle_new1 should be returned.
+    return tip_border_rows, tip_angle
+
+
+
+def radius_cal(border_rows, meas_angle, cen_dist, cen_angle, angle_main, angle_rotate):
+
+    """
+    Compute the rotation radius at the measured point.
+    :param border_rows: boundary rows of the three blades
+    :param meas_angle: elevation angle of the echo, in degrees
+    :param cen_dist: distance to the hub centre
+    :param cen_angle: elevation angle of the hub centre, in degrees
+    :param angle_main: main-shaft inclination, in degrees
+    :param angle_rotate: cone angle, in degrees
+    :return: rotation radius
+    """
+
+    # Mean distance over the boundary rows of all blades.
+    mean_dist = pd.concat([frame['distance'] for frame in border_rows]).mean()
+
+    cen_rad = np.deg2rad(cen_angle)
+    meas_rad = np.deg2rad(meas_angle)
+    main_rad = np.deg2rad(angle_main)
+    rotate_rad = np.deg2rad(angle_rotate)
+
+    # Offset between the hub centre and the measured point.
+    delta_x = np.cos(cen_rad) * cen_dist - np.cos(meas_rad) * mean_dist
+    delta_y = np.sin(cen_rad) * cen_dist - np.sin(meas_rad) * mean_dist
+
+    tan_main = np.tan(np.pi - main_rad)
+    tan_blade = np.tan(np.pi/2 - main_rad + rotate_rad)
+
+    if np.abs(main_rad - rotate_rad) < 0.0001:
+        # Inclination and cone angle (nearly) equal: tan_blade is not used.
+        radius = np.abs(delta_y - tan_main * delta_x)
+    else:
+        scaled = np.abs((tan_main * delta_x - delta_y)/(tan_main - tan_blade))
+        radius = scaled * (1 + tan_blade ** 2) ** 0.5
+    print('半径:' + str(radius))
+
+    return radius
+
+
+
+def blade_angle_aero_dist(border_rows: List[pd.DataFrame], radius: float, full_cycle: int,
+                                   tower_dist: float, v_angle: float):
+
+    """
+    Compute the relative pitch angle and the clearance distance of each blade.
+    :param border_rows: boundary rows of the three blades
+    :param radius: rotation radius
+    :param full_cycle: duration of one full revolution
+    :param tower_dist: distance to the tower
+    :param v_angle: elevation angle in degrees
+    :return: pitch_angle_list: per-blade deviation from the mean pitch angle
+             followed by the max-min spread (rounded to 2 decimals),
+             aero_dist_list: per-blade clearance followed by their mean
+             (rounded to 2 decimals),
+             v_speed: blade speed per timer unit,
+             cen_blade: mean boundary distance of each blade
+    """
+
+    print('正在进行相对桨距角和叶片净空距离计算......')
+    v_speed = 2 * np.pi * radius / full_cycle  # circumference travelled per timer unit
+    cos_elevation = np.cos(np.deg2rad(v_angle))
+
+    absolute_pitch = []
+    clearances = []
+    cen_blade = []
+    for blade in border_rows:
+        t_first, t_second = blade.iloc[0, 0], blade.iloc[1, 0]
+        d_first, d_second = blade.iloc[0, 1], blade.iloc[1, 1]
+
+        elapsed = t_second - t_first
+        depth = d_second - d_first
+        centre = (d_second + d_first) / 2
+
+        clearance = abs(centre - tower_dist) * cos_elevation
+        angle = np.degrees(np.arctan(depth / (elapsed * v_speed)))
+        print('单个叶片绝对桨距角' + str(angle))
+
+        absolute_pitch.append(angle)
+        clearances.append(clearance)
+        cen_blade.append(centre)
+
+    # Express every pitch angle relative to the mean, then add the spread.
+    mean_pitch = np.mean(absolute_pitch)
+    relative_pitch = [angle - mean_pitch for angle in absolute_pitch]
+    relative_pitch.append(max(relative_pitch) - min(relative_pitch))
+    clearances.append(np.mean(clearances))
+
+    pitch_angle_list = [round(value, 2) for value in relative_pitch]
+    aero_dist_list = [round(value, 2) for value in clearances]
+
+    print('\n'.join(['叶片相对角度偏差:',
+                     '叶片1:' + str(pitch_angle_list[0]),
+                     '叶片2:' + str(pitch_angle_list[1]),
+                     '叶片3:' + str(pitch_angle_list[2]),
+                     '相对偏差范围:' + str(pitch_angle_list[3])]))
+    print('\n'.join(['叶片净空距离:',
+                     '叶片1:' + str(aero_dist_list[0]),
+                     '叶片2:' + str(aero_dist_list[1]),
+                     '叶片3:' + str(aero_dist_list[2]),
+                     '平均净空距离:' + str(aero_dist_list[3])]))
+
+    return pitch_angle_list, aero_dist_list, v_speed, cen_blade
+
+
+
def plot_data(data, plot_type: str, data_name: str):

    """
    Draw the given data sets into one figure and save it as a PNG file.

    :param data: list of DataFrames with 'time' and 'distance' columns; at
                 most the first three are drawn (blue, green, red).
    :param plot_type: chart type, 'line' or 'scatter'.
    :param data_name: data name, used to build the file name
                      "<data_name>_<plot_type>.png" in the working directory.
    :return: absolute path of the saved image.
    :raises ValueError: if plot_type is neither 'line' nor 'scatter'.
    """

    # Validate before any figure is created so a bad argument cannot leave
    # an (enormous) open figure behind.
    if plot_type not in ('line', 'scatter'):
        raise ValueError("plot_type must be either 'line' or 'scatter'")

    print('正在画图......')
    time.sleep(1)
    save_name = fr"{data_name}_{plot_type}.png"  # output file name
    plt.figure(figsize=(300, 150))

    try:
        if plot_type == 'line':
            for df, color in zip(data, ['blue', 'green', 'red']):
                sns.lineplot(data=df, x='time', y='distance', color=color)
        else:
            for df, (size, color) in zip(data, [(50, 'blue'), (25, 'green'), (10, 'red')]):
                sns.scatterplot(data=df, x='time', y='distance', s=size, color=color)

        axy = plt.gca()  # current axes
        plt.grid(which='both', linewidth=2)
        axy.xaxis.set_major_locator(MaxNLocator(nbins=200))  # at most 200 major x ticks
        axy.yaxis.set_major_locator(MaxNLocator(nbins=100))  # at most 100 major y ticks
        plt.xlabel('时间', fontsize=100, fontweight='bold')
        plt.ylabel('距离(m)', fontsize=100, fontweight='bold')
        axy.tick_params(axis='x', labelsize=10, labelcolor='black', width=2)
        axy.tick_params(axis='y', labelsize=60, labelcolor='black', width=10)
        plt.savefig(save_name)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close()

    abs_path = os.path.abspath(save_name)
    print(f" {save_name} 已完成")

    return abs_path
+
+
+
def find_param(path: str):

    """
    Extract the measurement parameters encoded in a data file name.

    The file name (directories and extension removed) is expected to be
    ``<wind>_<turbine>_<YYYYmmddHHMMSS>_<sampling>_<angle1>_<angle2>``.
    Fields are taken from the right, so a wind-farm name that itself
    contains underscores is still parsed correctly.

    :param path: file path; both '/' and '\\' separators are accepted.
    :return: (wind_name, turbine_code, "YYYY-mm-dd HH:MM:SS", sampling_fq,
              tunnel_1, tunnel_2)
    :raises ValueError: if the name has fewer than six fields, or a field
                        cannot be converted (bad time stamp or number).
    """
    file_name = path.replace('\\', '/').rsplit('/', 1)[-1]

    stem, extension = os.path.splitext(file_name)
    if extension[1:].isdigit():
        # No real extension: the "extension" is the fraction of the last number.
        stem = file_name

    fields = stem.split('_')
    if len(fields) < 6:
        raise ValueError(
            f"file name {file_name!r} does not match "
            "<wind>_<turbine>_<time>_<sampling>_<angle1>_<angle2>")

    *name_parts, turbine_code, time_code, fq_text, tunnel_1_text, tunnel_2_text = fields
    wind_name = '_'.join(name_parts)
    sampling_fq = int(fq_text)
    tunnel_1 = float(tunnel_1_text)
    tunnel_2 = float(tunnel_2_text)

    dt = datetime.strptime(time_code, "%Y%m%d%H%M%S")
    standard_time_str = dt.strftime("%Y-%m-%d %H:%M:%S")

    return wind_name, turbine_code, standard_time_str, sampling_fq, tunnel_1, tunnel_2
+
+
+
def blade_dist_distribute_cal(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame,
                              tower_dist: float, v_angle: float, blade_cen_dist: list):

    """
    Compute, for every blade pass, the tower clearance and the 'r_speed' value.

    A pass is only evaluated when the start of the following pass is known,
    because 'r_speed' is derived from the spacing of two consecutive pass
    starts; the final pass in the recording is therefore skipped (the
    previous implementation indexed past the end of the list there).

    :param data_group: filtered data produced by cycle_calculate.
    :param start_points: leading-edge jump points (start of each pass).
    :param end_points: trailing-edge jump points (end of each pass).
    :param tower_dist: distance to the tower.
    :param v_angle: elevation angle of the measurement, in degrees.
    :param blade_cen_dist: per-blade distance offset subtracted from the gap.
    :return: list of three DataFrames (one per blade slot) with the columns
             'clearance' and 'r_speed', one row per evaluated pass.
    """

    print('正在进行各周期净空距离计算......')

    edges = pd.concat([start_points, end_points]).sort_values(by='time')
    # Make sure the sequence begins with a start point and finishes with an end point.
    if edges.iloc[0].equals(end_points.iloc[0]):
        edges = edges.iloc[1:]
    if edges.iloc[-1].equals(start_points.iloc[-1]):
        edges = edges.iloc[:-1]

    # Alternating start/end time stamps: [start0, end0, start1, end1, ...]
    edge_times = edges['time'].tolist()
    cos_elevation = np.cos(np.deg2rad(v_angle))
    per_blade = [[] for _ in range(3)]

    for i in range(0, len(edge_times) - 2, 2):
        start_time = edge_times[i]
        end_time = edge_times[i + 1]
        next_start = edge_times[i + 2]

        in_pass = (data_group['time'] > start_time) & (data_group['time'] <= end_time)
        min_distance = data_group.loc[in_pass, 'distance'].min()

        # Slot index i % 3 matches the blade ordering used by data_normalize.
        clearance = np.abs(tower_dist - min_distance - blade_cen_dist[i % 3]) * cos_elevation
        # NOTE(review): three blade spacings divided by 5,000,000 - presumably the
        # rotation period in seconds for a 5 MHz timer; confirm the intended unit.
        r_speed = (next_start - start_time) * 3 / 5000000

        per_blade[i % 3].append({'clearance': clearance, 'r_speed': r_speed})

    tower_clearance = [pd.DataFrame(rows, columns=['clearance', 'r_speed']) for rows in per_blade]

    return tower_clearance
+
+
# Manual test driver: analyses one locate/measure recording pair on import.
# Other recording pairs that have been used (swap them in by hand):
# measure_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/pl_9_20250514083317_100_24.8_30.06.csv"
# locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/pl_9_20250514083134_100_33.53_33.53.csv"
#
# measure_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/p'l_9p_20250514084957_100_27.48_29.47.csv"
# locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/p'l_9p_20250514084814_100_27.75_32.93.csv"
#
# locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/平陆_4_20250514073658_100_16.00_20.56.csv"
# measure_path= "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513/平陆_4_20250514073248_100_12.72_17.3.csv"
#
# locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250416/__20250416134815_50_14.55_17.00.csv"
# measure_path= "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250416/__20250416135017_50_11.85_14.31.csv"

_sample_dir = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250513"
locate_path = f"{_sample_dir}/pl_63_20250513160502_100_41.38_41.38.csv"
measure_path = f"{_sample_dir}/pl_63_20250513160655_100_28.10_35.43.csv"

# [locate file, measure file, cone angle, axial inclination]
data_path = [locate_path, measure_path, 5, 3.5]

start_t = time.time()  # wall-clock start of the analysis
list_1 = data_analyse(data_path)
# print(list_1)
print(f"耗时: {time.time() - start_t:.2f} 秒")
+

+ 817 - 0
data_clean.py

@@ -0,0 +1,817 @@
+import os
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from typing import Tuple, List
+import warnings
+import time
+import sys
+import frequency_filter as ff
+from datetime import datetime
+
+warnings.filterwarnings("ignore", category=FutureWarning)  # 忽略特定警告
+plt.rcParams['font.sans-serif'] = ['SimHei']  # 使用黑体
+plt.rcParams['axes.unicode_minus'] = False  # 解决保存图像是负号'-'显示为方块的问题
+
+
def result_main():
    """
    Ensure the history storage exists and return the history CSV path.

    A 'data' folder is created next to the running Python executable when it
    is missing, and 'history_data.csv' inside it is initialised with the
    header row when the file does not exist yet.

    :return: path of the history CSV file.
    """

    # The storage folder lives beside the interpreter/executable in use.
    data_folder = os.path.join(os.path.dirname(sys.executable), 'data')
    if not os.path.exists(data_folder):
        os.makedirs(data_folder)

    csv_file_path = os.path.join(data_folder, 'history_data.csv')
    if os.path.exists(csv_file_path):
        return csv_file_path

    # First use: write a header-only CSV.
    header = ['时间', '场站', '风机编号', '采样频率',
              '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
              '叶片1净空值', '叶片2净空值', '叶片3净空值', '平均净空值',
              '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
              '振动幅值', '振动主频']
    pd.DataFrame(columns=header).to_csv(csv_file_path, index=False)

    return csv_file_path
+
+
# Column order of one history record; must match the header written by result_main.
_HISTORY_COLUMNS = ['时间', '场站', '风机编号', '采样频率',
                    '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差',
                    '叶片1净空值', '叶片2净空值', '叶片3净空值', '平均净空值',
                    '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转',
                    '振动幅值', '振动主频']

# Divisor applied to raw time stamps before they are returned for plotting.
# NOTE(review): presumably timer ticks per second (5 MHz) - confirm.
_TIME_SCALE = 5000000


def _xy(frame: pd.DataFrame) -> dict:
    """Return the first two columns of *frame* as {'xdata': [...], 'ydata': [...]}."""
    return {'xdata': frame.iloc[:, 0].tolist(), 'ydata': frame.iloc[:, 1].tolist()}


def _blade_curves(frames, avg=None) -> dict:
    """Build the per-blade plot dictionary (plus 'avg_blade' when *avg* is given)."""
    curves = {
        'first_blade': _xy(frames[0]),
        'second_blade': _xy(frames[1]),
        'third_blade': _xy(frames[2]),
    }
    if avg is not None:
        curves['avg_blade'] = _xy(avg)
    return curves


def data_analyse(path: List[str]):
    """
    Run the full analysis, append the summary to the history CSV and return
    all plot/table data.

    :param path: [locate_file, measure_file, cone_angle, axial_inclination];
                 the two angles may be numbers or numeric strings.
    :return: dictionary with the keys 'original_plot', 'fft_plot',
             'blade_tip', 'blade_root', 'dist_distribution' and
             'analyse_table'.
    :raises ValueError: if the first tip/root blade edges are not nested in
                        the order root-start < tip-start < tip-end < root-end.
    """
    locate_file, measure_file = path[0], path[1]
    noise_reduction = 0.000001  # distance values rarer than this share of all samples are dropped
    min_difference = 1.5  # jumps between neighbouring samples above this are candidate blade edges
    angle_cone = float(path[2])  # cone angle
    axial_inclination = float(path[3])  # axial inclination

    wind_name, turbine_code, _, sampling_fq, _, angle_cen = find_param(locate_file)
    # The reported time stamp is the one of the measurement file.
    _, _, time_code, sampling_fq_1, angle_tip, angle_root = find_param(measure_file)

    sampling_fq_1 = sampling_fq_1 * 1000
    sampling_fq = sampling_fq * 1000

    _, data_cen = process_data(locate_file)
    data_tip, data_root = process_data(measure_file)

    start_tip, end_tip, filtered_data_tip = cycle_calculate(data_tip, noise_reduction, min_difference)
    start_root, end_root, filtered_data_root = cycle_calculate(data_root, noise_reduction, min_difference)
    filtered_data_cen = tower_filter(data_cen, noise_reduction)
    dist_cen = np.mean(filtered_data_cen.iloc[:, 1].tolist())

    # Drop a leading tip pass that finished before the first root pass began.
    if end_tip.iloc[0, 0] < start_root.iloc[0, 0]:
        start_tip = start_tip.drop(start_tip.index[0])
        end_tip = end_tip.drop(end_tip.index[0])
    if not (start_root.iloc[0, 0] < start_tip.iloc[0, 0] < end_tip.iloc[0, 0] < end_root.iloc[0, 0]):
        raise ValueError("The elements are not in the expected order.")

    tower_dist_tip = ff.tower_cal(filtered_data_tip, start_tip, end_tip, sampling_fq_1)
    tower_dist_root = ff.tower_cal(filtered_data_root, start_root, end_root, sampling_fq_1)
    lowpass_data, fft_x, fft_y, tower_freq, tower_max = ff.process_fft(filtered_data_cen, sampling_fq)

    result_line_tip, _, border_rows_tip, cycle_len_tip \
        = data_normalize(filtered_data_tip, start_tip, end_tip, sampling_fq_1)
    result_line_root, _, border_rows_root, cycle_len_root \
        = data_normalize(filtered_data_root, start_root, end_root, sampling_fq_1)

    result_avg_tip, _ = blade_shape(result_line_tip)
    result_avg_root, _ = blade_shape(result_line_root)

    border_rows_tip_new, angle_tip_new = coordinate_normalize(border_rows_tip, angle_tip)

    tip_r = radius_cal(border_rows_tip_new, angle_tip_new, dist_cen, angle_cen, axial_inclination, angle_cone)
    root_r = radius_cal(border_rows_root, angle_root, dist_cen, angle_cen, axial_inclination, angle_cone)

    pitch_angle_tip, aero_dist_tip, v_speed_tip, cen_blade_tip = (
        blade_angle_aero_dist(border_rows_tip, tip_r, cycle_len_tip, tower_dist_tip, angle_tip_new))
    # blade_angle_aero_dist returns four values; the blade centre distances
    # of the root measurement are not needed here.
    pitch_angle_root, _, v_speed_root, _ = (
        blade_angle_aero_dist(border_rows_root, root_r, cycle_len_root, tower_dist_root, angle_root))

    dist_distribute = blade_dist_distribute_cal(filtered_data_tip, start_tip, end_tip,
                                                tower_dist_tip, angle_tip_new, cen_blade_tip)

    # Convert the time axis of the blade curves into travelled distance.
    for df in result_line_tip:
        df.iloc[:, 0] = df.iloc[:, 0] * v_speed_tip
    for df in result_line_root:
        df.iloc[:, 0] = df.iloc[:, 0] * v_speed_root
    result_avg_tip.iloc[:, 0] = result_avg_tip.iloc[:, 0] * v_speed_tip
    result_avg_root.iloc[:, 0] = result_avg_root.iloc[:, 0] * v_speed_root

    twist_1, twist_2, twist_3 = (
        round(np.abs(pitch_angle_root[k] - pitch_angle_tip[k]), 2) for k in range(3))
    twist_avg = round((twist_1 + twist_2 + twist_3) / 3, 2)

    # Down-sampling step for the raw plots; never 0, which would be an invalid slice step.
    sampling_num = max(1, int(0.01 * sampling_fq_1))
    lowpass_data.iloc[:, 0] = lowpass_data.iloc[:, 0] / _TIME_SCALE
    lowpass_time = lowpass_data['time'].tolist()
    lowpass_dist = lowpass_data['distance_filtered'].tolist()

    return_list = [
        time_code, wind_name, turbine_code, sampling_fq_1,
        pitch_angle_root[0], pitch_angle_root[1], pitch_angle_root[2], pitch_angle_root[3],
        aero_dist_tip[0], aero_dist_tip[1], aero_dist_tip[2], aero_dist_tip[3],
        twist_1, twist_2, twist_3, twist_avg,
        tower_max, tower_freq,
    ]
    df_new_row = pd.DataFrame([return_list], columns=_HISTORY_COLUMNS)

    json_output = {
        'original_plot': {
            'blade_tip': {
                # Scaled copies are used so the raw integer time column is left untouched.
                'xdata': (data_tip.iloc[:, 0] / _TIME_SCALE).tolist()[::sampling_num],
                'ydata': data_tip.iloc[:, 1].tolist()[::sampling_num]
            },
            'blade_root': {
                'xdata': (data_root.iloc[:, 0] / _TIME_SCALE).tolist()[::sampling_num],
                'ydata': data_root.iloc[:, 1].tolist()[::sampling_num]
            }
        },
        'fft_plot': {
            'lowpass': {
                'xdata': lowpass_time[::sampling_num],
                'ydata': lowpass_dist[::sampling_num],
                'xmax': max(lowpass_time),
                'xmin': min(lowpass_time),
                'ymax': max(lowpass_dist),
                'ymin': min(lowpass_dist)
            },
            'fft': {
                'xdata': fft_x,
                'ydata': fft_y,
                'xmax': max(fft_x),
                'xmin': min(fft_x),
                'ymax': max(fft_y),
                'ymin': min(fft_y)
            }
        },
        'blade_tip': _blade_curves(result_line_tip, result_avg_tip),
        'blade_root': _blade_curves(result_line_root, result_avg_root),
        'dist_distribution': _blade_curves(dist_distribute),
        'analyse_table': {
            'pitch_angle_diff': {
                'blade_1': pitch_angle_root[0],
                'blade_2': pitch_angle_root[1],
                'blade_3': pitch_angle_root[2],
                'blade_relate': pitch_angle_root[3]
            },
            'aero_dist': {
                'blade_1': aero_dist_tip[0],
                'blade_2': aero_dist_tip[1],
                'blade_3': aero_dist_tip[2],
                'blade_avg': aero_dist_tip[3]
            },
            'blade_twist': {
                'blade_1': twist_1,
                'blade_2': twist_2,
                'blade_3': twist_3,
                'blade_avg': twist_avg
            },
            'tower_vibration': {
                'max_vibration': tower_max,
                'main_vibration_freq': tower_freq
            }
        }
    }

    # result_main() creates the data folder / header-only CSV when missing
    # and returns the history file path.
    df_new_row.to_csv(result_main(), mode='a', header=False, index=False)

    return json_output
+
+
def process_data(file_path):

    """
    Load a raw recording and split it into two (time, distance) tables.

    Steps: read columns 2, 4 and 9 of the header-less CSV, keep the first 95 %
    of the rows, undo a single timer reset, discard rows whose time lies
    outside [first row time, last row time], and shift time to start at 0.

    :param file_path: path of the CSV file.
    :return: (data_1, data_2), each with the columns 'time' and 'distance';
             data_1 carries the first distance channel, data_2 the second.
    """

    raw = pd.read_csv(file_path, usecols=[1, 3, 8], header=None, engine='c')
    raw = raw.head(int(len(raw) * 0.95))

    # A timer reset shows up as the minimum directly following the maximum.
    peak_value = raw.iloc[:, 0].max()
    peak_pos = raw.iloc[:, 0].idxmax()
    trough_pos = raw.iloc[:, 0].idxmin()
    if trough_pos == peak_pos + 1:
        # Continue the time axis after the reset by adding the pre-reset maximum.
        raw.iloc[trough_pos:, 0] += peak_value

    # Keep only rows whose time stamp lies between the first and last row.
    first_time = raw.iloc[0, 0]
    last_time = raw.iloc[-1, 0]
    inside = (raw.iloc[:, 0] >= first_time) & (raw.iloc[:, 0] <= last_time)
    raw = raw[inside].reset_index(drop=True)

    # Re-base time so that the earliest sample is at 0.
    raw.iloc[:, 0] -= raw.iloc[:, 0].min()

    channel_names = ['time', 'distance']
    data_1 = raw.iloc[:, [0, 1]].set_axis(channel_names, axis=1)
    data_2 = raw.iloc[:, [0, 2]].set_axis(channel_names, axis=1)

    return data_1, data_2
+
+
def tower_filter(data_group: pd.DataFrame, noise_threshold: float):

    """
    De-noise the hub-centre distance signal and forward-fill the gaps.

    A sample is blanked when its distance value occurs in less than
    ``noise_threshold`` of all samples, or when it lies more than 20 below /
    10 % above the mean of the samples that carry one of the five most
    frequent distance values. Blanked samples then take the previous valid
    value. ``data_group`` is modified in place and also returned.

    :param data_group: hub-centre data from process_data ('distance' column).
    :param noise_threshold: minimum relative frequency a distance value needs.
    :return: the de-noised data (same object as ``data_group``).
    """

    # Relative frequency of every distinct distance value (most frequent first).
    distance_share = data_group['distance'].value_counts(normalize=True)
    rare_values = distance_share[distance_share < noise_threshold].index
    data_group.loc[data_group['distance'].isin(rare_values), 'distance'] = np.nan

    # Reference level: mean over the samples holding one of the 5 most frequent values.
    dominant_values = distance_share.head(5).index
    reference = data_group.loc[data_group['distance'].isin(dominant_values), 'distance'].mean()
    outside_band = (data_group['distance'] < reference - 20) | (data_group['distance'] > reference * 1.1)
    data_group.loc[outside_band, 'distance'] = np.nan

    # Forward fill; Series.ffill() replaces the deprecated fillna(method='ffill').
    data_group['distance'] = data_group['distance'].ffill()
    filtered_data = data_group

    return filtered_data
+
+
def cycle_calculate(data_group: pd.DataFrame, noise_threshold: float, min_distance: float):

    """
    De-noise the signal and locate the blade leading/trailing edge jumps.

    The 'distance' column of ``data_group`` is de-noised in place (rare
    values and values outside the band around the dominant level are blanked
    and forward-filled). Edge detection then looks at sample-to-sample jumps:

    * a drop of more than ``min_distance`` is a start point when all of the
      following (up to) 1000 samples stay at least ``min_distance`` below the
      value before the drop;
    * a rise of more than ``min_distance`` ends a pass (the sample before the
      rise is recorded) when all of the preceding (up to) 1000 samples are at
      least ``min_distance`` below the value after the rise.

    The code assumes the default 0..n-1 row index and 'time' as first column.

    :param data_group: data from process_data ('time', 'distance').
    :param noise_threshold: minimum relative frequency a distance value needs.
    :param min_distance: distance jump separating blade from background.
    :return: (start_points, end_points, filtered_data); filtered_data is a
             copy of the de-noised data.
    :raises ValueError: if no start point or no end point is found.
    """

    # Relative frequency of every distinct distance value (most frequent first).
    distance_share = data_group['distance'].value_counts(normalize=True)
    rare_values = distance_share[distance_share < noise_threshold].index
    data_group.loc[data_group['distance'].isin(rare_values), 'distance'] = np.nan

    # Reference level: mean over the samples holding one of the 5 most frequent values.
    dominant_values = distance_share.head(5).index
    reference = data_group.loc[data_group['distance'].isin(dominant_values), 'distance'].mean()
    outside_band = (data_group['distance'] < reference - 20) | (data_group['distance'] > reference * 1.1)
    data_group.loc[outside_band, 'distance'] = np.nan

    # Forward fill; Series.ffill() replaces the deprecated fillna(method='ffill').
    data_group['distance'] = data_group['distance'].ffill()
    filtered_data = data_group.copy()

    # Jump sizes are kept in a separate Series so that no helper column is
    # left behind in the caller's frame.
    step = filtered_data['distance'].diff()
    rising_labels = filtered_data.index[step > min_distance]
    falling_labels = filtered_data.index[step < -min_distance]

    end_labels = []
    for idx in rising_labels:
        level_after = filtered_data.loc[idx, 'distance']
        preceding = filtered_data.loc[idx - 1000: idx - 1, 'distance']
        if preceding.le(level_after - min_distance).all():
            end_labels.append(idx - 1)

    start_labels = []
    for idx in falling_labels:
        level_before = filtered_data.loc[idx - 1, 'distance']
        following = filtered_data.iloc[idx: idx + 1000]['distance']
        if following.le(level_before - min_distance).all():
            start_labels.append(idx)

    # One indexed lookup instead of growing a frame row by row.
    end_points = filtered_data.loc[end_labels]
    start_points = filtered_data.loc[start_labels]
    if start_points.empty or end_points.empty:
        raise ValueError("no blade edges found: check min_distance and the input data")

    # The sequence has to begin with a start point ...
    if end_points.iloc[0, 0] < start_points.iloc[0, 0]:
        end_points = end_points.iloc[1:]
        if end_points.empty:
            raise ValueError("no complete blade pass found in the input data")
    # ... and finish with an end point.
    if end_points.iloc[-1, 0] < start_points.iloc[-1, 0]:
        start_points = start_points.iloc[:-1]

    return start_points, end_points, filtered_data
+
+
def data_normalize(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame, fs) \
        -> Tuple[List[pd.DataFrame], List[pd.DataFrame], List[pd.DataFrame], int]:

    """
    Extract every blade pass, normalise its duration and build per-blade curves.

    Passes are assigned to three slots in the order 0, 2, 1, 0, 2, 1, ...
    (slot = edge index % 3). Within a slot the samples of all passes are
    pooled, grouped into 1000-time-unit bins, trimmed to the open 5 %-95 %
    distance range per bin and averaged. Bins next to abrupt changes at
    either end of the blade are discarded.

    :param data_group: filtered data from cycle_calculate ('time', 'distance').
    :param start_points: leading-edge jump points (start of each pass).
    :param end_points: trailing-edge jump points (end of each pass).
    :param fs: sampling frequency (currently not used by the computation).
    :return: turbines_processed: averaged curve of every blade, cut to the
                 time window shared by all blades,
             turbines_scattered: trimmed raw samples of every blade,
             border_rows: the two boundary rows of every blade,
             full_cycle: three times the spacing of the first two pass starts.
    :raises ValueError: if fewer than two passes are available, a blade slot
                        received no samples, or no bin survives the filtering.
    """

    edges = pd.concat([start_points, end_points]).sort_values(by='time')
    # Make sure the sequence begins with a start point and finishes with an end point.
    if edges.iloc[0].equals(end_points.iloc[0]):
        edges = edges.iloc[1:]
    if edges.iloc[-1].equals(start_points.iloc[-1]):
        edges = edges.iloc[:-1]

    # Alternating start/end time stamps: [start0, end0, start1, end1, ...]
    edge_times = edges['time'].tolist()
    if len(edge_times) < 3:
        raise ValueError("at least two blade passes are required")

    normalize_cycle = edge_times[1] - edge_times[0]
    full_cycle = int((edge_times[2] - edge_times[0]) * 3)

    passes = [[] for _ in range(3)]
    for i in range(0, len(edge_times) - 1, 2):
        start_time = edge_times[i]
        end_time = edge_times[i + 1]

        in_pass = (data_group['time'] > start_time) & (data_group['time'] <= end_time)
        # Work on a copy: the rescaled (float) time must not be written into
        # a slice of the caller's frame.
        segment = data_group[in_pass].copy()

        # Stretch/compress the pass to the duration of the first pass.
        ratio = (end_time - start_time) / normalize_cycle
        segment['time'] = (segment['time'] - start_time) / ratio

        passes[i % 3].append(segment)

    turbines = [pd.concat(parts) if parts else pd.DataFrame() for parts in passes]

    turbines_processed = []
    turbines_scattered = []
    common_window = None  # [latest first time, earliest last time] over all blades
    time_list = list(range(0, int(normalize_cycle), 1000))

    for turbine in turbines:
        if turbine.empty:
            raise ValueError("a blade slot received no samples; at least three passes are required")

        turbine_sorted = turbine.sort_values(by='time').reset_index(drop=True)
        first_time = turbine_sorted['time'].iloc[0]

        # Bins of 1000 time units, starting at the first sample.
        bins = list(range(int(first_time), int(turbine_sorted['time'].max()), 1000))
        # observed=False keeps empty bins so positions stay aligned with time_list.
        grouped = turbine_sorted.groupby(pd.cut(turbine_sorted['time'], bins=bins, right=False),
                                         observed=False)

        # Per bin: keep only samples strictly inside the 5 %-95 % distance range.
        trimmed_groups = []
        for _, group in grouped:
            quantile_5 = group['distance'].quantile(0.05)
            quantile_95 = group['distance'].quantile(0.95)
            trimmed_groups.append(group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)])

        mean_points = np.array([group['distance'].mean() for group in trimmed_groups])
        diff_points = np.abs(np.diff(mean_points))

        # Threshold: 110 % of the largest bin-to-bin change in the central 90 %.
        start_index = int(len(diff_points) * 0.05)
        end_index = int(len(diff_points) * 0.95)
        sdr_diff = np.max(diff_points[start_index:end_index]) * 1.1

        # First and last bin whose change to the next bin is below the threshold.
        smooth_positions = np.where(diff_points < sdr_diff)[0]
        first_index = smooth_positions[0]
        last_index = smooth_positions[-1]

        kept_groups = trimmed_groups[first_index:last_index]
        mean_rows = []
        for index in range(first_index, last_index):
            mid_point = trimmed_groups[index].mean()
            mid_point.iloc[0] = time_list[index]  # snap the time to the bin grid
            mean_rows.append(mid_point)
        if not mean_rows:
            raise ValueError("no usable bins left after edge filtering")

        processed_df = pd.DataFrame(mean_rows).reset_index(drop=True)
        scattered_df = pd.concat(kept_groups, ignore_index=True)

        min_time = processed_df['time'].min()
        max_time = processed_df['time'].max()
        if common_window is None:
            common_window = [min_time, max_time]
        else:
            # Both bounds are checked independently (an elif chain would skip
            # the upper bound whenever the lower bound was moved).
            common_window[0] = max(common_window[0], min_time)
            common_window[1] = min(common_window[1], max_time)

        turbines_processed.append(processed_df)
        turbines_scattered.append(scattered_df)

    border_rows = []
    for i, turbine in enumerate(turbines_processed):
        # Row closest to the start of the shared window.
        closest_index_0 = (turbine['time'] - common_window[0]).abs().idxmin()
        turbine.at[closest_index_0, 'time'] = common_window[0]
        border_row_0 = turbine.loc[closest_index_0]

        # Row closest to the end of the shared window.
        closest_index_1 = (turbine['time'] - common_window[1]).abs().idxmin()
        turbine.at[closest_index_1, 'time'] = common_window[1]
        border_row_1 = turbine.loc[closest_index_1]

        # Cut the curve to the shared window.
        turbines_processed[i] = turbine.iloc[closest_index_0:closest_index_1 + 1].reset_index(drop=True)

        border_rows.append(pd.DataFrame([border_row_0, border_row_1]).reset_index(drop=True))

    return turbines_processed, turbines_scattered, border_rows, full_cycle
+
+
def blade_shape(turbines_processed: List[pd.DataFrame]):

    """
    Compute the mean blade profile and each blade's deviation from it.

    Only the first ``n`` rows of every curve are used, where ``n`` is the
    length of the shortest curve. Rows are matched by position, and the time
    axis is taken from the first blade.

    :param turbines_processed: fitted blade curves from data_normalize
                               ('time', 'distance').
    :return: (turbine_avg, turbine_diff): the mean profile, and one frame per
             blade holding distance minus mean; all with 'time'/'distance'.
    :raises ValueError: if ``turbines_processed`` is empty.
    """

    if not turbines_processed:
        raise ValueError("turbines_processed must contain at least one blade curve")

    num_rows = min(df.shape[0] for df in turbines_processed)

    time_values = turbines_processed[0]['time'].iloc[:num_rows].to_numpy()
    distances = [df['distance'].iloc[:num_rows].to_numpy() for df in turbines_processed]

    # Element-wise mean over the blades (same summation order as a per-row sum()).
    avg_distance = sum(distances) / len(distances)

    turbine_avg = pd.DataFrame({'time': time_values, 'distance': avg_distance})
    turbine_diff = [pd.DataFrame({'time': time_values, 'distance': blade - avg_distance})
                    for blade in distances]

    return turbine_avg, turbine_diff
+
+
def coordinate_normalize(tip_border_rows: List[pd.DataFrame], tip_angle):

    """
    Re-express the blade-tip measurement relative to the measuring origin
    used for the blade root and hub centre.

    For a measured distance d at elevation a, with an origin offset of
    h = 0.07608:

        d_new     = sqrt(d^2 + h^2 - 2*h*d*sin(a))
        angle_new = atan((d*sin(a) - h) / (d*cos(a)))

    The 'distance' column of every frame in ``tip_border_rows`` is
    overwritten in place with d_new.

    :param tip_border_rows: edge rows of the three blade tips.
    :param tip_angle: elevation angle of the tip measurement, in degrees.
    :return: (tip_border_rows, corrected tip elevation angle in degrees,
             averaged over all edge rows). With no rows the input angle is
             returned unchanged.
    """

    # NOTE(review): 0.07608 is presumably the vertical offset (m) between the
    # two measuring origins; 0.0057881664 = h^2 and 0.15216 = 2*h.
    origin_offset = 0.07608
    angle_rad = np.deg2rad(tip_angle)
    sin_angle = np.sin(angle_rad)
    cos_angle = np.cos(angle_rad)

    corrected_angles = []
    for turbine in tip_border_rows:
        measured = turbine['distance']
        # The whole horizontal component (cos * d) is the denominator.
        corrected_angles.append(
            np.arctan((sin_angle * measured - origin_offset) / (cos_angle * measured)).to_numpy())
        turbine['distance'] = (measured ** 2 + 0.0057881664 -
                               0.15216 * measured * sin_angle) ** 0.5

    if not corrected_angles:
        return tip_border_rows, tip_angle

    tip_angle_new = float(np.rad2deg(np.mean(np.concatenate(corrected_angles))))

    return tip_border_rows, tip_angle_new
+
+
def radius_cal(border_rows, meas_angle, cen_dist, cen_angle, angle_main, angle_rotate):

    """
    Compute the rotation radius at the measured point of the blade.

    :param border_rows: edge data of the three blades; the mean of all their
        ``distance`` values is used as the measured distance.
    :param meas_angle: pitch angle of the echo (measurement), in degrees.
    :param cen_dist: distance to the hub centre.
    :param cen_angle: pitch angle of the hub centre, in degrees.
    :param angle_main: tilt angle of the main shaft, in degrees.
    :param angle_rotate: cone angle, in degrees.
    :return: rotation radius.
    """

    main_rad = np.deg2rad(angle_main)
    cone_rad = np.deg2rad(angle_rotate)
    hub_rad = np.deg2rad(cen_angle)
    meas_rad = np.deg2rad(meas_angle)

    mean_dist = pd.concat([frame['distance'] for frame in border_rows]).mean()

    # Offset of the hub centre relative to the measured point.
    delta_x = np.cos(hub_rad) * cen_dist - np.cos(meas_rad) * mean_dist
    delta_y = np.sin(hub_rad) * cen_dist - np.sin(meas_rad) * mean_dist

    shaft_tan = np.tan(np.pi - main_rad)

    # When tilt and cone angle coincide, the second tangent is taken at
    # pi / 2, so that case uses the closed form without it.
    if np.abs(main_rad - cone_rad) < 0.0001:
        return np.abs(delta_y - shaft_tan * delta_x)

    blade_tan = np.tan(np.pi / 2 - main_rad + cone_rad)
    return (np.abs((shaft_tan * delta_x - delta_y) / (shaft_tan - blade_tan))
            * (1 + blade_tan ** 2) ** 0.5)
+
+
def blade_angle_aero_dist(border_rows: List[pd.DataFrame], radius: float, full_cycle: int,
                          tower_dist: float, v_angle: float):

    """
    Compute the relative pitch angle and the tower clearance of every blade.

    Only the first two rows of each frame are used; column 0 is read as the
    time and column 1 as the distance of the two blade edges.

    :param border_rows: edge data of the three blades.
    :param radius: rotation radius.
    :param full_cycle: duration of one full revolution, in timer units.
    :param tower_dist: distance to the tower.
    :param v_angle: pitch angle of the measurement, in degrees.
    :return: ``(pitch_angle_list, aero_dist_list, v_speed, cen_blade)``:
        pitch angles relative to their mean followed by their max-min spread,
        clearances followed by their mean (both rounded to 2 decimals), the
        blade linear speed per timer unit, and the unrounded mid-edge distance
        of every blade.
    """

    v_speed = 2 * np.pi * radius / full_cycle  # linear speed, metres per timer unit

    raw_pitch = []
    clearances = []
    cen_blade = []
    for blade in border_rows:
        first_time, first_dist = blade.iloc[0, 0], blade.iloc[0, 1]
        second_time, second_dist = blade.iloc[1, 0], blade.iloc[1, 1]

        elapsed = second_time - first_time
        depth_change = second_dist - first_dist
        centre_dist = (second_dist + first_dist) / 2

        raw_pitch.append(np.degrees(np.arctan(depth_change / (elapsed * v_speed))))
        clearances.append(abs(centre_dist - tower_dist) * np.cos(np.deg2rad(v_angle)))
        cen_blade.append(centre_dist)

    # Express every pitch angle relative to the mean, then append the spread.
    pitch_mean = np.mean(raw_pitch)
    relative_pitch = [angle - pitch_mean for angle in raw_pitch]
    relative_pitch.append(max(relative_pitch) - min(relative_pitch))

    clearances.append(np.mean(clearances))

    pitch_angle_list = [round(value, 2) for value in relative_pitch]
    aero_dist_list = [round(value, 2) for value in clearances]

    return pitch_angle_list, aero_dist_list, v_speed, cen_blade
+
+
def find_param(path: str):

    """
    Extract the acquisition parameters encoded in a data file name.

    The file name (the part after the last path separator) is expected to
    consist of six underscore-separated fields followed by a four-character
    suffix such as ``.csv``:
    ``<wind farm>_<turbine>_<YYYYmmddHHMMSS>_<sampling frequency>_<value 1>_<value 2>``.

    :param path: file path; both ``/`` and ``\\`` separators are accepted.
    :return: ``(wind_name, turbine_code, time, sampling_fq, tunnel_1,
        tunnel_2)`` with the time formatted as ``YYYY-mm-dd HH:MM:SS``, the
        sampling frequency as ``int`` and the last two values as ``float``.
    """

    file_name = path.replace('\\', '/').rpartition('/')[2]

    # Positions of every underscore in the file name.
    cuts = [pos for pos, char in enumerate(file_name) if char == '_']

    wind_name = file_name[: cuts[0]]
    turbine_code = file_name[cuts[0] + 1: cuts[1]]
    time_code = file_name[cuts[1] + 1: cuts[2]]
    sampling_fq = int(file_name[cuts[2] + 1: cuts[3]])
    tunnel_1 = float(file_name[cuts[3] + 1: cuts[4]])
    # The last field runs up to the four-character file suffix.
    tunnel_2 = float(file_name[cuts[4] + 1: -4])

    standard_time_str = datetime.strptime(time_code, "%Y%m%d%H%M%S").strftime("%Y-%m-%d %H:%M:%S")

    return wind_name, turbine_code, standard_time_str, sampling_fq, tunnel_1, tunnel_2
+
+
def blade_dist_distribute_cal(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame,
                              tower_dist: float, v_angle: float, blade_cen_dist: list):

    """
    Compute, for every blade pass, the tower clearance and the rotation time.

    Leading-edge and trailing-edge points are merged into one time-ordered
    sequence that starts with a leading edge and ends with a trailing edge.
    Each (leading, trailing) pair is one blade pass. A pass is only evaluated
    when the next leading edge is known, because the rotation time is derived
    from the interval between two consecutive leading edges; the last pass is
    therefore skipped.

    :param data_group: measurement data with ``time`` and ``distance``
        columns (output of cycle_calculate).
    :param start_points: leading-edge jump points of every pass (``time``
        column required).
    :param end_points: trailing-edge jump points of every pass.
    :param tower_dist: distance to the tower.
    :param v_angle: pitch angle of the measurement, in degrees.
    :param blade_cen_dist: per-blade distance correction, one entry per blade.
    :return: list of three DataFrames (one per blade) with the columns
        ``clearance`` and ``r_speed``, one row per evaluated pass.
    """

    blade_count = 3
    columns = ['clearance', 'r_speed']
    records = [[] for _ in range(blade_count)]

    if len(start_points) == 0 or len(end_points) == 0:
        return [pd.DataFrame(rows, columns=columns) for rows in records]

    edges = pd.concat([start_points, end_points]).sort_values(by='time')
    # Make the sequence begin with a leading edge and finish with a trailing edge.
    if edges.iloc[0].equals(end_points.iloc[0]):
        edges = edges.iloc[1:]
    if len(edges) and edges.iloc[-1].equals(start_points.iloc[-1]):
        edges = edges.iloc[:-1]
    edge_times = edges['time'].tolist()

    cos_v = np.cos(np.deg2rad(v_angle))

    # Stop two entries early so edge_times[i + 2] (the next leading edge)
    # always exists.
    for pass_idx, i in enumerate(range(0, len(edge_times) - 2, 2)):
        start_time = edge_times[i]
        end_time = edge_times[i + 1]
        next_start_time = edge_times[i + 2]

        # Passes alternate between the blades; the blade is identified by the
        # pass number, not by the position in the edge list.
        # NOTE(review): assumes blade_cen_dist is ordered by pass order -- confirm.
        blade = pass_idx % blade_count

        in_pass = (data_group['time'] > start_time) & (data_group['time'] <= end_time)
        min_distance = data_group.loc[in_pass, 'distance'].min()
        clearance = np.abs(tower_dist - min_distance - blade_cen_dist[blade]) * cos_v

        # Interval between consecutive leading edges times three blades,
        # divided by 5,000,000.
        # NOTE(review): presumably a 5 MHz timer, which would make this the
        # revolution time in seconds rather than a speed -- confirm.
        r_speed = (next_start_time - start_time) * 3 / 5000000

        records[blade].append({'clearance': clearance, 'r_speed': r_speed})

    return [pd.DataFrame(rows, columns=columns) for rows in records]

+ 149 - 0
frequency_filter.py

@@ -0,0 +1,149 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import warnings
+from pandas.errors import SettingWithCopyWarning
+from scipy.signal import butter, filtfilt
+from scipy.signal import welch
+from scipy.fft import fft, fftfreq
+import numpy as np
+
+
# Ignore pandas' SettingWithCopyWarning throughout this module.
warnings.filterwarnings(action="ignore", category=SettingWithCopyWarning)
# Use the SimHei font, and keep the minus sign '-' from being rendered as a
# box in saved figures.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
+
+
def butter_lowpass_filter(data, cutoff, fs, order=5):
    """
    Apply a zero-phase Butterworth low-pass filter to a signal.

    :param data: samples to filter.
    :param cutoff: cut-off frequency, in the same unit as ``fs``.
    :param fs: sampling frequency.
    :param order: order of the Butterworth filter.
    :return: the filtered samples.
    """
    nyquist = 0.5 * fs
    # butter() expects the cut-off as a fraction of the Nyquist frequency.
    numerator, denominator = butter(order, cutoff / nyquist, btype='low', analog=False)
    # filtfilt runs the filter forwards and backwards.
    return filtfilt(numerator, denominator, data)
+
+
def butter_bandpass_filter(data, cutoff_low, cutoff_high, fs, order=5):
    """
    Apply a zero-phase Butterworth band-pass filter to a signal.

    :param data: samples to filter.
    :param cutoff_low: lower edge of the pass band, in the same unit as ``fs``.
    :param cutoff_high: upper edge of the pass band, in the same unit as ``fs``.
    :param fs: sampling frequency.
    :param order: order passed to the Butterworth design.
    :return: the filtered samples.
    """
    nyquist = 0.5 * fs
    # Both band edges are given to butter() as fractions of the Nyquist frequency.
    upper_edge = cutoff_high / nyquist
    lower_edge = cutoff_low / nyquist
    numerator, denominator = butter(order, [lower_edge, upper_edge], btype='band', analog=False)
    return filtfilt(numerator, denominator, data)
+
+
def apply_fft(x, fs):
    """
    Compute the single-sided amplitude spectrum of a signal.

    :param x: samples of the signal.
    :param fs: sampling frequency.
    :return: ``(xf, fft_positive)``: the non-negative frequency bins and the
        amplitude of each bin.
    """
    sample_count = len(x)
    half = sample_count // 2
    sample_spacing = 1.0 / fs

    # Subtract the mean so the DC component does not dominate the spectrum.
    coefficients = fft(x - np.mean(x))

    # Keep only the first half of the bins (the non-negative frequencies).
    xf = fftfreq(sample_count, sample_spacing)[:half]

    # Doubling makes up for the discarded negative half; dividing by the
    # sample count normalises the magnitudes to amplitudes.
    fft_positive = 2.0 / sample_count * np.abs(coefficients[0:half])
    return xf, fft_positive
+
+
+# Function to compute PSD
def compute_psd(signal, length, fs):
    """
    Estimate the power spectral density of a signal with Welch's method.

    :param signal: samples of the signal.
    :param length: value the segment length is derived from; each segment is
        ``length / 10000`` samples long (truncated to an integer).
    :param fs: sampling frequency.
    :return: ``(frequencies, psd)`` as returned by ``scipy.signal.welch``.
    :raises ValueError: if the derived segment length is smaller than one
        sample.
    """
    # welch() takes sample counts, so convert to integers explicitly instead
    # of relying on truncation inside SciPy.
    seg_length = int(length / 10000)  # samples per segment
    if seg_length < 1:
        raise ValueError("length must be at least 10000 to give a segment of one sample")
    overlap = int(length / 10000 / 20)  # overlapping samples between segments
    nfft_length = 2 ** 14  # FFT length
    frequencies, psd = welch(signal, fs=fs, window='han', nperseg=seg_length, noverlap=overlap, nfft=nfft_length)
    return frequencies, psd
+
+
def tower_cal(data, start_points, end_points, fs):

    """
    Estimate the measured tower distance from the gaps between blade passes.

    Leading-edge and trailing-edge points are merged into one time-ordered
    sequence that starts with a trailing edge and ends with a leading edge.
    Each (trailing, leading) pair bounds a gap between two blade passes. The
    mean ``distance`` inside every gap is taken after shrinking it by 1 % at
    both ends, and the gap means are averaged.

    :param data: measurement data with ``time`` and ``distance`` columns
        (output of cycle_calculate).
    :param start_points: leading-edge jump points of every pass (``time``
        column required).
    :param end_points: trailing-edge jump points of every pass.
    :param fs: sampling frequency; not used by the calculation, kept for
        interface compatibility.
    :return: mean tower distance, or NaN when no gap contains any sample.
    """

    if len(start_points) == 0 or len(end_points) == 0:
        return float('nan')

    edges = pd.concat([start_points, end_points]).sort_values(by='time')
    # Make the sequence begin with a trailing edge and finish with a leading edge.
    if edges.iloc[0].equals(start_points.iloc[0]):
        edges = edges.iloc[1:]
    if len(edges) and edges.iloc[-1].equals(end_points.iloc[-1]):
        edges = edges.iloc[:-1]
    edge_times = edges['time'].tolist()

    gap_means = []
    # zip() pairs the edges two by two and ignores a trailing unpaired edge.
    for gap_start, gap_end in zip(edge_times[0::2], edge_times[1::2]):
        lower = gap_start + 0.01 * (gap_end - gap_start)
        upper = gap_end - 0.01 * (gap_end - gap_start)

        in_gap = (data['time'] > lower) & (data['time'] <= upper)
        gap_mean = data.loc[in_gap, 'distance'].mean()

        # A gap without usable samples must not turn the overall mean into NaN.
        if not pd.isna(gap_mean):
            gap_means.append(gap_mean)

    if not gap_means:
        return float('nan')

    return np.mean(gap_means)
+
+
def process_fft(data, fs):

    """
    Low-pass filter the distance signal and compute its amplitude spectrum.

    The first 95 % of the samples are taken and shortened to the largest
    power-of-two length that fits. The ``distance`` column is low-pass
    filtered (cut-off ``0.001 * fs``) and the single-sided amplitude spectrum
    of the filtered signal is computed. Only the first 1000 frequency bins are
    returned, with amplitudes multiplied by 1000 and bins below 0.1 set to 0.

    :param data: data with a ``distance`` column (output of tower_cal).
    :param fs: sampling frequency.
    :return: ``(segment, fft_x, fft_y_scaled, peak_frequency, peak_value)``:
        the analysed samples (a copy, with an added ``distance_filtered``
        column), the frequency list, the scaled amplitude list, and the
        frequency and amplitude of the largest peak, both rounded to 2
        decimals. Both lists start with an extra leading 0.
    :raises ValueError: if ``data`` has fewer than two rows.
    """

    cutoff_low = 0.001 * fs  # low-pass cut-off frequency

    usable_length = int(len(data) * 0.95)
    if usable_length < 1:
        raise ValueError("process_fft needs at least two samples")

    # Largest power of two that does not exceed the usable length.
    desired_length = 2 ** (usable_length.bit_length() - 1)

    # Copy so the new column is added to an independent frame, not to a slice
    # of the caller's data.
    segment = data.head(desired_length).copy()
    segment['distance_filtered'] = butter_lowpass_filter(segment['distance'], cutoff_low, fs)

    xf_filter, fft_positive_filter = apply_fft(segment['distance_filtered'].values, fs)

    # Keep the first 1000 bins and prepend a 0 to both lists.
    fft_x = [0] + xf_filter[0:1000].tolist()
    # NOTE(review): the factor 1000 presumably converts metres to millimetres -- confirm.
    fft_y_scaled = [0] + [amplitude * 1000 for amplitude in fft_positive_filter[0:1000].tolist()]

    # Suppress everything below 0.1 on the frequency axis.
    fft_y_scaled = [0 if freq < 0.1 else amplitude for freq, amplitude in zip(fft_x, fft_y_scaled)]

    max_value = max(fft_y_scaled)
    max_index = fft_y_scaled.index(max_value)

    return segment, fft_x, fft_y_scaled, round(fft_x[max_index], 2), round(max_value, 2)