import os import json import pandas as pd import numpy as np import seaborn as sns import matplotlib.pyplot as plt from matplotlib.ticker import MaxNLocator from typing import Tuple, List import warnings import time import sys import frequency_filter as ff from datetime import datetime warnings.filterwarnings("ignore", category=FutureWarning) # 忽略特定警告 plt.rcParams['font.sans-serif'] = ['SimHei'] # 使用黑体 plt.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题 def result_main(): """ 创建data目录,返回历史分析数据存放的文件路径 """ # 获取当前程序的绝对路径 python_interpreter_path = sys.executable project_directory = os.path.dirname(python_interpreter_path) data_folder = os.path.join(project_directory, 'data') # 检查data文件夹是否存在,如果不存在则创建 if not os.path.exists(data_folder): os.makedirs(data_folder) # CSV文件路径 csv_file_path = os.path.join(data_folder, 'history_data.csv') # 检查CSV文件是否存在,如果不存在则创建一个空的CSV文件 if not os.path.exists(csv_file_path): pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率', '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差', '叶片1净空值', '叶片2净空值', '叶片3净空值', '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转', '振动幅值', '振动主频']).to_csv(csv_file_path, index=False) return csv_file_path def delete_data(name): """ 删除历史分析数据 :param name: 删除条件 :return: csv文件路径 """ # 获取当前程序的绝对路径 python_interpreter_path = sys.executable project_directory = os.path.dirname(python_interpreter_path) data_folder = os.path.join(project_directory, 'data') # CSV文件路径 csv_file_path = os.path.join(data_folder, 'history_data.csv') df = pd.read_csv(csv_file_path) condition = ((df['时间'].astype(str).str.contains(name[0])) & (df['场站'].astype(str).str.contains(name[1])) & (df['风机编号'].astype(str).str.contains(name[2]))) # 删除满足条件的行 df = df[~condition] # 如果需要,可以将修改后的 DataFrame 保存回 CSV 文件 df.to_csv(csv_file_path, index=False) return csv_file_path def history_data(name): """ 读取历史分析数据 :param name: 接口返回列表 :return: """ time_code = name[0] wind_name = name[1] turbine_code = name[2] # 获取当前程序的绝对路径 python_interpreter_path = sys.executable project_directory = 
os.path.dirname(python_interpreter_path) data_folder = os.path.join(project_directory, 'data') time_code_cleaned = time_code.replace("-", "").replace(":", "").replace(" ", "") json_filename = f"{wind_name}_{turbine_code}_{time_code_cleaned}.json" json_file_path = os.path.join(data_folder, json_filename) if not os.path.exists(json_file_path): raise ValueError("文件不存在") with open(json_file_path, 'r') as f: data = json.load(f) return data def data_analyse(path: List[str]): """ 创建data目录,把分析数据保存到历史记录中,同时返回全量分析数据 """ # 基础配置参数 locate_file = path[0] measure_file = path[1] noise_reduction = 0.000001 # 如果一个距离值的所有样本量小于总样本量的noise_reduction,则被去掉 min_difference = 1.5 # 如果相邻2个点的距离差大于min_difference,则被注意是否是周期节点 angle_cone = float(path[2]) # 锥角 axial_inclination = float(path[3]) # 轴向倾角 return_list = [] # 读取文件信息,包括风场名、风机编号、时间、采样频率、2个通道俯仰角 wind_name, turbine_code, time_code, sampling_fq, angle_nan, angle_cen = find_param(locate_file) wind_name_1, turbine_code_1, time_code_1, sampling_fq_1, angle_tip, angle_root = find_param(measure_file) sampling_fq_1 = sampling_fq_1 * 1000 sampling_fq = sampling_fq * 1000 print(wind_name, turbine_code, time_code, sampling_fq, angle_nan, angle_cen) print(wind_name_1, turbine_code_1, time_code, sampling_fq_1, angle_tip, angle_root) # 读取数据,并检查是否有时间序列异常,分离2通道数据 data_nan, data_cen = process_data(locate_file) data_tip, data_root = process_data(measure_file) # 全部数据进行降噪、去除异常点处理,叶根叶尖数据计算叶片扫掠起始、结束点,轮毂中心数据计算距离均值 start_tip, end_tip, filtered_data_tip = cycle_calculate(data_tip, noise_reduction, min_difference) start_root, end_root, filtered_data_root = cycle_calculate(data_root, noise_reduction, min_difference) start_nan, end_nan, filtered_data_nan = cycle_calculate(data_nan, noise_reduction, min_difference) filtered_data_cen = tower_filter(data_cen, noise_reduction) dist_cen = np.mean(filtered_data_cen.iloc[:, 1].tolist()) filtered_data_cen.iloc[:, 1] = filtered_data_cen.iloc[:, 1] * np.cos(np.deg2rad(angle_cen)) # 检查起始结束点顺序,确保叶根叶尖测点同步开始、结束 if end_tip.iloc[0, 0] 
< start_root.iloc[0, 0]: start_tip = start_tip.drop(start_tip.index[0]) end_tip = end_tip.drop(end_tip.index[0]) if start_root.iloc[0, 0] < start_tip.iloc[0, 0] < end_tip.iloc[0, 0] < end_root.iloc[0, 0]: pass else: raise ValueError("The elements are not in the expected order.") # 计算叶根、叶尖处的塔筒距离,对轮毂中心做FFT分析 tower_dist_tip = ff.tower_cal(filtered_data_tip, start_tip, end_tip, sampling_fq_1) tower_dist_root = ff.tower_cal(filtered_data_root, start_root, end_root, sampling_fq_1) tower_dist_nan = ff.tower_cal(filtered_data_nan, start_nan, end_nan, sampling_fq) lowpass_data, fft_x, fft_y, tower_freq, tower_max= ff.process_fft(filtered_data_cen, sampling_fq) # 根据起始结束点,对叶根、对叶片数据进行归一化处理,计算每个叶片的散点表、线表、边界点表、标准循环周期长度、每个叶片平均最小值 result_line_tip, result_scatter_tip, border_rows_tip, cycle_len_tip, min_tip \ = data_normalize(filtered_data_tip, start_tip, end_tip) result_line_root, result_scatter_root, border_rows_root, cycle_len_root, min_root \ = data_normalize(filtered_data_root, start_root, end_root) result_line_nan, result_scatter_nan, border_rows_nan, cycle_len_nan, min_nan \ = data_normalize(filtered_data_nan, start_nan, end_nan) # 计算3个叶片的平均轮廓,3个叶片的形状差 result_avg_tip, result_diff_tip = blade_shape(result_line_tip) result_avg_root, result_diff_root = blade_shape(result_line_root) # 对叶尖的边界点表和俯仰角做坐标归一化处理 border_rows_tip_new, angle_tip_new = coordinate_normalize(border_rows_tip, angle_tip) border_rows_nan_new, angle_nan_new = coordinate_normalize(border_rows_nan, angle_nan) # 对叶片的边界点表做半径计算 tip_r = radius_cal(border_rows_tip_new, angle_tip_new, dist_cen, angle_cen, axial_inclination, angle_cone) root_r = radius_cal(border_rows_root, angle_root, dist_cen, angle_cen, axial_inclination, angle_cone) nan_r = radius_cal(border_rows_nan_new, angle_nan_new, dist_cen, angle_cen, axial_inclination, angle_cone) # 计算叶片测量位置处的绝对桨距角、相对桨距角、线速度、叶片内部中心点距离 pitch_angle_tip, aero_dist_tip, v_speed_tip, cen_blade_tip = ( blade_angle_aero_dist(border_rows_tip, tip_r, cycle_len_tip, tower_dist_tip, 
angle_tip_new)) pitch_angle_root, aero_dist_root, v_speed_root, cen_blade_root = ( blade_angle_aero_dist(border_rows_root, root_r, cycle_len_root, tower_dist_root, angle_root)) pitch_angle_nan, aero_dist_nan, v_speed_nan, cen_blade_nan = ( blade_angle_aero_dist(border_rows_nan_new, nan_r, cycle_len_nan, tower_dist_nan, angle_nan_new)) # 将列表转换为 numpy 数组 cen_blade_tip_array = np.array(cen_blade_tip) cen_blade_nan_array = np.array(cen_blade_nan) min_tip_array = np.array(min_tip) min_nan_array = np.array(min_nan) abs_diff = np.abs(cen_blade_tip_array - min_tip_array) # 计算差值的绝对值 abs_diff_nan = np.abs(cen_blade_nan_array - min_nan_array) # 计算差值的绝对值 blade_dist_tip = abs_diff * np.cos(np.deg2rad(angle_tip_new)) blade_dist_nan = abs_diff_nan * np.cos(np.deg2rad(angle_nan_new)) blade_dist_tip.tolist() # 如果需要将结果转换回列表 blade_dist_nan.tolist() # 如果需要将结果转换回列表 # 计算叶片转速-净空散点表 dist_distribute = blade_dist_distribute_cal(filtered_data_tip, start_tip, end_tip, tower_dist_tip, angle_tip_new, blade_dist_tip) dist_distribute_nan = blade_dist_distribute_cal(filtered_data_nan, start_nan, end_nan, tower_dist_nan, angle_nan_new, blade_dist_nan) # dist_distribute = [df.round(5) for df in dist_distribute] dist_distribute = [df.round(5) for df in dist_distribute_nan] # 获取净空距离的最小值和最大值,以及它们对应的转速值,并分别保存在列表中 min_values = [] min_keys = [] max_values = [] max_keys = [] mean_values = [] for df in dist_distribute: second_col_min = df[df.columns[1]].min() second_col_max = df[df.columns[1]].max() min_row = df[df[df.columns[1]] == second_col_min] max_row = df[df[df.columns[1]] == second_col_max] min_values.append(round(second_col_min, 2)) min_keys.append(round(min_row.iloc[0][df.columns[0]], 2)) max_values.append(round(second_col_max, 2)) max_keys.append(round(max_row.iloc[0][df.columns[0]], 2)) for i in range(3): mean_values.append(round((max_values[i] + min_values[i]) / 2, 2)) # 将叶片线表数据乘以线速度,和俯仰角,得到叶片横截面的真实轮廓 for df in result_line_tip: first_column = df.iloc[:, 0] sec_column = df.iloc[:, 1] df.iloc[:, 
0] = first_column * v_speed_tip df.iloc[:, 1] = sec_column * np.cos(np.deg2rad(angle_tip_new)) for df in result_line_root: first_column = df.iloc[:, 0] sec_column = df.iloc[:, 1] df.iloc[:, 0] = first_column * v_speed_root df.iloc[:, 1] = sec_column * np.cos(np.deg2rad(angle_root)) for df in result_scatter_tip: first_column = df.iloc[:, 0] sec_column = df.iloc[:, 1] df.iloc[:, 0] = first_column * v_speed_tip df.iloc[:, 1] = sec_column * np.cos(np.deg2rad(angle_tip_new)) for df in result_scatter_root: first_column = df.iloc[:, 0] sec_column = df.iloc[:, 1] df.iloc[:, 0] = first_column * v_speed_root df.iloc[:, 1] = sec_column * np.cos(np.deg2rad(angle_root)) # 将叶片平均轮廓数据乘以线速度,得到实际叶片长度 avg_tip = result_avg_tip.iloc[:, 0] result_avg_tip.iloc[:, 0] = avg_tip * v_speed_tip avg_root = result_avg_root.iloc[:, 0] result_avg_root.iloc[:, 0] = avg_root * v_speed_root twist_1 = round(np.abs(pitch_angle_root[0] - pitch_angle_tip[0]), 2) twist_2 = round(np.abs(pitch_angle_root[1] - pitch_angle_tip[1]), 2) twist_3 = round(np.abs(pitch_angle_root[2] - pitch_angle_tip[2]), 2) twist_avg = round((twist_1 + twist_2 + twist_3) / 3, 2) # 降低给数据采样频率,降低接口负担 sampling_num = int(0.015 * sampling_fq_1) # 将原始数据的时间列由计时时钟转换为实际时间 data_tip.iloc[:, 0] = data_tip.iloc[:, 0] / 5000000 data_root.iloc[:, 0] = data_root.iloc[:, 0] / 5000000 lowpass_data.iloc[:, 0] = lowpass_data.iloc[:, 0] / 5000000 # 将需要保存到CSV的数据添加到return_list中 return_list.append(str(time_code)) return_list.append(str(wind_name)) return_list.append(str(turbine_code)) return_list.append(sampling_fq_1) return_list.append(pitch_angle_root[0]) return_list.append(pitch_angle_root[1]) return_list.append(pitch_angle_root[2]) return_list.append(pitch_angle_root[3]) return_list.append(mean_values[0]) return_list.append(mean_values[1]) return_list.append(mean_values[2]) return_list.append(twist_1) return_list.append(twist_2) return_list.append(twist_3) return_list.append(twist_avg) return_list.append(tower_max) return_list.append(tower_freq) # 
将return_list转换为DataFrame并追加到CSV文件 df_new_row = pd.DataFrame([return_list], columns=['时间', '场站', '风机编号', '采样频率', '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差', '叶片1净空值', '叶片2净空值', '叶片3净空值', '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转', '振动幅值', '振动主频']) json_output = { 'original_plot': { 'blade_tip': { 'xdata': data_tip.iloc[:, 0].tolist()[::sampling_num], 'ydata': data_tip.iloc[:, 1].tolist()[::sampling_num] }, 'blade_root': { 'xdata': data_root.iloc[:, 0].tolist()[::sampling_num], 'ydata': data_root.iloc[:, 1].tolist()[::sampling_num] } }, 'fft_plot': { 'lowpass': { 'xdata': lowpass_data['time'].tolist()[::sampling_num], 'ydata': lowpass_data['distance_filtered'].tolist()[::sampling_num], 'xmax': max(lowpass_data['time'].tolist()), 'xmin': min(lowpass_data['time'].tolist()), 'ymax': max(lowpass_data['distance_filtered'].tolist()) + 0.02, 'ymin': min(lowpass_data['distance_filtered'].tolist()) - 0.02 }, 'fft': { 'xdata': fft_x, 'ydata': fft_y, 'xmax': max(fft_x), 'xmin': min(fft_x), 'ymax': max(fft_y) + 0.02, 'ymin': 0 } }, 'blade_tip': { 'first_blade': { 'xdata': result_line_tip[0].iloc[:, 0].tolist(), 'ydata': result_line_tip[0].iloc[:, 1].tolist() }, 'second_blade': { 'xdata': result_line_tip[1].iloc[:, 0].tolist(), 'ydata': result_line_tip[1].iloc[:, 1].tolist() }, 'third_blade': { 'xdata': result_line_tip[2].iloc[:, 0].tolist(), 'ydata': result_line_tip[2].iloc[:, 1].tolist() }, 'avg_blade': { 'xdata': result_avg_tip.iloc[:, 0].tolist(), 'ydata': result_avg_tip.iloc[:, 1].tolist() } }, 'blade_root': { 'first_blade': { 'xdata': result_line_root[0].iloc[:, 0].tolist(), 'ydata': result_line_root[0].iloc[:, 1].tolist() }, 'second_blade': { 'xdata': result_line_root[1].iloc[:, 0].tolist(), 'ydata': result_line_root[1].iloc[:, 1].tolist() }, 'third_blade': { 'xdata': result_line_root[2].iloc[:, 0].tolist(), 'ydata': result_line_root[2].iloc[:, 1].tolist() }, 'avg_blade': { 'xdata': result_avg_root.iloc[:, 0].tolist(), 'ydata': result_avg_root.iloc[:, 1].tolist() } }, 
'dist_distribution': { 'first_blade': { 'xdata': dist_distribute[0].iloc[:, 0].tolist(), 'ydata': dist_distribute[0].iloc[:, 1].tolist() }, 'second_blade': { 'xdata': dist_distribute[1].iloc[:, 0].tolist(), 'ydata': dist_distribute[1].iloc[:, 1].tolist() }, 'third_blade': { 'xdata': dist_distribute[2].iloc[:, 0].tolist(), 'ydata': dist_distribute[2].iloc[:, 1].tolist() } }, 'analyse_table': { 'pitch_angle_diff': { 'blade_1': pitch_angle_root[0], 'blade_2': pitch_angle_root[1], 'blade_3': pitch_angle_root[2], 'blade_relate': pitch_angle_root[3] }, 'aero_dist': { 'first_blade': { 'x_min': min_keys[0], 'y_min': min_values[0], 'x_max': max_keys[0], 'y_max': max_values[0], 'y_diff': np.abs(max_values[0] - min_values[0]), 'y_ava': mean_values[0] }, 'second_blade': { 'x_min': min_keys[1], 'y_min': min_values[1], 'x_max': max_keys[1], 'y_max': max_values[1], 'y_diff': np.abs(max_values[1] - min_values[1]), 'y_ava': mean_values[1] }, 'third_blade': { 'x_min': min_keys[2], 'y_min': min_values[2], 'x_max': max_keys[2], 'y_max': max_values[2], 'y_diff': np.abs(max_values[2] - min_values[2]), 'y_ava': mean_values[2] } }, 'blade_twist': { 'blade_1': twist_1, 'blade_2': twist_2, 'blade_3': twist_3, 'blade_avg': twist_avg }, 'tower_vibration': { 'max_vibration': tower_max, 'main_vibration_freq': tower_freq } } } # 获取当前程序的绝对路径 python_interpreter_path = sys.executable project_directory = os.path.dirname(python_interpreter_path) data_folder = os.path.join(project_directory, 'data') # 检查data文件夹是否存在,如果不存在则创建 if not os.path.exists(data_folder): os.makedirs(data_folder) # CSV文件路径 csv_file_path = os.path.join(data_folder, 'history_data.csv') # 检查CSV文件是否存在,如果不存在则创建一个空的CSV文件 if not os.path.exists(csv_file_path): pd.DataFrame(columns=['时间', '场站', '风机编号', '采样频率', '叶片1角度偏差', '叶片2角度偏差', '叶片3角度偏差', '相对角度偏差', '叶片1净空值', '叶片2净空值', '叶片3净空值', '叶片1扭转', '叶片2扭转', '叶片3扭转', '平均扭转', '振动幅值', '振动主频']).to_csv(csv_file_path, index=False) df_new_row.to_csv(csv_file_path, mode='a', header=False, index=False) 
time_code_cleaned = time_code.replace("-", "").replace(":", "").replace(" ", "") json_filename = f"{wind_name}_{turbine_code}_{time_code_cleaned}.json" json_file_path = os.path.join(data_folder, json_filename) with open(json_file_path, 'w') as json_file: json.dump(json_output, json_file, indent=4) print('csv文件路径' + str(csv_file_path)) print(result_line_tip[0].iloc[:, 0]) print(result_line_root[0].iloc[:, 0]) print('振动主频' + str(tower_freq)) print('振动幅值' + str(tower_max)) print('最小值', min_values) print('最小值对应的键', min_keys) print('最大值', max_values) print('最大值对应的键', max_keys) print('叶尖速度' + str(v_speed_tip), '叶根速度' + str(v_speed_root)) print('新俯仰角' + str(angle_tip_new)) print('轮毂中心距离' + str(dist_cen)) print('叶根原始数据采样时间长度' + str(data_root.iloc[-1, 0])) # plot_data(result_line_tip, 'line', 'data1') # plot_data(result_diff_tip, 'line', 'data_diff_1') # plot_data(result_scatter_tip, 'scatter', 'data1') plot_data(result_line_root, 'line', 'data2') # plot_data(result_diff_root, 'line', 'data_diff_2') plot_data(result_scatter_root, 'scatter', 'data2') # plot_data(dist_distribute, 'scatter', 'dist_distribute') return json_output def process_data(file_path): """ 打开、解决时间重置、按时间清洗异常值、分列数据 """ # 读取第2、4、9列的数据 data = pd.read_csv(file_path, usecols=[1, 3, 8], header=None, engine='c') data = data.head(int(len(data) * 0.95)) print('原始数据长度' + str(len(data))) ''' # 绘制原始数据图 # 只取前1%的数据 # data = data.head(int(len(data)* 0.01)) data.columns = ['time', 'distance1', 'distance2'] plt.figure(figsize=(300, 150)) sns.scatterplot(data=data, x='time', y='distance1', s=50, color='green') sns.scatterplot(data=data, x='time', y='distance2', s=50, color='red') abxy = plt.gca() # 获取当前坐标轴对象 plt.grid(linewidth=2) # 设置网格线宽度为2 abxy.xaxis.set_major_locator(MaxNLocator(nbins=100)) # 设置x轴主刻度的最大数量为10 plt.xlabel('时间', fontsize=16, fontweight='bold') # 添加x轴标签 plt.ylabel('距离(m)', fontsize=16, fontweight='bold') # 添加y轴标签 abxy.tick_params(axis='x', labelsize=14, labelcolor='black', width=2) # 设置x轴刻度标签 
abxy.tick_params(axis='y', labelsize=14, labelcolor='black', width=2) # 设置y轴刻度标签 plt.savefig(f"{"original"}.png", dpi=100, pil_kwargs={"icc_profile": False}) plt.close() ''' # 找到第一列中最大值和最小值的位置 max_value = data.iloc[:, 0].max() max_index = data.iloc[:, 0].idxmax() min_index = data.iloc[:, 0].idxmin() # 检查最小值的位置是否是最大值位置的下一个 if min_index == max_index + 1: # 将最小值及其之后的所有值都加上最大值 data.iloc[min_index:, 0] += max_value # 按时间列筛选清洗异常值 last_time = data.iloc[-1, 0] first_time = data.iloc[0, 0] filtered_data = data[(data.iloc[:, 0] > last_time) & (data.iloc[:, 0] < first_time)] print(f'时间列异常数据: {filtered_data}') print(f'起止时间: {first_time}, {last_time}') data = data[data.iloc[:, 0] >= first_time] data = data[data.iloc[:, 0] <= last_time] data.reset_index(drop=True, inplace=True) # 计算最小值 min_time = data.iloc[:, 0].min() data.iloc[:, 0] -= min_time # 分为两组数据 data_1 = data.iloc[:, [0, 1]] data_2 = data.iloc[:, [0, 2]] # 分别命名列 data_1.columns = ['time', 'distance'] data_2.columns = ['time', 'distance'] return data_1, data_2 def tower_filter(data_group: pd.DataFrame, noise_threshold: float): """ 对轮毂中心数据进行降噪,和前项填充 :param data_group: process_data计算完成后轮毂中心的数据。 :param noise_threshold: 去掉占比小于noise_threshold的数据。 :return: filtered_data:降噪后的数据 """ print('正在进行数据清洗......') time.sleep(1) # 计算distance的分布 distance_counts = data_group['distance'].value_counts(normalize=True) noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index data_group.loc[noise_indices, 'distance'] = np.nan # 选择频率最大的5个值 top_5_distances = distance_counts.head(5).index mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean() data_group.loc[(data_group['distance'] < mean_values-20) | ( data_group['distance'] > mean_values*1.1), 'distance'] = np.nan nan_count = data_group['distance'].isna().sum() all_count = data_group.shape[0] print(f"中值是:{mean_values},替换为NaN的异常distance值的数量是: 
{nan_count}, 总数量是: {all_count}," f"占比: {nan_count / all_count * 100:.2f}%") # 前向填充 data_group['distance'] = data_group['distance'].fillna(method='ffill') filtered_data = data_group return filtered_data def cycle_calculate(data_group: pd.DataFrame, noise_threshold: float, min_distance: float): """ 对数据进行降噪,和前项填充;计算数据的周期节点,叶片前缘突变点、后缘突变点 :param data_group: process_data计算完成后的数据。 :param noise_threshold: 去掉占比小于noise_threshold的数据。 :param min_distance: 区分叶片和塔筒的距离差值。 :return: start_points:周期开始点, end_points:周期结束点, filtered_data:降噪后的数据 """ print('正在计算周期节点......') time.sleep(1) # 计算distance的分布 distance_counts = data_group['distance'].value_counts(normalize=True) noise_distance_threshold = distance_counts[distance_counts < noise_threshold].index noise_indices = data_group[data_group['distance'].isin(noise_distance_threshold)].index data_group.loc[noise_indices, 'distance'] = np.nan # 选择频率最大的5个值 top_5_distances = distance_counts.head(5).index mean_values = data_group[data_group['distance'].isin(top_5_distances)]['distance'].mean() data_group.loc[(data_group['distance'] < mean_values-30) | ( data_group['distance'] > mean_values*1.1), 'distance'] = np.nan nan_count = data_group['distance'].isna().sum() all_count = data_group.shape[0] print(f"中值是:{mean_values},替换为NaN的distance异常值的数量是: {nan_count}, 总数量是: {all_count}," f"占比: {nan_count / all_count * 100:.2f}%") # 前向填充 data_group['distance'] = data_group['distance'].fillna(method='ffill') filtered_data = data_group # 计算相邻两行distance的差值 filtered_data['distance_diff'] = filtered_data['distance'].diff() large_diff_indices = filtered_data[filtered_data['distance_diff'] > min_distance].index small_diff_indices = filtered_data[filtered_data['distance_diff'] < -min_distance].index filtered_data = filtered_data.drop(columns=['distance_diff']) start_points = pd.DataFrame() end_points = pd.DataFrame() # 遍历所有差值大于的行 for idx in large_diff_indices: # 获取当前行的 distance 值 current_distance = filtered_data.loc[idx, 'distance'] next_rows_large = 
filtered_data.loc[idx - 200: idx - 1] # 检查是否任意 distance 的值小于 current_distance - 2 if next_rows_large['distance'].le(current_distance - min_distance).all(): # 如果都小于,则将当前行和下一行添加到 special_points 中 end_points = pd.concat([end_points, filtered_data.loc[[idx - 1]]]) for idx in small_diff_indices: # 获取当前行的 distance 值 current_distance = filtered_data.loc[idx - 1, 'distance'] next_rows_small = filtered_data.iloc[idx: idx + 200] # 检查是否任意 distance 的值小于 current_distance - 2 if next_rows_small['distance'].le(current_distance - min_distance).all(): # 如果都小于,则将当前行和下一行添加到 special_points 中 start_points = pd.concat([start_points, filtered_data.loc[[idx]]]) if end_points.iloc[0, 0] < start_points.iloc[0, 0]: end_points = end_points.drop(end_points.index[0]) if end_points.iloc[-1, 0] < start_points.iloc[-1, 0]: start_points = start_points.drop(start_points.index[-1]) else: pass return start_points, end_points, filtered_data def data_normalize(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame) \ -> Tuple[List[pd.DataFrame], List[pd.DataFrame], List[pd.DataFrame], int, list]: """ 提取每个叶片的数据并归一化,输出散点图和拟合图 :param data_group: cycle_calculate计算完成后的数据。 :param start_points: 所有每个周期开始点,叶片前缘突变点。 :param end_points: 叶片后缘突变点。 :return: turbines_processed: 每个叶片的拟合数据, turbines_scattered: 每个叶片的散点数据, border_rows: 每个叶片的2个边缘数据, normalize_cycle: 周期长度 """ print('正在进行各周期归一化......') time.sleep(1) combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time') # 检查排序后的数据从start开始,end结束 if combined_df_sorted.iloc[0].equals(end_points.iloc[0]): combined_df_sorted = combined_df_sorted.iloc[1:] if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]): combined_df_sorted = combined_df_sorted.iloc[:-1] combined_df_sorted.reset_index(drop=True, inplace=True) # 将 start_points 中的时间点转换为列表 start_times = combined_df_sorted['time'].tolist() print('本次测量风机完整旋转圈数:'+ str(len(start_times) / 2)) time.sleep(1) normalize_cycle = start_times[1] - start_times[0] full_cycle = 
int((start_times[2] - start_times[0]) * 3) turbines = [pd.DataFrame() for _ in range(3)] # 遍历所有起始时间点 for i in range(0, len(start_times), 2): # 获取当前起始和结束时间点 start_time = start_times[i] end_time = start_times[i + 1] # 根据当前起始时间点和结束时间点对数据进行分段 segment = data_group[(data_group['time'] > start_time) & (data_group['time'] <= end_time)] if segment is None: pass else: # 周期归一化 ratio = (end_time - start_time) / normalize_cycle segment.loc[:, 'time'] = (segment['time'] - start_time) / ratio # segment.loc[:, 'distance'] = ff.butter_lowpass_filter(segment['distance'], cutoff_low, fs) # 将结果添加到相应的 turbine 数据框中 turbines[i % 3] = pd.concat([turbines[i % 3], segment]) # 数据分组清洗、求平均 turbines_processed = [] turbines_scattered = [] min_list = [] sd_time = [-1, -1] time_list = list(range(0, normalize_cycle, 9000)) # time_list = [(i + 1) * normalize_cycle / fs * 100 for i in range(fs * 100)] # 生成时间序列 for turbine in turbines: # 按时间排序 turbine_sorted = turbine.sort_values(by='time').reset_index(drop=True) # 找到time列的第一个值 first_time = turbine_sorted['time'].iloc[0] # 分组,时间列每1000为一组(每40个时间点一组) bins = list(range(int(first_time), int(turbine_sorted['time'].max()), 9000)) # 原始代码 # bins = list(range(int(first_time), int(turbine_sorted['time'].max()) + len(start_times), int(fs / 50))) grouped = turbine_sorted.groupby(pd.cut(turbine_sorted['time'], bins=bins, right=False)) # 初始化一个空的 DataFrame 用于存储处理后的数据 processed_df = pd.DataFrame() scattered_df = pd.DataFrame() mean_points = [] diff_points = [] # 对每个组进行处理 for _, group in grouped: # 去除 distance 最大和最小的前5% quantile_5 = group['distance'].quantile(0.05) quantile_95 = group['distance'].quantile(0.95) filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)] # 计算均值 mean_point = filtered_group['distance'].mean() mean_points.append(mean_point) # 遍历 mean_points 列表,计算每个元素与其下一个元素的差值 for i in range(len(mean_points) - 1): diff = abs(mean_points[i + 1] - mean_points[i]) diff_points.append(diff) start_index = int(len(diff_points) * 
0.05) end_index = int(len(diff_points) * 0.95) subset1 = diff_points[start_index:end_index] sdr_diff = np.max(subset1) * 1.1 min_list.append(min(mean_points)) # 找到第一个和最后一个小于 sdr_diff 的序号 first_index = np.where(diff_points < sdr_diff)[0][0] last_index = np.where(diff_points < sdr_diff)[0][-1] ''' # 创建一个总图中有3个分图的形式 fig, axs = plt.subplots(1, 1, figsize=(15, 9)) plt.subplots_adjust(hspace=2) # 绘制 diff_points 的折线图 axs.plot(diff_points, label='Diff Points', color='red', marker='x', markersize=5) axs.axhline(y=sdr_diff, color='red', linestyle='--') axs.legend() axs.set_title('Diff Points') axs.set_xlabel('Index') axs.set_ylabel('Value') # 显示图形 plt.tight_layout() plt.show() ''' for index, (bin, group) in enumerate(grouped): # 去除 distance 最大和最小的前5% quantile_5 = group['distance'].quantile(0.05) quantile_95 = group['distance'].quantile(0.95) filtered_group = group[(group['distance'] > quantile_5) & (group['distance'] < quantile_95)] if first_index <= index < last_index: # 如果斜率小于,则认为该组数据不是突变点 # 计算中点 mid_point = filtered_group.mean() # 将中点转换为 DataFrame 并添加到处理后的 DataFrame 中 mid_point_df = pd.DataFrame([mid_point]) mid_point_df.iloc[0, 0] = time_list[index] processed_df = pd.concat([processed_df, mid_point_df], ignore_index=True) scattered_df = pd.concat([scattered_df, filtered_group], ignore_index=True) else: pass # 找到time列的最小值和最大值 min_time = processed_df['time'].min() max_time = processed_df['time'].max() if sd_time == [-1, -1]: sd_time = [min_time, max_time] elif sd_time[0] < min_time: sd_time[0] = min_time elif sd_time[1] > max_time: sd_time[1] = max_time # 将处理后的 DataFrame 添加到列表中 turbines_processed.append(processed_df) turbines_scattered.append(scattered_df) # 把三组叶片数据按sd_time进行筛选,并把每个的边界数据保存 border_rows = [] for i, turbine in enumerate(turbines_processed): # 找到离 sd_time[0] 最近的行的索引 closest_index_0 = (turbine['time'] - sd_time[0]).abs().idxmin() turbine.at[closest_index_0, 'time'] = sd_time[0] sd_time_row_0 = turbine.loc[closest_index_0] # 找到离 sd_time[1] 最近的行的索引 
closest_index_1 = (turbine['time'] - sd_time[1]).abs().idxmin() turbine.at[closest_index_1, 'time'] = sd_time[1] sd_time_row_1 = turbine.loc[closest_index_1] # 切片 turbine,从 closest_index_0 到 closest_index_1 turbines_processed[i] = turbine.iloc[closest_index_0:closest_index_1 + 1].reset_index(drop=True) sd_time_rows_turbine = pd.concat([pd.DataFrame([sd_time_row_0]), pd.DataFrame([sd_time_row_1])] , ignore_index=True) border_rows.append(sd_time_rows_turbine) return turbines_processed, turbines_scattered, border_rows, full_cycle, min_list def blade_shape(turbines_processed: List[pd.DataFrame]): """ 计算叶片平均形状、叶片形状偏差。 :param turbines_processed:叶片拟合曲线数据,来自data_normalize :return: 叶片平均形状、叶片形状偏差 """ print('正在进行叶片外形偏差计算......') row_counts = [df.shape[0] for df in turbines_processed] num_rows = min(row_counts) # 创建一个新的data.frame用于保存结果 turbine_avg = pd.DataFrame(index=range(num_rows), columns=['time', 'distance']) turbine_diff = [pd.DataFrame(index=range(num_rows), columns=['time', 'distance']) for _ in turbines_processed] # 遍历每一行 for i in range(num_rows): distances = [df.loc[i, 'distance'] for df in turbines_processed] # 获取每个data.frame的distance列的值 avg_distance = sum(distances) / len(distances) # 计算distance列的平均值 time_value = turbines_processed[0].loc[i, 'time'] # 获取time列的值 turbine_avg.loc[i, 'time'] = time_value turbine_avg.loc[i, 'distance'] = avg_distance for j in range(len(distances)): distances[j] = distances[j] - avg_distance turbine_diff[j].loc[i, 'time'] = time_value turbine_diff[j].loc[i, 'distance'] = distances[j] return turbine_avg, turbine_diff def coordinate_normalize(tip_border_rows: List[pd.DataFrame], tip_angle): """ 将叶尖测量数据和叶根、轮毂中心的测量原点归一化。 :param tip_border_rows: 3个叶尖边缘数据 :param tip_angle: 叶尖测量俯仰角 :return: 归一化后叶尖数据,叶尖俯仰角 """ tip_angle1 = np.deg2rad(tip_angle) tip_angle_list = [] for turbine in tip_border_rows: tip_angle_cal0 = ((np.sin(tip_angle1) * turbine['distance'] - 0.07608) / (np.cos(tip_angle1) * turbine['distance'])) tip_angle_cal = 
np.arctan(tip_angle_cal0) turbine['distance'] = (turbine['distance']**2 + 0.0057881664 - 0.15216*turbine['distance']*np.sin(tip_angle1)) ** 0.5 tip_angle_list.append(tip_angle_cal) tip_angle_new = float(np.mean(tip_angle_list)) tip_angle_new1 = np.rad2deg(tip_angle_new) print('坐标转换后的新叶尖俯仰角: ' + str(tip_angle_new1)) return tip_border_rows, tip_angle_new1 def radius_cal(border_rows, meas_angle, cen_dist, cen_angle, angle_main, angle_rotate): """ 计算测量点处的旋转半径。 :param border_rows: 三个叶片的边界 :param meas_angle: 回波俯仰角 :param cen_dist: 轮毂中心距离 :param cen_angle: 轮毂中心俯仰角 :param angle_main: 主轴倾角 :param angle_rotate: 锥角 :return: 旋转半径 """ aero_dist = (pd.concat([df['distance'] for df in border_rows]).mean()) cen_x = np.cos(np.deg2rad(cen_angle)) * cen_dist cen_y = np.sin(np.deg2rad(cen_angle)) * cen_dist aero_x = np.cos(np.deg2rad(meas_angle)) * aero_dist aero_y = np.sin(np.deg2rad(meas_angle)) * aero_dist theta_4 = np.tan(np.pi - np.deg2rad(angle_main)) theta_5 = np.tan(np.pi/2 - np.deg2rad(angle_main) - np.deg2rad(angle_rotate)) if theta_5 > 1000: radius = np.abs((cen_y - aero_y) - theta_4 * (cen_x - aero_x)) print("轴向倾角与锥角相近,叶片垂直于地面") else: radius = (np.abs((theta_4 * (cen_x - aero_x) - (cen_y - aero_y))/(theta_4 - theta_5)) * ((1 + theta_5 ** 2) ** 0.5)) print('测量点旋转半径:' + str(radius)) return radius def blade_angle_aero_dist(border_rows: List[pd.DataFrame], radius: float, full_cycle: int, tower_dist: float, v_angle: float): """ 计算叶片相对桨距角和叶片净空距离。 :param border_rows: 三个叶片的边界 :param radius: 旋转半径 :param full_cycle: 全周期 :param tower_dist: 塔筒距离 :param v_angle: 俯仰角度 :return: 绝对桨距角,净空距离,叶片线速度 """ print('正在进行相对桨距角和叶片净空距离计算......') v_speed = 2 * np.pi * radius / full_cycle # 叶片线速度m/(1计时器单位) pitch_angle_list = [] aero_dist_list = [] cen_blade = [] for turbine in border_rows: diff_time = turbine.iloc[1, 0] - turbine.iloc[0, 0] diff_len = turbine.iloc[1, 1] - turbine.iloc[0, 1] mean_col2 = (turbine.iloc[1, 1] + turbine.iloc[0, 1]) / 2 aero_dist = abs(mean_col2 - tower_dist) * 
np.cos(np.deg2rad(v_angle)) pitch_angle = np.degrees(np.arctan(diff_len / (diff_time * v_speed))) print('单个叶片绝对桨距角' + str(pitch_angle)) pitch_angle_list.append(pitch_angle) aero_dist_list.append(aero_dist) cen_blade.append(mean_col2) pitch_mean = np.mean(pitch_angle_list) pitch_angle_list = [angle - pitch_mean for angle in pitch_angle_list] pitch_angle_list.append(max(pitch_angle_list) - min(pitch_angle_list)) aero_dist_list.append(np.mean(aero_dist_list)) pitch_angle_list = [round(num, 2) for num in pitch_angle_list] aero_dist_list = [round(num, 2) for num in aero_dist_list] print('叶片相对角度偏差:' + '\n' + '叶片1:' + str(pitch_angle_list[0]) + '\n' + '叶片2:' + str(pitch_angle_list[1]) + '\n' + '叶片3:' + str(pitch_angle_list[2]) + '\n' + '相对偏差范围:' + str(pitch_angle_list[3])) print('叶片净空距离:' + '\n' + '叶片1:' + str(aero_dist_list[0]) + '\n' + '叶片2:' + str(aero_dist_list[1]) + '\n' + '叶片3:' + str(aero_dist_list[2]) + '\n' + '平均净空距离:' + str(aero_dist_list[3])) return pitch_angle_list, aero_dist_list, v_speed, cen_blade def plot_data(data, plot_type: str, data_name: str): """ 绘制数据图表并保存为文件。 :param data: 数据列表,每个元素是一个 DataFrame。 :param plot_type: 图表类型,'line' 或 'scatter'。 :param data_name: 数据名称,用于生成文件名。 """ print('正在画图......') time.sleep(1) save_path = "C:/Users/laiwe/Desktop/" save_name = fr"{data_name}_{plot_type}.png" # 生成文件名 plt.figure(figsize=(300, 150)) if plot_type == 'line': for df, color in zip(data, ['blue', 'green', 'red']): sns.lineplot(data=df, x=df.iloc[:, 0], y=df.iloc[:, 1], color=color) elif plot_type == 'scatter': for df, (size, color) in zip(data, [(50, 'blue'), (25, 'green'), (10, 'red')]): sns.scatterplot(data=df, x=df.iloc[:, 0], y=df.iloc[:, 1], s=size, color=color) else: raise ValueError("plot_type must be either 'line' or 'scatter'") axy = plt.gca() # 获取当前坐标轴对象 plt.grid(which='both', linewidth=2) # 设置网格线宽度为2 axy.xaxis.set_major_locator(MaxNLocator(nbins=200)) # 设置x轴主刻度的最大数量为10 axy.yaxis.set_major_locator(MaxNLocator(nbins=100)) # 设置y轴主刻度的最大数量为10 
plt.xlabel('时间', fontsize=100, fontweight='bold') # 添加x轴标签 plt.ylabel('距离(m)', fontsize=100, fontweight='bold') # 添加y轴标签 axy.tick_params(axis='x', labelsize=10, labelcolor='black', width=2) # 设置x轴刻度标签 axy.tick_params(axis='y', labelsize=60, labelcolor='black', width=10) # 设置y轴刻度标签 plt.savefig(save_path + save_name) plt.close() abs_path = os.path.abspath(save_name) print(f" {save_name} 已完成") return abs_path def find_param(path: str): """ 根据文件路径获取参数 """ path = path.replace('\\', '/') last_slash_index = path.rfind('/') result = path[last_slash_index + 1:] underscore_indices = [] start = 0 while True: index = result.find('_', start) if index == -1: break underscore_indices.append(index) start = index + 1 wind_name = result[: underscore_indices[0]] turbine_code = result[underscore_indices[0] + 1: underscore_indices[1]] time_code = result[underscore_indices[1] + 1: underscore_indices[2]] sampling_fq = int(result[underscore_indices[2] + 1: underscore_indices[3]]) tunnel_1 = float(result[underscore_indices[3] + 1: underscore_indices[4]]) tunnel_2 = float(result[underscore_indices[4] + 1: -4]) dt = datetime.strptime(time_code, "%Y%m%d%H%M%S") standard_time_str = dt.strftime("%Y-%m-%d %H:%M:%S") return wind_name, turbine_code, standard_time_str, sampling_fq, tunnel_1, tunnel_2 def blade_dist_distribute_cal(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame, tower_dist: float, v_angle: float, blade_cen_dist: list): """ 计算每个叶片每个周期的转速和净空距离 :param data_group: cycle_calculate计算完成后的数据。 :param start_points: 所有每个周期开始点,叶片前缘突变点。 :param end_points: 叶片后缘突变点。 :param tower_dist: 塔筒距离。 :param v_angle: 测量俯仰角度。 :param blade_cen_dist: 叶片内部距离。 """ print('正在进行各周期净空距离分布计算......') time.sleep(1) combined_df_sorted = pd.concat([start_points, end_points]).sort_values(by='time') # 检查排序后的数据从start开始,end结束 if combined_df_sorted.iloc[0].equals(end_points.iloc[0]): combined_df_sorted = combined_df_sorted.iloc[1:] if combined_df_sorted.iloc[-1].equals(start_points.iloc[-1]): 
def blade_dist_distribute_cal(data_group: pd.DataFrame, start_points: pd.DataFrame, end_points: pd.DataFrame,
                              tower_dist: float, v_angle: float, blade_cen_dist: list):
    """
    Compute rotor speed and tower clearance for every blade pass, split into three buckets.

    :param data_group: samples with 'time' and 'distance' columns (the data produced by cycle_calculate).
    :param start_points: start point of every cycle (blade leading-edge jump); needs a 'time' column.
    :param end_points: blade trailing-edge jump points; needs a 'time' column.
    :param tower_dist: distance to the tower.
    :param v_angle: measurement pitch angle in degrees.
    :param blade_cen_dist: three per-bucket blade inner distances subtracted from the clearance.
    :return: list of three DataFrames with columns 'r_speed' and 'clearance', each sorted
        by 'r_speed'. A bucket that received no pass is an empty frame with those columns.
    """
    print('正在进行各周期净空距离分布计算......')
    columns = ['r_speed', 'clearance']

    edges = pd.concat([start_points, end_points]).sort_values(by='time')
    # The edge sequence must begin with a start point and finish with an end point.
    if not edges.empty and not end_points.empty and edges.iloc[0].equals(end_points.iloc[0]):
        edges = edges.iloc[1:]
    if not edges.empty and not start_points.empty and edges.iloc[-1].equals(start_points.iloc[-1]):
        edges = edges.iloc[:-1]
    edge_times = edges['time'].tolist()  # alternating start, end, start, end, ...

    cos_v = np.cos(np.deg2rad(v_angle))  # loop-invariant projection factor
    rows = [[], [], []]

    # Step over (start, end) pairs; the following start is needed for the period,
    # so the last pair is not evaluated.
    for i in range(0, len(edge_times) - 2, 2):
        pass_start = edge_times[i]
        pass_end = edge_times[i + 1]
        in_pass = (data_group['time'] > pass_start) & (data_group['time'] <= pass_end)
        min_distance = data_group.loc[in_pass, 'distance'].min()

        # NOTE(review): i advances by 2, so buckets are visited in the order 0, 2, 1.
        # Confirm that blade_cen_dist is ordered to match this.
        bucket = i % 3
        clearance = np.abs(tower_dist - min_distance - blade_cen_dist[bucket]) * cos_v

        # One start-to-start interval times 3 is taken as a full revolution.
        # Presumably 'time' counts ticks of 1/5,000,000 s - TODO confirm.
        r_speed = round(60 / ((edge_times[i + 2] - edge_times[i]) * 3 / 5000000), 2)
        rows[bucket].append((r_speed, clearance))

    # Build each frame once instead of concatenating inside the loop. The all-zero
    # index reproduces what concatenating single-row frames yields, so label-based
    # callers see the same result.
    return [
        pd.DataFrame(bucket_rows, columns=columns, index=[0] * len(bucket_rows)).sort_values(by='r_speed')
        for bucket_rows in rows
    ]


if __name__ == "__main__":
    # Manual run against local recordings. Guarded so that importing this module
    # does not start an analysis.
    # Alternative recordings (uncomment one locate/measure pair to use it):
    # locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250728/gy_10-tf-20_20250630223600_20_13.03_23.32.csv"
    # measure_path= "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250728/gy_10-tf-20_20250630223849_20_17.89_21.07.csv"
    locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250728/gy_10-tf-50_20250630223358_50_13.03_23.32.csv"
    measure_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250728/gy_10-tf-50_20250630224408_50_17.89_21.07.csv"
    # locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250728/gy_10-tf-100_20250630222752_100_13.03_23.32.csv"
    # measure_path= "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250728/gy_10-tf-100_20250630225119_100_17.89_21.07.csv"
    # locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250728/gy_10-tff-20_20250630231223_20_12.51_20.06.csv"
    # measure_path= "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250728/gy_10-tff-20_20250630232052_20_15.36_18.17.csv"
    # locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250728/gy_10-tff-50_20250630231417_50_12.51_20.06.csv"
    # measure_path= "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250728/gy_10-tff-50_20250630233420_50_15.35_18.16.csv"
    # locate_path = "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250728/gy_10-tff-100_20250630231610_100_12.51_20.06.csv"
    # measure_path= "C:/Users/laiwe/Desktop/风电/激光测量/测试数据/20250728/gy_10-tff-100_20250630234012_100_15.35_18.16.csv"

    start_t = time.time()  # start of the timed run
    # [locate file, measure file, cone angle, axial inclination]
    data_path = [locate_path, measure_path, 5, 6]
    list_1 = data_analyse(data_path)
    # print(list_1)
    print(f"耗时: {time.time() - start_t:.2f} 秒")