|
|
@@ -0,0 +1,359 @@
|
|
|
+import datetime
|
|
|
+import traceback
|
|
|
+
|
|
|
+import numpy as np
|
|
|
+from pandas import DataFrame
|
|
|
+
|
|
|
+from utils.log.trans_log import logger
|
|
|
+
|
|
|
+
|
|
|
class ClassIdentifier(object):
    """
    Label every sample of a turbine's wind-speed / active-power scatter.

    Label values written to the ``lab`` column:
    -1: shutdown (active power <= 0); 0: good point; 1: under-performing point
    (right of the main power band); 2: over-performing point (left of the main
    power band); 3: over-performing point above rated wind speed (currently
    folded into 0); 4: curtailed / power-limited point.
    ``run`` additionally writes -999 to every row when labelling fails.
    """

    def __init__(self, wind_turbine_number="", origin_df: DataFrame = None,
                 wind_velocity='wind_velocity',
                 active_power='active_power',
                 pitch_angle_blade='pitch_angle_blade_1',
                 rated_power=1500, cut_out_speed=20):
        """
        :param wind_turbine_number: turbine identifier, used in log messages only
        :param origin_df: The pandas DataFrame containing the input data.
        :param wind_velocity: name of the wind-speed column
        :param active_power: name of the active-power column
        :param pitch_angle_blade: name of the blade pitch-angle column
        :param rated_power: rated power; ``None`` falls back to 1500
        :param cut_out_speed: cut-out wind speed; ``None`` falls back to 20
        """
        self.wind_turbine_number = wind_turbine_number
        self.wind_velocity = wind_velocity
        self.active_power = active_power
        self.pitch_angle_blade = pitch_angle_blade
        self.rated_power = rated_power
        self.cut_out_speed = cut_out_speed

        if self.rated_power is None:
            logger.info(f"{wind_turbine_number} WARNING：rated_power配置为空的")
            self.rated_power = 1500

        if self.cut_out_speed is None:
            # Log the turbine number (the original interpolated cut_out_speed,
            # which is always None on this branch).
            logger.info(f"{wind_turbine_number} WARNING：cut_out_speed配置为空的")
            self.cut_out_speed = 20

        self.df = origin_df

    def identifier(self):
        """
        Compute the ``lab`` column on ``self.df`` (modified in place) and return it.

        The frame's index is reset to 0..n-1. If fewer than 11 distinct power
        values exist, every row is labelled -1. Otherwise samples with power > 0
        are labelled by the grid method in ``_grid_labels`` and under-performing
        points with a raised pitch angle are re-labelled as curtailed (4).

        Degenerate data (e.g. fewer than seven 25-unit power bands) makes the
        grid analysis raise (``ZeroDivisionError``/``IndexError``/``ValueError``);
        ``run`` turns that into the -999 label.

        :return: ``self.df`` with the ``lab`` column added
        """
        self.df.reset_index(inplace=True)
        try:
            if len(self.df[self.active_power].unique()) <= 10:
                self.df['lab'] = -1
                return self.df

            power = self.df[self.active_power].to_numpy(dtype=float)
            wind = self.df[self.wind_velocity].to_numpy(dtype=float)
            running = power > 0  # NaN compares False, so NaN rows stay at -1
            labels = self._grid_labels(wind[running], power[running],
                                       self.df[self.active_power].max())

            self.df.loc[:, 'lab'] = -1
            self.df.loc[running, 'lab'] = labels

            # Re-label part of the under-performing points as curtailed: the
            # lower the power relative to rated, the smaller the pitch angle
            # that is accepted as evidence of curtailment.
            power_column = self.df[self.active_power]
            pitch_column = self.df[self.pitch_angle_blade]
            pitched = (
                ((power_column < self.rated_power * 0.75) & (pitch_column > 0.5))
                | ((power_column < self.rated_power * 0.85) & (pitch_column > 1.5))
                | ((power_column < self.rated_power * 0.9) & (pitch_column > 2.5))
            )
            self.df.loc[(self.df['lab'] == 1) & pitched, 'lab'] = 4

            self.df.loc[power_column <= 0, 'lab'] = -1
            return self.df
        finally:
            # Single clean-up point so the early-return, normal and failure
            # paths all hand back the same column layout.
            self.df.reset_index(drop=True, inplace=True)
            if 'index' in self.df.columns:
                del self.df['index']

    @staticmethod
    def _spread_around_peak(row_percent, peak, covered, threshold):
        """
        Widen a window around ``peak`` until it covers ``threshold`` percent.

        Starting from ``covered`` (the peak cell's share), alternately add the
        next cell to the right and to the left. Stops when the threshold is
        reached or the left side reaches column 0.

        :return: ``(left_cells, right_cells)`` added on each side
        """
        last = len(row_percent) - 1
        right = 1
        left = 1
        while covered < threshold:
            if peak + right < last:
                covered = covered + row_percent[peak + right]
                right += 1
            if peak + right > last:
                break
            if peak - left > 0:
                covered = covered + row_percent[peak - left]
                left += 1
            if peak - left <= 0:
                break
        return left - 1, right - 1

    def _grid_labels(self, wind, power, power_max):
        """
        Label the running samples (power > 0) with the grid method.

        :param wind: wind speeds of the running samples (float ndarray)
        :param power: active power of the running samples, same order as ``wind``
        :param power_max: maximum active power of the whole data set
        :return: int ndarray with one label (0, 1, 2 or 4) per running sample
        """
        # Grid size: 25-unit power bands, 0.25 m/s wind-speed bins whose edges
        # sit at 0.125, 0.375, ...
        power_rated = np.ceil(power_max / 100) * 100
        power_bin_count = int(np.ceil(power_rated / 25))
        velocity_bin_count = int(np.ceil(self.cut_out_speed / 0.25))
        # The six top bands (around rated power) are excluded from band analysis.
        band_rows = power_bin_count - 6

        # Bin k holds values with edges[k] < value <= edges[k + 1]; searchsorted
        # with side='left' reproduces exactly that comparison.
        power_edges = np.arange(power_bin_count + 1) * 25.0
        velocity_edges = np.arange(velocity_bin_count + 1) * 0.25 + 0.125
        p_bin = np.searchsorted(power_edges, power, side='left') - 1
        v_bin = np.searchsorted(velocity_edges, wind, side='left') - 1
        # Samples outside the grid are skipped. (Previously they silently reused
        # the bin of the preceding sample because the index was never reset.)
        in_grid = ((p_bin >= 0) & (p_bin < power_bin_count)
                   & (v_bin >= 0) & (v_bin < velocity_bin_count))

        # Number of samples per grid cell.
        x_box_number = np.zeros([power_bin_count, velocity_bin_count], dtype=int)
        np.add.at(x_box_number, (p_bin[in_grid], v_bin[in_grid]), 1)

        # Share of each cell within its power band (row) ...
        power_bin_sum = x_box_number.sum(axis=1)
        power_box_percent = np.zeros([power_bin_count, velocity_bin_count], dtype=float)
        filled_rows = power_bin_sum > 0
        power_box_percent[filled_rows] = (
            x_box_number[filled_rows] / power_bin_sum[filled_rows, None] * 100)

        # ... and within its wind-speed bin (column).
        v_bin_sum = x_box_number.sum(axis=0)
        v_box_percent = np.zeros([power_bin_count, velocity_bin_count], dtype=float)
        filled_cols = v_bin_sum > 0
        v_box_percent[:, filled_cols] = (
            x_box_number[:, filled_cols] / v_bin_sum[filled_cols] * 100)

        # Centre of the main band in every power band: the cell with the largest
        # share. NOTE(review): the share is truncated to an integer, as in the
        # original implementation - confirm whether that is intended.
        p_box_max_index = power_box_percent.argmax(axis=1).astype(int)
        p_box_max_p = power_box_percent.max(axis=1).astype(int)

        # Special case for the lowest band: if its peak lies too far right, pull
        # it back. TODO: the reason for the 14 / 9 constants is undocumented.
        if p_box_max_index[0] > 14:
            p_box_max_index[0] = 9

        # Cells needed left/right of the peak to cover 90 % of each band.
        dot_dense_left_right = np.zeros([power_bin_count, 2], dtype=int)
        for row in range(band_rows):
            dot_dense_left_right[row] = self._spread_around_peak(
                power_box_percent[row], p_box_max_index[row], p_box_max_p[row], 90)
        right_widths = dot_dense_left_right[:, 1]

        # NOTE(review): the median also covers the six unanalysed top rows
        # (always 0), as in the original implementation.
        main_band_right = np.median(right_widths)
        # Threshold above which a band counts as power-limited.
        # TODO: the reason for the "+ 3" margin is undocumented.
        power_limit_valve = np.ceil(main_band_right) + 3

        # A band that stretches far to the right and holds more than 20 samples
        # is power-limited; the remaining bands define the normal band width.
        power_limit = np.zeros(power_bin_count, dtype=bool)
        normal_widths = []
        for row in range(band_rows):
            if right_widths[row] > power_limit_valve and power_bin_sum[row] > 20:
                power_limit[row] = True
            if right_widths[row] <= power_limit_valve:
                normal_widths.append(right_widths[row])
        # Raises ZeroDivisionError when no band qualifies (degenerate data).
        width_average = sum(normal_widths) / len(normal_widths)

        # A power-limited band whose peak jumps away from the band below is
        # pulled back next to it (sequential: uses already corrected rows).
        for row in range(1, band_rows):
            if power_limit[row] and abs(p_box_max_index[row] - p_box_max_index[row - 1]) > 5:
                p_box_max_index[row] = p_box_max_index[row - 1] + 1

        # Half-width of the main band, in cells.
        curve_width = int(np.ceil(width_average) + 2)

        # Cell flags: 1 = right of the band (under-performing), 2 = left of the
        # band (over-performing), 3 = top bands above rated power.
        # (A former second pass with a 3 % threshold re-marked a subset of the
        # cells already set to 1 here and is therefore omitted.)
        b_box_remove = np.zeros([power_bin_count, velocity_bin_count], dtype=int)
        for row in range(band_rows):
            peak = p_box_max_index[row]
            b_box_remove[row, peak + curve_width:] = 1
            b_box_remove[row, :max(peak - curve_width + 1, 0)] = 2

        # Knee of the curve (rated wind speed): scanning from the top band down,
        # the first cell holding > 3 % of its wind-speed bin and >= 10 samples.
        knee_column = 0
        dense = (v_box_percent > 3) & (x_box_number >= 10)
        for row in range(power_bin_count - 5, -1, -1):
            hits = np.flatnonzero(dense[row])
            if hits.size:
                knee_column = hits[0]
                break

        # Top three bands, then the cells left of the knee in the top six bands.
        b_box_remove[power_bin_count - 3:, :] = 3
        b_box_remove[power_bin_count - 6:, :max(knee_column - 1, 0)] = 2

        # Every sample takes the flag of its cell; flag 3 is treated as normal.
        labels = np.zeros(len(power), dtype=int)
        cell_flags = b_box_remove[p_bin[in_grid], v_bin[in_grid]]
        labels[in_grid] = np.where(cell_flags == 3, 0, cell_flags)

        # Curtailment method 2: cut the samples into windows of three. A window
        # lying entirely in [200, rated_power - 300] whose 2nd and 3rd values
        # stay within +/-15 of the first is a horizontal run -> label 4.
        n_window_length = 3
        power_std = 15
        power_limit_up = self.rated_power - 300
        power_limit_low = 200
        n_window_num = len(power) // n_window_length
        covered = n_window_num * n_window_length
        windows = power[:covered].reshape(n_window_num, n_window_length)
        in_range = ((windows >= power_limit_low) & (windows <= power_limit_up)).all(axis=1)
        first = windows[:, :1]
        flat = ((windows[:, 1:] >= first - power_std)
                & (windows[:, 1:] <= first + power_std)).all(axis=1)
        labels[:covered][np.repeat(in_range & flat, n_window_length)] = 4

        # The right band edge is a staircase; samples in the upper-left triangle
        # of each step (slope steeper than the step diagonal) are good points.
        for row in range(band_rows):
            if p_box_max_index[row + 1] - p_box_max_index[row] < 1:
                continue
            left_v = (p_box_max_index[row] + curve_width) * 0.25 + 0.125
            right_v = (p_box_max_index[row + 1] + curve_width) * 0.25 + 0.125
            low_p = row * 25
            high_p = (row + 1) * 25
            inside = ((left_v < wind) & (wind < right_v)
                      & (low_p < power) & (power < high_p))
            candidates = np.flatnonzero(inside)
            slope = (power[candidates] - low_p) / (wind[candidates] - left_v)
            diagonal = (high_p - low_p) / (right_v - left_v)
            labels[candidates[slope > diagonal]] = 0

        return labels

    def run(self):
        """
        Label ``self.df`` and log timing; never raises for labelling errors.

        :return: the labelled frame; when ``identifier`` raises, the traceback is
                 logged and the frame is returned with ``lab`` set to -999
        """
        begin = datetime.datetime.now()
        logger.info(f"打标签开始,风机号：{self.wind_turbine_number},数量：{self.df.shape}")
        try:
            df = self.identifier()
        except Exception:
            # Top-level boundary: one failing turbine must not abort the batch.
            logger.error(traceback.format_exc())
            self.df.loc[:, 'lab'] = -999
            return self.df
        logger.info(f"打标签结束,{df.shape},耗时：{datetime.datetime.now() - begin}")
        return df
|