před 11 měsíci · 2cacf26527
--- a/app_run.py
+++ b/app_run.py
@@ -137,6 +137,6 @@ if __name__ == '__main__':
 
				 
			
 
				     run_schedule(run_count=run_count)
			
 
				 
			
 
				-    # run_local(4, 4, batch_no='WOF035200003-WOB000004', batch_name='MMFDC_second0718', transfer_type='second',
			
 
				-    #            transfer_file_addr=r'/data/download/collection_data/1进行中/密马风电场-山西-大唐/收资数据/scada/秒级数据', field_name='密马风电场',
			
 
				-    #            field_code="WOF093400005", save_db=True)
			
 
				+    # run_local(4, 4, batch_no='WOF035200003-WOB000005', batch_name='MM14号机组0719', transfer_type='second',
			
 
				+    #            transfer_file_addr=r'/data/download/collection_data/1进行中/密马风电场-山西-大唐/收资数据/scada/14号/sec', field_name='密马风电场',
			
 
				+    #            field_code="WOF035200003", save_db=True)
			
--- a/etl/base/WindFarms.py
+++ b/etl/base/WindFarms.py
@@ -8,6 +8,7 @@ import pandas as pd
 
				 
			
 
				 from etl.base.PathsAndTable import PathsAndTable
			
 
				 from etl.base.TransParam import TransParam
			
 
				+from etl.step.ClassIdentifier import ClassIdentifier
			
 
				 from etl.step.ClearData import ClearData
			
 
				 from etl.step.ReadAndSaveTmp import ReadAndSaveTmp
			
 
				 from etl.step.SaveToDb import SaveToDb
			
@@ -31,7 +32,7 @@ class WindFarms(object):
 
				         self.save_zip = False
			
 
				         self.trans_param = params
			
 
				         self.exist_wind_names = multiprocessing.Manager().list()
			
 
				-        self.wind_col_trans = get_all_wind(self.field_code)
			
 
				+        self.wind_col_trans, self.rated_power_map = get_all_wind(self.field_code)
			
 
				         self.batch_count = 50000
			
 
				         self.save_path = None
			
 
				         self.save_db = save_db
			
@@ -57,13 +58,13 @@ class WindFarms(object):
 
				             unzipAndRemove.run()
			
 
				 
			
 
				         if step <= 2 and end >= 2:
			
 
				-            # 更新运行状态到运行中
			
 
				             readAndSaveTmp = ReadAndSaveTmp(self.pathsAndTable, self.trans_param)
			
 
				             readAndSaveTmp.run()
			
 
				 
			
 
				         if step <= 3 and end >= 3:
			
 
				             # 保存到正式文件
			
 
				-            statisticsAndSaveFile = StatisticsAndSaveFile(self.pathsAndTable, self.trans_param, self.statistics_map)
			
 
				+            statisticsAndSaveFile = StatisticsAndSaveFile(self.pathsAndTable, self.trans_param, self.statistics_map,
			
 
				+                                                          self.rated_power_map)
			
 
				             statisticsAndSaveFile.run()
			
 
				 
			
 
				         if step <= 4 and end >= 4:
			
--- a/etl/step/ClassIdentifier.py
+++ b/etl/step/ClassIdentifier.py
@@ -0,0 +1,404 @@
 
				+import os
			
 
				+
			
 
				+import numpy as np
			
 
				+from pandas import DataFrame
			
 
				+
			
 
				+from utils.draw.draw_file import scatter
			
 
				+from utils.file.trans_methods import read_file_to_df
			
 
				+from utils.log.trans_log import trans_print
			
 
				+
			
 
				+
			
 
				+class ClassIdentifier(object):
			
 
				+    """
			
 
				+    分类标识 -1:停机 0:好点  1:欠发功率点；2:超发功率点；3:额定风速以上的超发功率点 4: 限电
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, wind_turbine_number=None, origin_df: DataFrame = None,
			
 
				+                 wind_velocity='wind_velocity',
			
 
				+                 active_power='active_power',
			
 
				+                 pitch_angle_blade='pitch_angle_blade_1',
			
 
				+                 rated_power=1500, file_path: str = None):
			
 
				+        """
			
 
				+        :param file_path: The file path of the input data.
			
 
				+        :param origin_df: The pandas DataFrame containing the input data.
			
 
				+        :param wind_velocity: 风速字段
			
 
				+        :param active_power: 有功功率字段
			
 
				+        :param pitch_angle_blade: 桨距角
			
 
				+        :param rated_power: 额定功率
			
 
				+        """
			
 
				+        self.wind_velocity = wind_velocity
			
 
				+        self.active_power = active_power
			
 
				+        self.pitch_angle_blade = pitch_angle_blade
			
 
				+        self.rated_power = rated_power  # 额定功率1500kw,可改为2000kw
			
 
				+
			
 
				+        if self.rated_power is None:
			
 
				+            trans_print(wind_turbine_number, "WARNING:rated_power配置为空的")
			
 
				+            self.rated_power = 1500
			
 
				+
			
 
				+        if file_path is None and origin_df is None:
			
 
				+            raise ValueError("Either file_path or origin_df should be provided.")
			
 
				+
			
 
				+        if file_path:
			
 
				+            self.df = read_file_to_df(file_path)
			
 
				+        else:
			
 
				+            self.df = origin_df
			
 
				+
			
 
				+    def identifier(self):
			
 
				+        # 风速 和 有功功率 df
			
 
				+        # wind_and_power_df = self.df[[self.wind_velocity, self.active_power, "pitch_angle_blade_1"]]
			
 
				+        wind_and_power_df = self.df
			
 
				+        wind_and_power_df.reset_index(inplace=True)
			
 
				+        wind_and_power_df_count = wind_and_power_df.shape[0]
			
 
				+        power_max = wind_and_power_df[self.active_power].max()
			
 
				+        power_rated = np.ceil(power_max / 100) * 100
			
 
				+        v_cut_out = 25
			
 
				+        # 网格法确定风速风向分区数量，功率方向分区数量，
			
 
				+        p_num = int(np.ceil(power_rated / 25))  # 功率分区间隔25kW
			
 
				+        v_num = int(np.ceil(v_cut_out / 0.25))  # 风速分区间隔0.25m/s
			
 
				+
			
 
				+        # 存储功率大于零的运行数据
			
 
				+        dz_march = np.zeros([wind_and_power_df_count, 2], dtype=float)
			
 
				+        n_counter1 = 0
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            if wind_and_power_df.loc[i, self.active_power] > 0:
			
 
				+                dz_march[n_counter1, 0] = wind_and_power_df.loc[i, self.wind_velocity]
			
 
				+                dz_march[n_counter1, 1] = wind_and_power_df.loc[i, self.active_power]
			
 
				+
			
 
				+                n_counter1 = n_counter1 + 1
			
 
				+
			
 
				+        # 统计各网格落入的散点个数
			
 
				+        if v_num == 1:
			
 
				+            x_box_number = np.ones([p_num], dtype=int)
			
 
				+        else:
			
 
				+            x_box_number = np.ones([p_num, v_num], dtype=int)
			
 
				+        n_which_p = -1
			
 
				+        n_which_v = -1
			
 
				+        for i in range(n_counter1):
			
 
				+            for m in range(p_num):
			
 
				+                if m * 25 < dz_march[i, 1] <= (m + 1) * 25:
			
 
				+                    n_which_p = m
			
 
				+                    break
			
 
				+            for n in range(v_num):
			
 
				+                if ((n + 1) * 0.25 - 0.125) < dz_march[i, 0] <= ((n + 1) * 0.25 + 0.125):
			
 
				+                    n_which_v = n
			
 
				+                    break
			
 
				+
			
 
				+            if n_which_p > -1 and n_which_v > -1:
			
 
				+                x_box_number[n_which_p, n_which_v] = x_box_number[n_which_p, n_which_v] + 1
			
 
				+
			
 
				+        for m in range(p_num):
			
 
				+            for n in range(v_num):
			
 
				+                x_box_number[m, n] = x_box_number[m, n] - 1
			
 
				+
			
 
				+        # 在功率方向将网格内散点绝对个数转换为相对百分比，备用
			
 
				+        p_box_percent = np.zeros([p_num, v_num], dtype=float)
			
 
				+        p_bin_sum = np.zeros(p_num, dtype=int)
			
 
				+
			
 
				+        for i in range(p_num):
			
 
				+            for m in range(v_num):
			
 
				+                p_bin_sum[i] = p_bin_sum[i] + x_box_number[i, m]
			
 
				+
			
 
				+            for m in range(v_num):
			
 
				+                if p_bin_sum[i] > 0:
			
 
				+                    p_box_percent[i, m] = x_box_number[i, m] / p_bin_sum[i] * 100
			
 
				+
			
 
				+        # 在风速方向将网格内散点绝对个数转换为相对百分比，备用
			
 
				+        v_box_percent = np.zeros([p_num, v_num], dtype=float)
			
 
				+        v_bin_sum = np.zeros(v_num, dtype=int)
			
 
				+
			
 
				+        for i in range(v_num):
			
 
				+            for m in range(p_num):
			
 
				+                v_bin_sum[i] = v_bin_sum[i] + x_box_number[m, i]
			
 
				+
			
 
				+            for m in range(p_num):
			
 
				+                if v_bin_sum[i] > 0:
			
 
				+                    v_box_percent[m, i] = x_box_number[m, i] / v_bin_sum[i] * 100
			
 
				+
			
 
				+        # 以水平功率带方向为准，分析每个水平功率带中，功率主带中心，即找百分比最大的网格位置。
			
 
				+        p_box_max_index = np.zeros(p_num, dtype=int)  # 水平功率带最大网格位置索引
			
 
				+        p_box_max_p = np.zeros(p_num, dtype=int)  # 水平功率带最大网格百分比
			
 
				+
			
 
				+        for m in range(p_num):
			
 
				+            # 确定每一水平功率带的最大网格位置索引即百分比值
			
 
				+            p_box_max_p[m], p_box_max_index[m] = p_box_percent[m, :].max(), p_box_percent[m, :].argmax()
			
 
				+
			
 
				+        # 以垂直风速方向为准，分析每个垂直风速带中，功率主带中心，即找百分比最大的网格位置。
			
 
				+        v_box_max_index = np.zeros(v_num, dtype=int)
			
 
				+        v_box_max_v = np.zeros(v_num, dtype=int)
			
 
				+
			
 
				+        for m in range(v_num):
			
 
				+            [v_box_max_v[m], v_box_max_index[m]] = v_box_percent[:, m].max(), v_box_percent[:, m].argmax()
			
 
				+
			
 
				+        # 切入风速特殊处理，如果切入风速过于偏右，向左拉回
			
 
				+        if p_box_max_index[0] > 14:
			
 
				+            p_box_max_index[0] = 9
			
 
				+
			
 
				+        # 以水平功率带方向为基准，进行分析
			
 
				+        dot_dense = np.zeros(p_num, dtype=int)  # 每一水平功率带的功率主带包含的网格数
			
 
				+        dot_dense_left_right = np.zeros([p_num, 2], dtype=int)  # 存储每一水平功率带的功率主带以最大网格为中心，向向左，向右扩展的网格数
			
 
				+        dot_valve = 90  # 从中心向左右对称扩展网格的散点百分比和的阈值。
			
 
				+
			
 
				+        for i in range(p_num - 6):  # 从最下层水平功率带1开始，向上到第PNum-6个水平功率带（额定功率一下水平功率带），逐一分析
			
 
				+            p_dot_dense_sum = p_box_max_p[i]  # 以中心最大水平功率带为基准，向左向右对称扩展网格，累加各网格散点百分比
			
 
				+            i_spread_right = 1
			
 
				+            i_spread_left = 1
			
 
				+            while p_dot_dense_sum < dot_valve:
			
 
				+
			
 
				+                if (p_box_max_index[i] + i_spread_right) < v_num - 1:
			
 
				+                    p_dot_dense_sum = p_dot_dense_sum + p_box_percent[i, p_box_max_index[i] + i_spread_right]  # 向右侧扩展
			
 
				+                    i_spread_right = i_spread_right + 1
			
 
				+
			
 
				+                if (p_box_max_index[i] + i_spread_right) > v_num - 1:
			
 
				+                    break
			
 
				+
			
 
				+                if (p_box_max_index[i] - i_spread_left) > 0:
			
 
				+                    p_dot_dense_sum = p_dot_dense_sum + p_box_percent[i, p_box_max_index[i] - i_spread_left]  # 向左侧扩展
			
 
				+                    i_spread_left = i_spread_left + 1
			
 
				+
			
 
				+                if (p_box_max_index[i] - i_spread_left) <= 0:
			
 
				+                    break
			
 
				+
			
 
				+            i_spread_right = i_spread_right - 1
			
 
				+            i_spread_left = i_spread_left - 1
			
 
				+            # 向左右对称扩展完毕
			
 
				+
			
 
				+            dot_dense_left_right[i, 0] = i_spread_left
			
 
				+            dot_dense_left_right[i, 1] = i_spread_right
			
 
				+            dot_dense[i] = i_spread_left + i_spread_right + 1
			
 
				+
			
 
				+        # 各行功率主带右侧宽度的中位数最具有代表性
			
 
				+        dot_dense_width_left = np.zeros([p_num - 6, 1], dtype=int)
			
 
				+        for i in range(p_num - 6):
			
 
				+            dot_dense_width_left[i] = dot_dense_left_right[i, 1]
			
 
				+
			
 
				+        main_band_right = np.median(dot_dense_width_left)
			
 
				+
			
 
				+        # 散点向右显著延展分布的水平功率带为限功率水平带
			
 
				+        power_limit = np.zeros([p_num, 1], dtype=int)  # 各水平功率带是否为限功率标识，==1：是；==0：不是
			
 
				+        width_average = 0  # 功率主带平均宽度
			
 
				+        width_var = 0  # 功率主带方差
			
 
				+        # power_limit_valve = 6    #限功率主带判别阈值
			
 
				+        power_limit_valve = np.ceil(main_band_right) + 3  # 限功率主带判别阈值
			
 
				+
			
 
				+        n_counter_limit = 0
			
 
				+        n_counter = 0
			
 
				+
			
 
				+        for i in range(p_num - 6):
			
 
				+            if dot_dense_left_right[i, 1] > power_limit_valve and p_bin_sum[i] > 20:  # 如果向右扩展网格数大于阈值，且该水平功率带点总数>20，是
			
 
				+                power_limit[i] = 1
			
 
				+                n_counter_limit = n_counter_limit + 1
			
 
				+
			
 
				+            if dot_dense_left_right[i, 1] <= power_limit_valve:
			
 
				+                width_average = width_average + dot_dense_left_right[i, 1]  # 统计正常水平功率带右侧宽度
			
 
				+                n_counter = n_counter + 1
			
 
				+
			
 
				+        width_average = width_average / n_counter  # 功率主带平均宽度
			
 
				+
			
 
				+        # 各水平功率带的功率主带宽度的方差，反映从下到上宽度是否一致，或是否下宽上窄等异常情况
			
 
				+        for i in range(p_num - 6):
			
 
				+            if dot_dense_left_right[i, 1] <= power_limit_valve:
			
 
				+                width_var = width_var + (dot_dense_left_right[i, 1] - width_average) * (
			
 
				+                        dot_dense_left_right[i, 1] - width_average)
			
 
				+
			
 
				+        # 对限负荷水平功率带的最大网格较下面相邻层显著偏右，拉回
			
 
				+        for i in range(1, p_num - 6):
			
 
				+            if power_limit[i] == 1 and abs(p_box_max_index[i] - p_box_max_index[i - 1]) > 5:
			
 
				+                p_box_max_index[i] = p_box_max_index[i - 1] + 1
			
 
				+
			
 
				+        # 输出各层功率主带的左右边界网格索引
			
 
				+        dot_dense_inverse = np.zeros([p_num, 2], dtype=int)
			
 
				+
			
 
				+        for i in range(p_num):
			
 
				+            dot_dense_inverse[i, :] = dot_dense_left_right[p_num - i - 1, :]
			
 
				+
			
 
				+        # 功率主带的右边界
			
 
				+        curve_width_r = int(np.ceil(width_average) + 2)
			
 
				+
			
 
				+        # curve_width_l = 6    #功率主带的左边界
			
 
				+        curve_width_l = curve_width_r
			
 
				+
			
 
				+        b_box_limit = np.zeros([p_num, v_num], dtype=int)  # 网格是否为限功率网格的标识，如果为限功率水平功率带，从功率主带右侧边缘向右的网格为限功率网格
			
 
				+        for i in range(2, p_num - 6):
			
 
				+            if power_limit[i] == 1:
			
 
				+                for j in range(p_box_max_index[i] + curve_width_r, v_num):
			
 
				+                    b_box_limit[i, j] = 1
			
 
				+
			
 
				+        b_box_remove = np.zeros([p_num, v_num], dtype=int)  # 数据异常需要剔除的网格标识，标识==1：功率主带右侧的欠发网格；==2：功率主带左侧的超发网格
			
 
				+        for m in range(p_num - 6):
			
 
				+            for n in range(p_box_max_index[m] + curve_width_r, v_num):
			
 
				+                b_box_remove[m, n] = 1
			
 
				+
			
 
				+            for n in range(p_box_max_index[m] - curve_width_l, -1, -1):
			
 
				+                b_box_remove[m, n] = 2
			
 
				+
			
 
				+        # 确定功率主带的左上拐点，即额定风速位置的网格索引
			
 
				+        curve_top = np.zeros(2, dtype=int)
			
 
				+        curve_top_valve = 3  # 网格的百分比阈值
			
 
				+        b_top_find = 0
			
 
				+        for m in range(p_num - 4 - 1, -1, -1):
			
 
				+            for n in range(v_num):
			
 
				+                if v_box_percent[m, n] > curve_top_valve and x_box_number[m, n] >= 10:  # 如左上角网格的百分比和散点个数大于阈值。
			
 
				+                    curve_top[0] = m
			
 
				+                    curve_top[1] = n
			
 
				+                    b_top_find = 1
			
 
				+                    break
			
 
				+
			
 
				+            if b_top_find == 1:
			
 
				+                break
			
 
				+
			
 
				+        isolate_valve = 3
			
 
				+        for m in range(p_num - 6):
			
 
				+            for n in range(p_box_max_index[m] + curve_width_r, v_num):
			
 
				+                if p_box_percent[m, n] < isolate_valve:
			
 
				+                    b_box_remove[m, n] = 1
			
 
				+
			
 
				+        # 功率主带顶部宽度
			
 
				+        curve_width_t = 2
			
 
				+        for m in range(p_num - curve_width_t - 1, p_num):
			
 
				+            for n in range(v_num):
			
 
				+                b_box_remove[m, n] = 3  # 网格为额定功率以上的超发点
			
 
				+
			
 
				+        # 功率主带拐点左侧的欠发网格标识
			
 
				+        for m in range(p_num - 5 - 1, p_num):
			
 
				+            for n in range(curve_top[1] - 1):
			
 
				+                b_box_remove[m, n] = 2
			
 
				+
			
 
				+        # 以网格的标识，决定该网格内数据的标识。dzwind_and_power_sel。散点在哪个网格，此网格的标识即为该点的标识
			
 
				+        dzwind_and_power_sel = np.zeros(n_counter1, dtype=int)  # -1:停机 0:好点  1:欠发功率点；2:超发功率点；3:额定风速以上的超发功率点 4: 限电
			
 
				+        n_which_p = -1
			
 
				+        n_which_v = -1
			
 
				+        n_bad_a = 0
			
 
				+
			
 
				+        for i in range(n_counter1):
			
 
				+            for m in range(p_num):
			
 
				+                if m * 25 < dz_march[i, 1] <= (m + 1) * 25:
			
 
				+                    n_which_p = m
			
 
				+                    break
			
 
				+
			
 
				+            for n in range(v_num):
			
 
				+                if ((n + 1) * 0.25 - 0.125) < dz_march[i, 0] <= ((n + 1) * 0.25 + 0.125):
			
 
				+                    n_which_v = n
			
 
				+                    break
			
 
				+
			
 
				+            if n_which_p > -1 and n_which_v > -1:
			
 
				+
			
 
				+                if b_box_remove[n_which_p, n_which_v] == 1:
			
 
				+                    dzwind_and_power_sel[i] = 1
			
 
				+                    n_bad_a = n_bad_a + 1
			
 
				+
			
 
				+                if b_box_remove[n_which_p, n_which_v] == 2:
			
 
				+                    dzwind_and_power_sel[i] = 2
			
 
				+
			
 
				+                if b_box_remove[n_which_p, n_which_v] == 3:
			
 
				+                    dzwind_and_power_sel[i] = 0  # 3  # 额定风速以上的超发功率点认为是正常点，不再标识。
			
 
				+
			
 
				+        # 限负荷数据标识方法2：把数据切割为若干个窗口。对每一窗口，以第一个点为基准，连续nWindowLength个数据的功率在方差范围内，呈现显著水平分布的点
			
 
				+        n_window_length = 3
			
 
				+        limit_window = np.zeros(n_window_length, dtype=float)
			
 
				+        power_std = 15  # 功率波动方差
			
 
				+        n_window_num = int(np.floor(n_counter1 / n_window_length))
			
 
				+        power_limit_up = self.rated_power - 300
			
 
				+        power_limit_low = 200
			
 
				+        for i in range(n_window_num):
			
 
				+            for j in range(n_window_length):
			
 
				+                limit_window[j] = dz_march[i * n_window_length + j, 1]
			
 
				+
			
 
				+            b_all_in_areas = 1
			
 
				+            for j in range(n_window_length):
			
 
				+                if limit_window[j] < power_limit_low or limit_window[j] > power_limit_up:
			
 
				+                    b_all_in_areas = 0
			
 
				+
			
 
				+            if b_all_in_areas == 0:
			
 
				+                continue
			
 
				+
			
 
				+            up_limit = limit_window[0] + power_std
			
 
				+            low_limit = limit_window[0] - power_std
			
 
				+            b_all_in_up_low = 1
			
 
				+            for j in range(1, n_window_length):
			
 
				+                if limit_window[j] < low_limit or limit_window[j] > up_limit:
			
 
				+                    b_all_in_up_low = 0
			
 
				+
			
 
				+            if b_all_in_up_low == 1:
			
 
				+                for j in range(n_window_length):
			
 
				+                    dzwind_and_power_sel[i * n_window_length + j] = 4  # 标识窗口内的数据为限负荷数据
			
 
				+
			
 
				+        for i in range(p_num - 6):
			
 
				+            pv_left_down = np.zeros(2, dtype=float)
			
 
				+            pv_right_up = np.zeros(2, dtype=float)
			
 
				+
			
 
				+            if (p_box_max_index[i + 1] - p_box_max_index[i]) >= 1:
			
 
				+                pv_left_down[0] = (p_box_max_index[i] + 1 + curve_width_r) * 0.25 - 0.125
			
 
				+                pv_left_down[1] = i * 25
			
 
				+
			
 
				+                pv_right_up[0] = (p_box_max_index[i + 1] + 1 + curve_width_r) * 0.25 - 0.125
			
 
				+                pv_right_up[1] = (i + 1) * 25
			
 
				+
			
 
				+                for m in range(n_counter1):
			
 
				+                    if pv_left_down[0] < dz_march[m, 0] < pv_right_up[0] and pv_left_down[1] < \
			
 
				+                            dz_march[m, 1] < pv_right_up[1]:  # 在该锯齿中
			
 
				+                        if (dz_march[m, 1] - pv_left_down[1]) / (dz_march[m, 0] - pv_left_down[0]) > (
			
 
				+                                pv_right_up[1] - pv_left_down[1]) / (
			
 
				+                                pv_right_up[0] - pv_left_down[0]):  # 斜率大于对角连线，则在锯齿左上三角形中，选中
			
 
				+                            dzwind_and_power_sel[m] = 0
			
 
				+
			
 
				+        wind_and_power_df.loc[:, 'lab'] = -1
			
 
				+        wind_and_power_df.loc[
			
 
				+            wind_and_power_df[wind_and_power_df[self.active_power] > 0].index, 'lab'] = dzwind_and_power_sel
			
 
				+
			
 
				+        # 把部分欠发的优化为限电
			
 
				+        # 构建条件表达式
			
 
				+        cond1 = (wind_and_power_df['lab'] == 1) & (
			
 
				+                (wind_and_power_df[self.active_power] < self.rated_power * 0.75) &
			
 
				+                (wind_and_power_df[self.pitch_angle_blade] > 0.5)
			
 
				+        )
			
 
				+        cond2 = (wind_and_power_df['lab'] == 1) & (
			
 
				+                (wind_and_power_df[self.active_power] < self.rated_power * 0.85) &
			
 
				+                (wind_and_power_df[self.pitch_angle_blade] > 1.5)
			
 
				+        )
			
 
				+        cond3 = (wind_and_power_df['lab'] == 1) & (
			
 
				+                (wind_and_power_df[self.active_power] < self.rated_power * 0.9) &
			
 
				+                (wind_and_power_df[self.pitch_angle_blade] > 2.5)
			
 
				+        )
			
 
				+
			
 
				+        # 使用逻辑或操作符|合并条件
			
 
				+        combined_condition = cond1 | cond2 | cond3
			
 
				+        wind_and_power_df.loc[combined_condition, 'lab'] = 4
			
 
				+
			
 
				+        wind_and_power_df.reset_index(drop=True, inplace=True)
			
 
				+        if 'index' in wind_and_power_df.columns:
			
 
				+            del wind_and_power_df['index']
			
 
				+        return wind_and_power_df
			
 
				+
			
 
				+    def run(self):
			
 
				+        # Implement your class identification logic here
			
 
				+        return self.identifier()
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    read_dir = r"D:\data\清理数据\和风元宝山\WOF035200003-WOB000005111_MM14号机组0719\minute"
			
 
				+
			
 
				+    files = [read_dir + os.sep + i for i in os.listdir(read_dir)]
			
 
				+
			
 
				+    for file in files:
			
 
				+        # test = ClassIdentifier(file_path=file,
			
 
				+        #                        wind_velocity='wind_velocity',
			
 
				+        #                        active_power='active_power',
			
 
				+        #                        pitch_angle_blade='pitch_angle_blade_1',
			
 
				+        #                        rated_power=1500
			
 
				+        #                        )
			
 
				+        #
			
 
				+        # df = test.run()
			
 
				+
			
 
				+        name = os.path.basename(file).split('.')[0]
			
 
				+        df = read_file_to_df(file)
			
 
				+
			
 
				+        color_map = {-1: 'red', 0: 'green', 1: 'blue', 2: 'black', 3: 'orange', 4: 'magenta'}
			
 
				+        c = df['lab'].map(color_map)
			
 
				+
			
 
				+        # -1:停机 0:好点  1:欠发功率点；2:超发功率点；3:额定风速以上的超发功率点 4: 限电
			
 
				+        legend_map = {"停机": 'red', "好点": 'green', "欠发": 'blue', "超发": 'black', "额定风速以上的超发": 'orange', "限电": 'magenta'}
			
 
				+        scatter(name, x_label='风速', y_label='有功功率', x_values=df['wind_velocity'].values,
			
 
				+                y_values=df['active_power'].values, color=c, col_map=legend_map,
			
 
				+                save_file_path=os.path.dirname(
			
 
				+                    os.path.dirname(
			
 
				+                        os.path.dirname(__file__))) + os.sep + "tmp_file" + os.sep + "和风元宝山" + os.sep + name + '结果.png')
			
--- a/etl/step/ReadAndSaveTmp.py
+++ b/etl/step/ReadAndSaveTmp.py
@@ -199,6 +199,12 @@ class ReadAndSaveTmp(object):
 
				                         if len(datas) != 2:
			
 
				                             raise Exception("字段映射出现错误 :" + str(trans_dict))
			
 
				                         df[v] = str(file[int(datas[0]):int(datas[1])]).strip()
			
 
				+                    elif k.startswith("$file.split"):
			
 
				+                        datas = str(k).replace("$file.split(", "").replace(")", "").split(",")
			
 
				+                        split_str = str(datas[0])
			
 
				+                        split_index = int(datas[1])
			
 
				+                        df[v] = str(file.split(split_str)[split_index])
			
 
				+
			
 
				                 elif k.find("$file_date") > 0:
			
 
				                     datas = str(k.split(",")[1].replace("$file_date", "").replace("[", "").replace("]", "")).split(":")
			
 
				                     if len(datas) != 2:
			
--- a/etl/step/StatisticsAndSaveFile.py
+++ b/etl/step/StatisticsAndSaveFile.py
@@ -8,6 +8,7 @@ import numpy as np
 
				 
			
 
				 from etl.base import TransParam
			
 
				 from etl.base.PathsAndTable import PathsAndTable
			
 
				+from etl.step.ClassIdentifier import ClassIdentifier
			
 
				 from service.plt_service import update_trans_transfer_progress
			
 
				 from utils.df_utils.util import get_time_space
			
 
				 from utils.file.trans_methods import create_file_path, read_excel_files, read_file_to_df, split_array
			
@@ -17,11 +18,12 @@ from utils.systeminfo.sysinfo import use_files_get_max_cpu_count
 
				 
			
 
				 class StatisticsAndSaveFile(object):
			
 
				 
			
 
    def __init__(self, pathsAndTable: PathsAndTable, trans_param: TransParam, statistics_map: dict,
                 rated_power_map: dict):
        """Store the collaborators used by the statistics-and-save step.

        :param pathsAndTable: path/table helper of the current batch.
        :param trans_param: transfer parameters of the current batch.
        :param statistics_map: mapping that collects the statistics of the run.
        :param rated_power_map: rated power per turbine; looked up with
            ``str(wind_col_name)`` when the samples are classified.
        """
        self.pathsAndTable = pathsAndTable
        self.trans_param = trans_param
        self.statistics_map = statistics_map
        # Manager-backed lock stored for use by the rest of the class.
        self.lock = multiprocessing.Manager().Lock()
        self.rated_power_map = rated_power_map
			
 
				 
			
 
				     def set_statistics_data(self, df):
			
 
				 
			
@@ -91,9 +93,11 @@ class StatisticsAndSaveFile(object):
 
				         # 添加年月日
			
 
				         trans_print(wind_col_name, "包含时间字段,开始处理时间字段,添加年月日", filename)
			
 
				         trans_print(wind_col_name, "时间原始大小:", df.shape[0])
			
 
				-        df = df[(df['time_stamp'].str.find('-') > 0) & (df['time_stamp'].str.find(':') > 0)]
			
 
				+        # df = df[(df['time_stamp'].str.find('-') > 0) & (df['time_stamp'].str.find(':') > 0)]
			
 
				+        # trans_print(wind_col_name, "去掉非法时间后大小:", df.shape[0])
			
 
				+        df['time_stamp'] = pd.to_datetime(df['time_stamp'], errors="coerce")
			
 
				+        df.dropna(subset=['time_stamp'], inplace=True)
			
 
				         trans_print(wind_col_name, "去掉非法时间后大小:", df.shape[0])
			
 
				-        df['time_stamp'] = pd.to_datetime(df['time_stamp'])
			
 
				         df['year'] = df['time_stamp'].dt.year
			
 
				         df['month'] = df['time_stamp'].dt.month
			
 
				         df['day'] = df['time_stamp'].dt.day
			
@@ -117,6 +121,10 @@ class StatisticsAndSaveFile(object):
 
				         trans_print(wind_col_name, "去掉重复数据前大小:", df.shape[0])
			
 
				         df.drop_duplicates(['wind_turbine_number', 'time_stamp'], keep='first', inplace=True)
			
 
				         trans_print(wind_col_name, "去掉重复数据后大小:", df.shape[0])
			
 
				+
			
 
				+        filter = ClassIdentifier(origin_df=df, rated_power=self.rated_power_map[str(wind_col_name)])
			
 
				+        df = filter.run()
			
 
				+
			
 
				         if self.pathsAndTable.save_zip:
			
 
				             save_path = os.path.join(self.pathsAndTable.get_save_path(), str(wind_col_name) + '.csv.gz')
			
 
				         else:
			
--- a/package.sh
+++ b/package.sh
@@ -0,0 +1,2 @@
 
				+pyinstaller -F -n etl_tool app_run.py
			
 
				+#python -m nuitka --onefile --remove-output app_run.py
			
--- a/service/plt_service.py
+++ b/service/plt_service.py
@@ -68,7 +68,7 @@ def update_trans_transfer_progress(batch_no, trans_type, transfer_progress=0, sa
 
				         exec_sql = """
			
 
				         update data_transfer set transfer_progress =%s where batch_code = %s  and transfer_type = %s
			
 
				         """
			
 
				-        plt.execute(exec_sql, (transfer_progress, batch_no, trans_type))
			
 
				+        plt.execute(exec_sql, (int(transfer_progress), batch_no, trans_type))
			
 
				 
			
 
				 
			
 
				 # 获取执行的数据
			
@@ -99,12 +99,14 @@ def get_exec_data(run_count: int = 1) -> dict:
 
				 
			
 
				 
			
 
				 def get_all_wind(field_code):
			
 
				-    query_sql = "select engine_code,engine_name from wind_engine_group where field_code = %s and del_state = 0"
			
 
				+    query_sql = "select engine_code,engine_name,rated_capacity from wind_engine_group where field_code = %s and del_state = 0"
			
 
				     dict_datas = plt.execute(query_sql, (field_code,))
			
 
				-    result = dict()
			
 
				+    wind_result = dict()
			
 
				+    power_result = dict()
			
 
				     for data in dict_datas:
			
 
				-        result[str(data['engine_name'])] = str(data['engine_code'])
			
 
				-    return result
			
 
				+        wind_result[str(data['engine_name'])] = str(data['engine_code'])
			
 
				+        power_result[str(data['engine_code'])] = float(data['rated_capacity'])
			
 
				+    return wind_result,power_result
			
 
				 
			
 
				 
			
 
				 def get_all_wind_company():
			
@@ -116,6 +118,14 @@ def get_all_wind_company():
 
				         return ['吉山风电场', '和风元宝山', '唐龙三期风电场', '密马风电场', '招远风电场', '昌平坳风场', '昌西一风电场', '虹梯官风电场', '长清风电场']
			
 
				 
			
 
				 
			
 
				+def get_base_wind_and_power(wind_turbine_number):
			
 
				+    query_sql = "SELECT rated_wind_speed,rated_capacity FROM wind_engine_group where engine_code = %s order by rated_wind_speed"
			
 
				+    dict_datas = plt.execute(query_sql, (wind_turbine_number,))
			
 
				+    if type(dict_datas) == tuple:
			
 
				+        return None
			
 
				+    return dict_datas
			
 
				+
			
 
				+
			
 
				 if __name__ == '__main__':
			
 
				     print(get_exec_data(run_count=1))
			
 
				 
			
--- a/service/trans_service.py
+++ b/service/trans_service.py
@@ -25,6 +25,9 @@ def save_to_trans_conf(data_dict=dict()):
 
				     trans.save_dict(data_dict)
			
 
				 
			
 
				 
			
 
				+zhishu_list = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]
			
 
				+
			
 
				+
			
 
				 def creat_table_and_add_partition(table_name, count, read_type):
			
 
				     create_sql = f"""
			
 
				     CREATE TABLE
			
@@ -69,6 +72,7 @@ def creat_table_and_add_partition(table_name, count, read_type):
 
				         `wind_turbine_status` DOUBLE DEFAULT NULL COMMENT '风机状态1',
			
 
				         `wind_turbine_status2` DOUBLE DEFAULT NULL COMMENT '风机状态2',
			
 
				         `turbulence_intensity` DOUBLE DEFAULT NULL COMMENT '湍流强度',
			
 
				+        `lab` int DEFAULT NULL COMMENT '-1:停机 0:好点  1:欠发功率点；2:超发功率点；3:额定风速以上的超发功率点 4: 限电',
			
 
				         `year` INT (4) DEFAULT NULL COMMENT '年',
			
 
				         `month` INT (2) DEFAULT NULL COMMENT '月',
			
 
				         `day` INT (2) DEFAULT NULL COMMENT '日',
			
@@ -87,7 +91,13 @@ def creat_table_and_add_partition(table_name, count, read_type):
 
				     ) ENGINE = INNODB DEFAULT CHARSET = utf8mb4
			
 
				     """
			
 
				 
			
 
				-    if read_type == 'second':
			
 
				+    if read_type == 'second' and count > 1:
			
 
				+
			
 
				+        for zhishu in zhishu_list:
			
 
				+            if zhishu >= count:
			
 
				+                count = zhishu
			
 
				+                break
			
 
				+
			
 
				         create_sql = create_sql + f" PARTITION BY KEY (`wind_turbine_number`) PARTITIONS {count}"
			
 
				 
			
 
				     trans.execute(create_sql)
			
@@ -136,7 +146,15 @@ def batch_statistics(table_name):
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				-    path_prix = r"/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF063100040-WOB00013/second"
			
 
				-    files = ["WOG00030.csv", "WOG00034.csv"]
			
 
				-    for path in files:
			
 
				-        save_file_to_db("WOF063100040-WOB00013_second", path_prix + os.sep + path, batch_count=100000)
			
 
				+    # path_prix = r"/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF063100040-WOB00013/second"
			
 
				+    # files = ["WOG00030.csv", "WOG00034.csv"]
			
 
				+    # for path in files:
			
 
				+    #     save_file_to_db("WOF063100040-WOB00013_second", path_prix + os.sep + path, batch_count=100000)
			
 
				+
			
 
				+    count = 13
			
 
				+    if count > 1:
			
 
				+        for i in zhishu_list:
			
 
				+            if i >= count:
			
 
				+                count = i
			
 
				+                break
			
 
				+    print(count)
			
--- a/test_app_run.py
+++ b/test_app_run.py
@@ -0,0 +1,142 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Time    : 2024/6/11
			
 
				+# @Author  : 魏志亮
			
 
				+import os
			
 
				+import sys
			
 
				+import traceback
			
 
				+
			
 
				+
			
 
				+def run_schedule(step=0, end=4, run_count=1):
			
 
				+    # 更新超时任务
			
 
				+    update_timeout_trans_data()
			
 
				+
			
 
				+    data = get_exec_data(run_count)
			
 
				+    if data is None:
			
 
				+        trans_print("当前有任务在执行")
			
 
				+    elif len(data.keys()) == 0:
			
 
				+        trans_print("当前无任务")
			
 
				+    else:
			
 
				+        batch_no = data['batch_code']
			
 
				+        batch_name = data['batch_name']
			
 
				+        transfer_type = data['transfer_type']
			
 
				+        transfer_file_addr = data['transfer_addr']
			
 
				+        field_code = data['field_code']
			
 
				+        field_name = data['field_name']
			
 
				+
			
 
				+        __exec_trans(step, end, batch_no, batch_name, transfer_type, transfer_file_addr, field_name, field_code,
			
 
				+                     save_db=True)
			
 
				+
			
 
				+
			
 
				+def run_local(step=0, end=3, batch_no=None, batch_name='', transfer_type=None, transfer_file_addr=None, field_name=None,
			
 
				+              field_code="测试", save_db=False):
			
 
				+    if batch_no is None or str(batch_no).strip() == '':
			
 
				+        return "批次编号不能为空"
			
 
				+
			
 
				+    if transfer_type not in ['second', 'minute', 'second_1']:
			
 
				+        return "查询类型错误"
			
 
				+
			
 
				+    if transfer_file_addr is None or str(transfer_file_addr).strip() == '':
			
 
				+        return "文件路径不能为空"
			
 
				+
			
 
				+    __exec_trans(step, end, batch_no, batch_name, transfer_type, transfer_file_addr, field_name, field_code,
			
 
				+                 save_db=save_db)
			
 
				+
			
 
				+
			
 
				+def __exec_trans(step, end, batch_no, batch_name, transfer_type, transfer_file_addr=None, field_name=None,
			
 
				+                 field_code="测试",
			
 
				+                 save_db=False):
			
 
				+    trance_id = '-'.join([batch_no, field_name, transfer_type])
			
 
				+    set_trance_id(trance_id)
			
 
				+    conf_map = get_trans_conf(field_code, field_name, transfer_type)
			
 
				+    if conf_map is None or type(conf_map) == tuple or len(conf_map.keys()) == 0:
			
 
				+        message = f"未找到{field_name}的{transfer_type}配置"
			
 
				+        trans_print(message)
			
 
				+        update_trans_status_error(batch_no, transfer_type, message, save_db)
			
 
				+    else:
			
 
				+
			
 
				+        resolve_col_prefix = read_conf(conf_map, 'resolve_col_prefix')
			
 
				+        wind_name_exec = read_conf(conf_map, 'wind_name_exec', None)
			
 
				+        is_vertical_table = read_conf(conf_map, 'is_vertical_table', False)
			
 
				+        merge_columns = read_conf(conf_map, 'merge_columns', False)
			
 
				+
			
 
				+        vertical_cols = read_conf(conf_map, 'vertical_read_cols', '').split(',')
			
 
				+        index_cols = read_conf(conf_map, 'vertical_index_cols', '').split(',')
			
 
				+        vertical_key = read_conf(conf_map, 'vertical_col_key')
			
 
				+        vertical_value = read_conf(conf_map, 'vertical_col_value')
			
 
				+        need_valid_cols = not merge_columns
			
 
				+
			
 
				+        begin_header = read_conf(conf_map, 'begin_header', 0)
			
 
				+
			
 
				+        cols_trans_all = dict()
			
 
				+        trans_cols = ['wind_turbine_number', 'time_stamp', 'active_power', 'rotor_speed', 'generator_speed',
			
 
				+                      'wind_velocity', 'pitch_angle_blade_1', 'pitch_angle_blade_2', 'pitch_angle_blade_3',
			
 
				+                      'cabin_position', 'true_wind_direction', 'yaw_error1', 'set_value_of_active_power',
			
 
				+                      'gearbox_oil_temperature', 'generatordrive_end_bearing_temperature',
			
 
				+                      'generatornon_drive_end_bearing_temperature', 'wind_turbine_status',
			
 
				+                      'wind_turbine_status2',
			
 
				+                      'cabin_temperature', 'twisted_cable_angle', 'front_back_vibration_of_the_cabin',
			
 
				+                      'side_to_side_vibration_of_the_cabin', 'actual_torque', 'given_torque',
			
 
				+                      'clockwise_yaw_count',
			
 
				+                      'counterclockwise_yaw_count', 'unusable', 'power_curve_available',
			
 
				+                      'required_gearbox_speed',
			
 
				+                      'inverter_speed_master_control', 'outside_cabin_temperature', 'main_bearing_temperature',
			
 
				+                      'gearbox_high_speed_shaft_bearing_temperature',
			
 
				+                      'gearboxmedium_speed_shaftbearing_temperature',
			
 
				+                      'gearbox_low_speed_shaft_bearing_temperature', 'generator_winding1_temperature',
			
 
				+                      'generator_winding2_temperature', 'generator_winding3_temperature',
			
 
				+                      'turbulence_intensity', 'param1',
			
 
				+                      'param2', 'param3', 'param4', 'param5', 'param6', 'param7', 'param8', 'param9', 'param10']
			
 
				+
			
 
				+        for col in trans_cols:
			
 
				+            cols_trans_all[col] = read_conf(conf_map, col, '')
			
 
				+
			
 
				+        params = TransParam(read_type=transfer_type, read_path=transfer_file_addr,
			
 
				+                            cols_tran=cols_trans_all,
			
 
				+                            wind_name_exec=wind_name_exec, is_vertical_table=is_vertical_table,
			
 
				+                            vertical_cols=vertical_cols, vertical_key=vertical_key,
			
 
				+                            vertical_value=vertical_value, index_cols=index_cols, merge_columns=merge_columns,
			
 
				+                            resolve_col_prefix=resolve_col_prefix, need_valid_cols=need_valid_cols)
			
 
				+
			
 
				+        try:
			
 
				+            trans_subject = WindFarms(batch_no=batch_no, batch_name=batch_name, field_code=field_code,
			
 
				+                                      field_name=field_name,
			
 
				+                                      save_db=save_db,
			
 
				+                                      header=begin_header, trans_param=params)
			
 
				+            trans_subject.run(step=step, end=end)
			
 
				+        except Exception as e:
			
 
				+            trans_print(traceback.format_exc())
			
 
				+            message = "系统返回错误:" + str(e)
			
 
				+            update_trans_status_error(batch_no, transfer_type, message, save_db)
			
 
				+        finally:
			
 
				+            set_trance_id("")
			
 
				+            # trans_subject.pathsAndTable.delete_tmp_files()
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    env = None
			
 
				+    if len(sys.argv) >= 2:
			
 
				+        env = sys.argv[1]
			
 
				+    else:
			
 
				+        env = 'prod'
			
 
				+    print(sys.argv)
			
 
				+    if env is None:
			
 
				+        raise Exception("请配置运行环境")
			
 
				+
			
 
				+    os.environ['env'] = env
			
 
				+
			
 
				+    run_count = 1
			
 
				+    if len(sys.argv) >= 3:
			
 
				+        run_count = int(sys.argv[2])
			
 
				+
			
 
				+    from utils.log.trans_log import trans_print, set_trance_id
			
 
				+    from etl.base.TransParam import TransParam
			
 
				+    from etl.base.WindFarms import WindFarms
			
 
				+    from service.plt_service import get_exec_data, update_trans_status_error, update_timeout_trans_data
			
 
				+    from service.trans_service import get_trans_conf
			
 
				+    from utils.conf.read_conf import read_conf
			
 
				+
			
 
				+    # run_schedule(run_count=run_count)
			
 
				+
			
 
				+    run_local(3, 3, batch_no='WOF035200003-WOB000005111', batch_name='MM14号机组0719', transfer_type='minute',
			
 
				+              transfer_file_addr=r'D:\trans_data\和风元宝山\收资数据\min', field_name='和风元宝山',
			
 
				+              field_code="WOF039800012", save_db=False)
			
--- a/tmp_file/ClassIdentifier_0.py
+++ b/tmp_file/ClassIdentifier_0.py
@@ -0,0 +1,756 @@
 
				+import numpy as np
			
 
				+from pandas import DataFrame
			
 
				+
			
 
				+from service.plt_service import get_base_wind_and_power
			
 
				+from utils.file.trans_methods import read_file_to_df
			
 
				+
			
 
				+
			
 
				+class ClassIdentifier(object):
			
 
				+
			
 
				+    def __init__(self, wind_turbine_number, file_path: str = None, origin_df: DataFrame = None, index='time_stamp',
			
 
				+                 wind_velocity='wind_velocity',
			
 
				+                 active_power='active_power'):
			
 
				+        """
			
 
				+        :param wind_turbine_number: The wind turbine number.
			
 
				+        :param file_path: The file path of the input data.
			
 
				+        :param origin_df: The pandas DataFrame containing the input data.
			
 
				+        :param index: 索引字段
			
 
				+        :param wind_velocity: 风速字段
			
 
				+        :param active_power: 有功功率字段
			
 
				+        """
			
 
				+        self.wind_turbine_number = wind_turbine_number
			
 
				+        self.index = index
			
 
				+        self.wind_velocity = wind_velocity
			
 
				+        self.active_power = active_power
			
 
				+
			
 
				+        self.rated_wind_speed = 'rated_wind_speed'
			
 
				+        self.rated_capacity = 'rated_capacity'
			
 
				+
			
 
				+        if file_path is None and origin_df is None:
			
 
				+            raise ValueError("Either file_path or origin_df should be provided.")
			
 
				+
			
 
				+        if file_path:
			
 
				+            self.df = read_file_to_df(file_path)
			
 
				+        else:
			
 
				+            self.df = origin_df
			
 
				+
			
 
				+        self.df = self.df.set_index(keys=self.index)
			
 
				+
			
 
				+    def identifier(self):
			
 
				+        # 风速 和 有功功率 df
			
 
				+        wind_and_power_df = self.df[[self.wind_velocity, self.active_power]]
			
 
				+        wind_and_power_df.reset_index(inplace=True)
			
 
				+        wind_and_power_df_count = wind_and_power_df.shape[0]
			
 
				+        PowerMax = wind_and_power_df[self.active_power].max()
			
 
				+        PowerRated = np.ceil(PowerMax / 100) * 100
			
 
				+        PRated = 1500  # 额定功率1500kw,可改为2000kw
			
 
				+        VCutOut = 25
			
 
				+        VCutIn = 3
			
 
				+        VRated = 10
			
 
				+        # 网格法确定风速风向分区数量，功率方向分区数量，
			
 
				+        # PNum = (PRated+100)/25  #功率分区间隔25kW
			
 
				+        PNum = int(np.ceil(PowerRated / 25))  # 功率分区间隔25kW
			
 
				+        VNum = int(np.ceil(VCutOut / 0.25))  # 风速分区间隔0.25m/s
			
 
				+
			
 
				+        # 实发电量
			
 
				+        EPActualTotal = 0  # 实发电量
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            if wind_and_power_df.loc[i, self.active_power] >= 0:
			
 
				+                EPActualTotal = EPActualTotal + wind_and_power_df.loc[i, self.active_power] / 6
			
 
				+
			
 
				+        print("EPActualTotal", EPActualTotal)
			
 
				+        # 平均风速
			
 
				+        WindSpeedAvr = 0
			
 
				+        WindSum = 0
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            if wind_and_power_df.loc[i, self.wind_velocity] >= 0:
			
 
				+                WindSum = WindSum + wind_and_power_df.loc[i, self.wind_velocity]
			
 
				+        WindSpeedAvr = WindSum / wind_and_power_df_count
			
 
				+        print("windSpeedAvr", WindSpeedAvr)
			
 
				+        # 用于计算损失电量的标杆功率曲线，可更换为风机设计功率曲线
			
 
				+        # base_wind_and_power_df = get_base_wind_and_power(self.wind_turbine_number)
			
 
				+        base_wind_and_power_df = read_file_to_df(r"D:\中能智能\matlib计算相关\好点坏点matlib计算\A型风机设计功率曲线.csv", header=None)
			
 
				+        base_wind_and_power_df.columns = [self.rated_wind_speed, self.rated_capacity]
			
 
				+        if base_wind_and_power_df.empty:
			
 
				+            raise ValueError("风场编号:" + self.wind_turbine_number + "未查询到风速功率信息")
			
 
				+        base_wind_and_power_count = base_wind_and_power_df.shape[0]
			
 
				+
			
 
				+        # 风机可利用率，计算方法：大于切入风速但发电功率小于0
			
 
				+        TurbineRunRate = 0
			
 
				+        nShouldGP = 0
			
 
				+        nRealGP = 0
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            if wind_and_power_df.loc[i, self.wind_velocity] >= VCutIn:
			
 
				+                nShouldGP = nShouldGP + 1
			
 
				+                if wind_and_power_df.loc[i, self.active_power] > 0:
			
 
				+                    nRealGP = nRealGP + 1
			
 
				+        if nShouldGP > 0:
			
 
				+            TurbineRunRate = nRealGP / nShouldGP * 100
			
 
				+
			
 
				+        print("disp(TurbineRunRate)", TurbineRunRate)
			
 
				+        # 理论电量-
			
 
				+        EPIdealTotalAAA = 0  # 理论电量-
			
 
				+        nWhichBin = 0
			
 
				+        IdealPower = 0
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            # 应发电量-理论
			
 
				+            nWhichBin = 0
			
 
				+            for m in range(base_wind_and_power_count - 1):
			
 
				+                if base_wind_and_power_df.loc[m, self.rated_wind_speed] < wind_and_power_df.loc[
			
 
				+                    i, self.wind_velocity] <= \
			
 
				+                        base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
			
 
				+                    nWhichBin = m
			
 
				+                    break
			
 
				+
			
 
				+            # 插值计算对应设计功率
			
 
				+            if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
			
 
				+                continue
			
 
				+
			
 
				+            IdealPower = (wind_and_power_df.loc[i, self.wind_velocity] - base_wind_and_power_df.loc[nWhichBin,
			
 
				+                                                                                                    self.rated_wind_speed]) / (
			
 
				+                                 base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] -
			
 
				+                                 base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) * (
			
 
				+                                 base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
			
 
				+                                 base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) \
			
 
				+                         + base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
			
 
				+            EPIdealTotalAAA = EPIdealTotalAAA + IdealPower / 6
			
 
				+
			
 
				+        print('EPIdealTotalAAA', EPIdealTotalAAA)
			
 
				+        #
			
 
				+        # 存储功率大于零的运行数据
			
 
				+        DzMarch809 = np.zeros([wind_and_power_df_count, 2], dtype=float)
			
 
				+        nCounter1 = 0
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            if wind_and_power_df.loc[i, self.active_power] > 0:
			
 
				+                DzMarch809[nCounter1, 0] = wind_and_power_df.loc[i, self.wind_velocity]
			
 
				+                DzMarch809[nCounter1, 1] = wind_and_power_df.loc[i, self.active_power]
			
 
				+
			
 
				+                nCounter1 = nCounter1 + 1
			
 
				+
			
 
				+        print('nCounter1', nCounter1)
			
 
				+
			
 
				+        # 统计各网格落入的散点个数
			
 
				+        XBoxNumber = np.ones([PNum, VNum], dtype=int)
			
 
				+        nWhichP = -1
			
 
				+        nWhichV = -1
			
 
				+        for i in range(nCounter1):
			
 
				+            for m in range(PNum):
			
 
				+                if m * 25 < DzMarch809[i, 1] <= (m + 1) * 25:
			
 
				+                    nWhichP = m
			
 
				+                    break
			
 
				+            for n in range(VNum):
			
 
				+                if ((n + 1) * 0.25 - 0.125) < DzMarch809[i, 0] <= ((n + 1) * 0.25 + 0.125):
			
 
				+                    nWhichV = n
			
 
				+                    break
			
 
				+
			
 
				+            if nWhichP > -1 and nWhichV > -1:
			
 
				+                XBoxNumber[nWhichP, nWhichV] = XBoxNumber[nWhichP, nWhichV] + 1
			
 
				+
			
 
				+        for m in range(PNum):
			
 
				+            for n in range(VNum):
			
 
				+                XBoxNumber[m, n] = XBoxNumber[m, n] - 1
			
 
				+
			
 
				+        print('XBoxNumber', XBoxNumber)
			
 
				+        # 在功率方向将网格内散点绝对个数转换为相对百分比，备用
			
 
				+        PBoxPercent = np.zeros([PNum, VNum], dtype=float)
			
 
				+        PBinSum = np.zeros(PNum, dtype=int)
			
 
				+
			
 
				+        for i in range(PNum):
			
 
				+            for m in range(VNum):
			
 
				+                PBinSum[i] = PBinSum[i] + XBoxNumber[i, m]
			
 
				+
			
 
				+            for m in range(VNum):
			
 
				+                if PBinSum[i] > 0:
			
 
				+                    PBoxPercent[i, m] = XBoxNumber[i, m] / PBinSum[i] * 100
			
 
				+
			
 
				+        # 在风速方向将网格内散点绝对个数转换为相对百分比，备用
			
 
				+        VBoxPercent = np.zeros([PNum, VNum], dtype=float)
			
 
				+        VBinSum = np.zeros(VNum, dtype=int)
			
 
				+
			
 
				+        for i in range(VNum):
			
 
				+            for m in range(PNum):
			
 
				+                VBinSum[i] = VBinSum[i] + XBoxNumber[m, i]
			
 
				+
			
 
				+            for m in range(PNum):
			
 
				+                if VBinSum[i] > 0:
			
 
				+                    VBoxPercent[m, i] = XBoxNumber[m, i] / VBinSum[i] * 100
			
 
				+
			
 
				+        # 以水平功率带方向为准，分析每个水平功率带中，功率主带中心，即找百分比最大的网格位置。
			
 
				+        PBoxMaxIndex = np.zeros(PNum, dtype=int)  # 水平功率带最大网格位置索引
			
 
				+        PBoxMaxP = np.zeros(PNum, dtype=int)  # 水平功率带最大网格百分比
			
 
				+
			
 
				+        for m in range(PNum):
			
 
				+            # 确定每一水平功率带的最大网格位置索引即百分比值
			
 
				+            PBoxMaxP[m], PBoxMaxIndex[m] = PBoxPercent[m, :].max(), PBoxPercent[m, :].argmax()
			
 
				+
			
 
				+        # 以垂直风速方向为准，分析每个垂直风速带中，功率主带中心，即找百分比最大的网格位置。
			
 
				+        VBoxMaxIndex = np.zeros(VNum, dtype=int)
			
 
				+        VBoxMaxV = np.zeros(VNum, dtype=int)
			
 
				+
			
 
				+        for m in range(VNum):
			
 
				+            [VBoxMaxV[m], VBoxMaxIndex[m]] = VBoxPercent[:, m].max(), VBoxPercent[:, m].argmax()
			
 
				+
			
 
				+        # 切入风速特殊处理，如果切入风速过于偏右，向左拉回
			
 
				+        if PBoxMaxIndex[0] > 14:
			
 
				+            PBoxMaxIndex[0] = 9
			
 
				+
			
 
				+        # 以水平功率带方向为基准，进行分析
			
 
				+        DotDense = np.zeros(PNum, dtype=int)  # 每一水平功率带的功率主带包含的网格数
			
 
				+        DotDenseLeftRight = np.zeros([PNum, 2], dtype=int)  # 存储每一水平功率带的功率主带以最大网格为中心，向向左，向右扩展的网格数
			
 
				+        DotValve = 90  # 从中心向左右对称扩展网格的散点百分比和的阈值。
			
 
				+        PDotDenseSum = 0
			
 
				+
			
 
				+        iSpreadLeft = 1  # 向左扩展网格计数，初值为1
			
 
				+        iSpreadRight = 1  # 向右扩展网格技术，初值为1
			
 
				+        for i in range(PNum - 6):  # 从最下层水平功率带1开始，向上到第PNum-6个水平功率带（额定功率一下水平功率带），逐一分析
			
 
				+            PDotDenseSum = PBoxMaxP[i]  # 以中心最大水平功率带为基准，向左向右对称扩展网格，累加各网格散点百分比
			
 
				+            iSpreadRight = 1
			
 
				+            iSpreadLeft = 1
			
 
				+            while PDotDenseSum < DotValve:
			
 
				+
			
 
				+                if (PBoxMaxIndex[i] + iSpreadRight) < VNum - 1:
			
 
				+                    PDotDenseSum = PDotDenseSum + PBoxPercent[i, PBoxMaxIndex[i] + iSpreadRight]  # 向右侧扩展
			
 
				+                    iSpreadRight = iSpreadRight + 1
			
 
				+
			
 
				+                if (PBoxMaxIndex[i] + iSpreadRight) > VNum - 1:
			
 
				+                    break
			
 
				+
			
 
				+                if (PBoxMaxIndex[i] - iSpreadLeft) > 0:
			
 
				+                    PDotDenseSum = PDotDenseSum + PBoxPercent[i, PBoxMaxIndex[i] - iSpreadLeft]  # 向左侧扩展
			
 
				+                    iSpreadLeft = iSpreadLeft + 1
			
 
				+
			
 
				+                if (PBoxMaxIndex[i] - iSpreadLeft) <= 0:
			
 
				+                    break
			
 
				+
			
 
				+            iSpreadRight = iSpreadRight - 1
			
 
				+
			
 
				+            iSpreadLeft = iSpreadLeft - 1
			
 
				+            # 向左右对称扩展完毕
			
 
				+
			
 
				+            DotDenseLeftRight[i, 0] = iSpreadLeft
			
 
				+            DotDenseLeftRight[i, 1] = iSpreadRight
			
 
				+            DotDense[i] = iSpreadLeft + iSpreadRight + 1
			
 
				+
			
 
				+        # 各行功率主带右侧宽度的中位数最具有代表性
			
 
				+        DotDenseWidthLeft = np.zeros([PNum - 6, 1], dtype=int)
			
 
				+        for i in range(PNum - 6):
			
 
				+            DotDenseWidthLeft[i] = DotDenseLeftRight[i, 1]
			
 
				+
			
 
				+        MainBandRight = np.median(DotDenseWidthLeft)
			
 
				+
			
 
				+        # 散点向右显著延展分布的水平功率带为限功率水平带
			
 
				+        PowerLimit = np.zeros([PNum, 1], dtype=int)  # 各水平功率带是否为限功率标识，==1：是；==0：不是
			
 
				+        WidthAverage = 0  # 功率主带平均宽度
			
 
				+        WidthVar = 0  # 功率主带方差
			
 
				+        # PowerLimitValve = 6    #限功率主带判别阈值
			
 
				+        PowerLimitValve = np.ceil(MainBandRight) + 3  # 限功率主带判别阈值
			
 
				+
			
 
				+        nCounterLimit = 0
			
 
				+        nCounter = 0
			
 
				+
			
 
				+        for i in range(PNum - 6):
			
 
				+            if DotDenseLeftRight[i, 1] > PowerLimitValve and PBinSum[i] > 20:  # 如果向右扩展网格数大于阈值，且该水平功率带点总数>20，是
			
 
				+                PowerLimit[i] = 1
			
 
				+                nCounterLimit = nCounterLimit + 1
			
 
				+
			
 
				+            if DotDenseLeftRight[i, 1] <= PowerLimitValve:
			
 
				+                WidthAverage = WidthAverage + DotDenseLeftRight[i, 1]  # 统计正常水平功率带右侧宽度
			
 
				+                nCounter = nCounter + 1
			
 
				+
			
 
				+        WidthAverage = WidthAverage / nCounter  # 功率主带平均宽度
			
 
				+
			
 
				+        print("WidthAverage", WidthAverage)
			
 
				+
			
 
				+        # 各水平功率带的功率主带宽度的方差，反映从下到上宽度是否一致，或是否下宽上窄等异常情况
			
 
				+        for i in range(PNum - 6):
			
 
				+            if DotDenseLeftRight[i, 1] <= PowerLimitValve:
			
 
				+                WidthVar = WidthVar + (DotDenseLeftRight[i, 1] - WidthAverage) * (
			
 
				+                        DotDenseLeftRight[i, 1] - WidthAverage)
			
 
				+
			
 
				+        WidthVar = np.sqrt(WidthVar / nCounter)
			
 
				+
			
 
				+        # 各水平功率带，功率主带的风速范围，右侧扩展网格数*2*0.25
			
 
				+        PowerBandWidth = WidthAverage * 2 * 0.25
			
 
				+
			
 
				+        # 对限负荷水平功率带的最大网格较下面相邻层显著偏右，拉回
			
 
				+        for i in range(1, PNum - 6):
			
 
				+            if PowerLimit[i] == 1 and abs(PBoxMaxIndex[i] - PBoxMaxIndex[i - 1]) > 5:
			
 
				+                PBoxMaxIndex[i] = PBoxMaxIndex[i - 1] + 1
			
 
				+
			
 
				+        # 输出各层功率主带的左右边界网格索引
			
 
				+        DotDenseInverse = np.zeros([PNum, 2], dtype=int)
			
 
				+
			
 
				+        for i in range(PNum):
			
 
				+            DotDenseInverse[i, :] = DotDenseLeftRight[PNum - i - 1, :]
			
 
				+
			
 
				+        # print('DotDenseInverse', DotDenseInverse)
			
 
				+
			
 
				+        # 功率主带的右边界
			
 
				+        CurveWidthR = int(np.ceil(WidthAverage) + 2)
			
 
				+
			
 
				+        # CurveWidthL = 6    #功率主带的左边界
			
 
				+        CurveWidthL = CurveWidthR
			
 
				+
			
 
				+        BBoxLimit = np.zeros([PNum, VNum], dtype=int)  # 网格是否为限功率网格的标识，如果为限功率水平功率带，从功率主带右侧边缘向右的网格为限功率网格
			
 
				+        for i in range(2, PNum - 6):
			
 
				+            if PowerLimit[i] == 1:
			
 
				+                for j in range(PBoxMaxIndex[i] + CurveWidthR, VNum):
			
 
				+                    BBoxLimit[i, j] = 1
			
 
				+
			
 
				+        BBoxRemove = np.zeros([PNum, VNum], dtype=int)  # 数据异常需要剔除的网格标识，标识==1：功率主带右侧的欠发网格；==2：功率主带左侧的超发网格
			
 
				+        for m in range(PNum - 6):
			
 
				+            for n in range(PBoxMaxIndex[m] + CurveWidthR - 1, VNum):
			
 
				+                BBoxRemove[m, n] = 1
			
 
				+
			
 
				+            for n in range(PBoxMaxIndex[m] - CurveWidthL - 1, 0, -1):
			
 
				+                BBoxRemove[m, n] = 2
			
 
				+
			
 
				+        # 确定功率主带的左上拐点，即额定风速位置的网格索引
			
 
				+        CurveTop = np.zeros(2, dtype=int)
			
 
				+        CurveTopValve = 3  # 网格的百分比阈值
			
 
				+        BTopFind = 0
			
 
				+        for m in range(PNum - 4 - 1, 0, -1):
			
 
				+            for n in range(VNum):
			
 
				+                if VBoxPercent[m, n] > CurveTopValve and XBoxNumber[m, n] >= 10:  # 如左上角网格的百分比和散点个数大于阈值。
			
 
				+                    CurveTop[0] = m
			
 
				+                    CurveTop[1] = n
			
 
				+                    BTopFind = 1
			
 
				+                    break
			
 
				+
			
 
				+            if BTopFind == 1:
			
 
				+                break
			
 
				+
			
 
				+        IsolateValve = 3
			
 
				+        for m in range(PNum - 6):
			
 
				+            for n in range(PBoxMaxIndex[m] + CurveWidthR - 1, VNum):
			
 
				+                if PBoxPercent[m, n] < IsolateValve:
			
 
				+                    BBoxRemove[m, n] = 1
			
 
				+
			
 
				+        # 功率主带顶部宽度
			
 
				+        CurveWidthT = 2
			
 
				+        for m in range(PNum - CurveWidthT - 1, PNum):
			
 
				+            for n in range(VNum):
			
 
				+                BBoxRemove[m, n] = 3  # 网格为额定功率以上的超发点
			
 
				+
			
 
				+        # 功率主带拐点左侧的欠发网格标识
			
 
				+        for m in range(PNum - 5 - 1, PNum):
			
 
				+            for n in range(CurveTop[1] - 2 - 1):
			
 
				+                BBoxRemove[m, n] = 2
			
 
				+
			
 
				+        # 以网格的标识，决定该网格内数据的标识。Dzwind_and_power_dfSel功率非零数据的标识位。散点在哪个网格，此网格的标识即为该点的标识
			
 
				+        Dzwind_and_power_dfSel = np.zeros(nCounter1, dtype=int)  # is ==1,欠发功率点；==2，超发功率点；==3，额定风速以上的超发功率点 ==4, 限电
			
 
				+        nWhichP = 0
			
 
				+        nWhichV = 0
			
 
				+        nBadA = 0
			
 
				+
			
 
				+        for i in range(nCounter1):
			
 
				+            for m in range(PNum):
			
 
				+                if DzMarch809[i, 1] > (m - 1) * 25 and DzMarch809[i, 1] <= m * 25:
			
 
				+                    nWhichP = m
			
 
				+                    break
			
 
				+
			
 
				+            for n in range(VNum):
			
 
				+                if DzMarch809[i, 0] > (n * 0.25 - 0.125) and DzMarch809[i, 0] <= (n * 0.25 + 0.125):
			
 
				+                    nWhichV = n
			
 
				+                    break
			
 
				+
			
 
				+            if nWhichP > 0 and nWhichV > 0:
			
 
				+
			
 
				+                if BBoxRemove[nWhichP, nWhichV] == 1:
			
 
				+                    Dzwind_and_power_dfSel[i] = 1
			
 
				+                    nBadA = nBadA + 1
			
 
				+
			
 
				+                if BBoxRemove[nWhichP, nWhichV] == 2:
			
 
				+                    Dzwind_and_power_dfSel[i] = 2
			
 
				+
			
 
				+                if BBoxRemove[nWhichP, nWhichV] == 3:
			
 
				+                    Dzwind_and_power_dfSel[i] = 0  # 3  # 额定风速以上的超发功率点认为是正常点，不再标识。
			
 
				+
			
 
				+                if BBoxLimit[nWhichP, nWhichV] == 1 and nWhichP>16:
			
 
				+                    Dzwind_and_power_dfSel[i] = 4
			
 
				+
			
 
				+        print("nWhichP", nWhichP)
			
 
				+        print("nWhichV", nWhichV)
			
 
				+        print("nBadA", nBadA)
			
 
				+
			
 
				+        # 限负荷数据标识方法2：把数据切割为若干个窗口。对每一窗口，以第一个点为基准，连续nWindowLength个数据的功率在方差范围内，呈现显著水平分布的点
			
 
				+        PVLimit = np.zeros([nCounter1, 2], dtype=int)  # 存储限负荷数据
			
 
				+        nLimitTotal = 0
			
 
				+        nWindowLength = 3
			
 
				+        LimitWindow = np.zeros(nWindowLength, dtype=int)
			
 
				+        UpLimit = 0  # 上限
			
 
				+        LowLimit = 0  # 下限
			
 
				+        PowerStd = 15  # 功率波动方差
			
 
				+        bAllInUpLow = 1  # ==1:窗口内所有数据均在方差上下限之内，限负荷==0,不满足条件
			
 
				+        bAllInAreas = 1  # ==1：窗口所有数据均在200~PRated-300kW范围内；==0：不满足此条件
			
 
				+        nWindowNum = int(np.floor(nCounter1 / nWindowLength))
			
 
				+        PowerLimitUp = PRated - 300
			
 
				+        PowerLimitLow = 200
			
 
				+        for i in range(nWindowNum):
			
 
				+            for j in range(nWindowLength):
			
 
				+                LimitWindow[j] = DzMarch809[i * nWindowLength + j, 1]
			
 
				+
			
 
				+            bAllInAreas = 1
			
 
				+            for j in range(nWindowLength):
			
 
				+                if LimitWindow[j] < PowerLimitLow or LimitWindow[j] > PowerLimitUp:
			
 
				+                    bAllInAreas = 0
			
 
				+
			
 
				+            if bAllInAreas == 0:
			
 
				+                continue
			
 
				+
			
 
				+            UpLimit = LimitWindow[0] + PowerStd
			
 
				+            LowLimit = LimitWindow[0] - PowerStd
			
 
				+            bAllInUpLow = 1
			
 
				+            for j in range(1, nWindowLength):
			
 
				+                if LimitWindow[j] < LowLimit or LimitWindow[j] > UpLimit:
			
 
				+                    bAllInUpLow = 0
			
 
				+
			
 
				+            if bAllInUpLow == 1:
			
 
				+                for j in range(nWindowLength):
			
 
				+                    Dzwind_and_power_dfSel[i * nWindowLength + j] = 4  # 标识窗口内的数据为限负荷数据
			
 
				+
			
 
				+                for j in range(nWindowLength):
			
 
				+                    PVLimit[nLimitTotal, :] = DzMarch809[i * nWindowLength + j, :]
			
 
				+                    nLimitTotal = nLimitTotal + 1
			
 
				+
			
 
				+        print("nLimitTotal", nLimitTotal)
			
 
				+
			
 
				+        # 相邻水平功率主带的锯齿平滑
			
 
				+        PVLeftDown = np.zeros(2, dtype=int)
			
 
				+        PVRightUp = np.zeros(2, dtype=int)
			
 
				+        nSmooth = 0
			
 
				+        for i in range(PNum - 6 - 1):
			
 
				+            PVLeftDown = np.zeros(2, dtype=int)
			
 
				+            PVRightUp = np.zeros(2, dtype=int)
			
 
				+
			
 
				+            if (PBoxMaxIndex[i + 1] - PBoxMaxIndex[i]) >= 1:
			
 
				+                PVLeftDown[0] = (PBoxMaxIndex[i] + CurveWidthR) * 0.25 - 0.125
			
 
				+                PVLeftDown[1] = (i - 1) * 25
			
 
				+
			
 
				+                PVRightUp[0] = (PBoxMaxIndex[i + 1] + CurveWidthR) * 0.25 - 0.125
			
 
				+                PVRightUp[1] = (i + 1 - 1) * 25
			
 
				+
			
 
				+                for m in range(nCounter1):
			
 
				+                    if DzMarch809[m, 0] > PVLeftDown[0] and DzMarch809[m, 0] < PVRightUp[0] and PVLeftDown[1] < \
			
 
				+                            DzMarch809[m, 1] < PVRightUp[1]:  # 在该锯齿中
			
 
				+                        if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (
			
 
				+                                PVRightUp[1] - PVLeftDown[1]) / (
			
 
				+                                PVRightUp[0] - PVLeftDown[0]):  # 斜率大于对角连线，则在锯齿左上三角形中，选中
			
 
				+                            Dzwind_and_power_dfSel[m] = 0
			
 
				+                            nSmooth = nSmooth + 1
			
 
				+
			
 
				+        print("nSmooth", nSmooth)
			
 
				+
			
 
				+        # 存储好点
			
 
				+        nCounterPV = 0
			
 
				+        PVDot = np.zeros([nCounter1, 2], dtype=int)
			
 
				+        for i in range(nCounter1):
			
 
				+            if Dzwind_and_power_dfSel[i] == 0:
			
 
				+                PVDot[nCounterPV, :] = DzMarch809[i, :]
			
 
				+                nCounterPV = nCounterPV + 1
			
 
				+
			
 
				+        nCounterVP = nCounterPV
			
 
				+        print("nCounterVP", nCounterVP)
			
 
				+
			
 
				+        # 存储坏点
			
 
				+        nCounterBad = 0
			
 
				+        PVBad = np.zeros([nCounter1, 2], dtype=int)
			
 
				+        for i in range(nCounter1):
			
 
				+            if Dzwind_and_power_dfSel[i] == 1 or Dzwind_and_power_dfSel[i] == 2 or Dzwind_and_power_dfSel[i] == 3:
			
 
				+                PVBad[nCounterBad, :] = DzMarch809[i, :]
			
 
				+                nCounterBad = nCounterBad + 1
			
 
				+
			
 
				+        print("nCounterBad", nCounterBad)
			
 
				+
			
 
				+        # 用功率主带中的好点绘制实测功率曲
			
 
				+        XBinNumber = np.ones(50, dtype=int)
			
 
				+        PCurve = np.zeros([50, 2], dtype=int)
			
 
				+        PCurve[:, 0] = [i / 2 for i in range(1, 51)]
			
 
				+        XBinSum = np.zeros([50, 2], dtype=int)
			
 
				+        nWhichBin = 0
			
 
				+
			
 
				+        for i in range(nCounterVP):
			
 
				+            nWhichBin = 0
			
 
				+
			
 
				+            for b in range(50):
			
 
				+                if PVDot[i, 0] > (b * 0.5 - 0.25) and PVDot[i, 0] <= (b * 0.5 + 0.25):
			
 
				+                    nWhichBin = b
			
 
				+                    break
			
 
				+
			
 
				+            if nWhichBin > 0:
			
 
				+                XBinSum[nWhichBin, 0] = XBinSum[nWhichBin, 0] + PVDot[i, 0]  # wind speed
			
 
				+                XBinSum[nWhichBin, 1] = XBinSum[nWhichBin, 1] + PVDot[i, 1]  # Power
			
 
				+                XBinNumber[nWhichBin] = XBinNumber[nWhichBin] + 1
			
 
				+
			
 
				+        for b in range(50):
			
 
				+            XBinNumber[b] = XBinNumber[b] - 1
			
 
				+
			
 
				+        for b in range(50):
			
 
				+            if XBinNumber[b] > 0:
			
 
				+                PCurve[b, 0] = XBinSum[b, 0] / XBinNumber[b]
			
 
				+                PCurve[b, 1] = XBinSum[b, 1] / XBinNumber[b]
			
 
				+
			
 
				+        # 对额定风速以上的功率直接赋额定功率
			
 
				+        VRatedNum = int(VRated / 0.5)
			
 
				+        for m in range(VRatedNum, 50):
			
 
				+            if PCurve[m, 1] == 0:
			
 
				+                PCurve[m, 1] = PRated
			
 
				+
			
 
				+        # print("PCurve", PCurve)
			
 
				+
			
 
				+        # 绘制标准正则功率曲线，以0.5m/s标准为间隔
			
 
				+        # 15m/s以上为额定功率，15m/s以下为计算得到
			
 
				+        PCurveNorm = np.zeros([50, 2], dtype=int)
			
 
				+        for i in range(30, 50):
			
 
				+            PCurveNorm[i, 0] = i * 0.5
			
 
				+            PCurveNorm[i, 1] = PRated
			
 
				+
			
 
				+        # 15m/s一下正则功率曲线
			
 
				+        CurveData = np.zeros([30, 2], dtype=int)
			
 
				+        for i in range(30):
			
 
				+            CurveData[i, :] = PCurve[i, :]
			
 
				+
			
 
				+        CurveNorm = np.zeros([30, 2], dtype=int)
			
 
				+        VSpeed = [i / 2 for i in range(1, 31)]
			
 
				+
			
 
				+        WhichBin = 0
			
 
				+
			
 
				+        K = 0
			
 
				+        a = 0
			
 
				+        for m in range(30):
			
 
				+            K = 0
			
 
				+            a = 0
			
 
				+
			
 
				+            for n in range(30):
			
 
				+                if abs(CurveData[n, 0] - VSpeed[m]) < 0.1:
			
 
				+                    WhichBin = n
			
 
				+                    break
			
 
				+
			
 
				+            if WhichBin > 1:
			
 
				+                if CurveData[WhichBin, 0] - CurveData[WhichBin - 1, 0] > 0:
			
 
				+                    K = (CurveData[WhichBin, 1] - CurveData[WhichBin - 1, 1]) / (
			
 
				+                            CurveData[WhichBin, 0] - CurveData[WhichBin - 1, 0])
			
 
				+                    a = CurveData[WhichBin, 1] - K * CurveData[WhichBin, 0]
			
 
				+
			
 
				+            CurveNorm[m, 0] = VSpeed[m]
			
 
				+            CurveNorm[m, 1] = a + K * VSpeed[m]
			
 
				+
			
 
				+        for i in range(30):
			
 
				+            PCurveNorm[i, :] = CurveNorm[i, :]
			
 
				+
			
 
				+        # 子模块3：损失电量计算及发电性能评价
			
 
				+        CC = len(PCurve[:, 0])
			
 
				+        EPIdealTotal = 0
			
 
				+        # 计算停机损失
			
 
				+        EPLostStopTotal = 0
			
 
				+        EPLost = 0
			
 
				+
			
 
				+        nWhichBin = 0
			
 
				+        IdealPower = 0
			
 
				+        nStopTotal = 0
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            if wind_and_power_df.loc[i, self.active_power] <= 0:
			
 
				+                nWhichBin = 0
			
 
				+                for m in range(base_wind_and_power_count - 1):
			
 
				+                    if wind_and_power_df.loc[i, self.wind_velocity] > base_wind_and_power_df.loc[
			
 
				+                        m, self.rated_wind_speed] and wind_and_power_df.loc[i, self.wind_velocity] <= \
			
 
				+                            base_wind_and_power_df.loc[
			
 
				+                                m + 1, self.rated_wind_speed]:
			
 
				+                        nWhichBin = m
			
 
				+                        break
			
 
				+
			
 
				+                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
			
 
				+                    continue
			
 
				+
			
 
				+                IdealPower = (wind_and_power_df.loc[i, self.wind_velocity] - base_wind_and_power_df.loc[
			
 
				+                    nWhichBin, self.rated_wind_speed]) / (
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] -
			
 
				+                                     base_wind_and_power_df.loc[
			
 
				+                                         nWhichBin, self.rated_wind_speed]) * (
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity]
			
 
				+                                     - base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) \
			
 
				+                             + base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
			
 
				+
			
 
				+                EPLost = IdealPower / 6
			
 
				+                EPLostStopTotal = EPLostStopTotal + EPLost
			
 
				+                nStopTotal = nStopTotal + 1
			
 
				+
			
 
				+        print("EPLost", EPLost)
			
 
				+        print("nStopTotal", nStopTotal)
			
 
				+        print("EPLostStopTotal", EPLostStopTotal)
			
 
				+
			
 
				+        nWhichP = 0
			
 
				+        nWhichV = 0
			
 
				+        nWhichBin = 0
			
 
				+        IdealPower = 0
			
 
				+
			
 
				+        # 计算欠发损失，此欠发损失已不包括限电损失，限电点在前面已经从欠发点中去除。
			
 
				+        EPLostBadTotal = 0
			
 
				+        EPLost = 0
			
 
				+
			
 
				+        nBadTotal = 0
			
 
				+
			
 
				+        LostBadPercent = 0
			
 
				+
			
 
				+        EPOverTotal = 0
			
 
				+        EPOver = 0
			
 
				+        nOverTotal = 0
			
 
				+
			
 
				+        for i in range(nCounter1):
			
 
				+            if Dzwind_and_power_dfSel[i] == 1:
			
 
				+                nWhichBin = 0
			
 
				+                for m in range(base_wind_and_power_count - 1):
			
 
				+                    if DzMarch809[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] \
			
 
				+                            and DzMarch809[i, 0] <= base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
			
 
				+                        nWhichBin = m
			
 
				+                        break
			
 
				+
			
 
				+                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
			
 
				+                    continue
			
 
				+
			
 
				+                IdealPower = (DzMarch809[i, 0] - base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) / (
			
 
				+                        base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] - base_wind_and_power_df.loc[
			
 
				+                    nWhichBin, self.rated_wind_speed]) * (
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) + \
			
 
				+                             base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
			
 
				+                EPLost = abs(IdealPower - DzMarch809[i, 1]) / 6
			
 
				+                EPLostBadTotal = EPLostBadTotal + EPLost
			
 
				+                nBadTotal = nBadTotal + 1
			
 
				+
			
 
				+            # 额定风速以上超发电量
			
 
				+            if Dzwind_and_power_dfSel[i] == 3:
			
 
				+                EPOver = (DzMarch809[i, 1] - PRated) / 6
			
 
				+                EPOverTotal = EPOverTotal + EPOver
			
 
				+                nOverTotal = nOverTotal + 1
			
 
				+
			
 
				+        print("EPLost", EPLost)
			
 
				+        print("nBadTotal", nBadTotal)
			
 
				+        print("EPLostBadTotal", EPLostBadTotal)
			
 
				+        print("EPOverTotal", EPOverTotal)
			
 
				+        print("nOverTotal", nOverTotal)
			
 
				+
			
 
				+        # 功率曲线未达标损失
			
 
				+        EPLostPerformTotal = 0
			
 
				+        nWhichBinI = 0
			
 
				+        IdealPower = 0
			
 
				+
			
 
				+        for i in range(nCounterVP):
			
 
				+
			
 
				+            for m in range(base_wind_and_power_count - 1):
			
 
				+                if PVDot[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] and PVDot[i, 0] <= \
			
 
				+                        base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
			
 
				+                    nWhichBinI = m
			
 
				+                    break
			
 
				+
			
 
				+            if nWhichBinI > base_wind_and_power_count - 1 or nWhichBinI == 0:
			
 
				+                continue
			
 
				+
			
 
				+            IdealPower = (PVDot[i, 0] - base_wind_and_power_df.loc[nWhichBinI, self.rated_wind_speed]) / (
			
 
				+                    base_wind_and_power_df.loc[nWhichBinI + 1, self.rated_wind_speed] - base_wind_and_power_df.loc[
			
 
				+                nWhichBinI, self.rated_wind_speed]) * \
			
 
				+                         (base_wind_and_power_df.loc[nWhichBinI + 1, self.rated_capacity] -
			
 
				+                          base_wind_and_power_df.loc[nWhichBinI, self.rated_capacity]) + \
			
 
				+                         base_wind_and_power_df.loc[nWhichBinI, self.rated_capacity]
			
 
				+
			
 
				+            EPLostPerformTotal = EPLostPerformTotal + (IdealPower - PVDot[i, 1]) / 6
			
 
				+
			
 
				+        print("EPLostPerformTotal", EPLostPerformTotal)
			
 
				+
			
 
				+        # 限电损失
			
 
				+        EPLostLimitTotal = 0
			
 
				+        EPLost = 0
			
 
				+        nLimitTotal = 0
			
 
				+
			
 
				+        PVLimit = np.zeros([nCounter1, 2])
			
 
				+
			
 
				+        for i in range(nCounter1):
			
 
				+            if Dzwind_and_power_dfSel[i] == 4:
			
 
				+                nWhichBin = 0
			
 
				+                for m in range(base_wind_and_power_count - 1):
			
 
				+                    if DzMarch809[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] and DzMarch809[i, 0] <= \
			
 
				+                            base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
			
 
				+                        nWhichBin = m
			
 
				+                        break
			
 
				+
			
 
				+                # 插值计算对应设计功率
			
 
				+                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
			
 
				+                    continue
			
 
				+
			
 
				+                IdealPower = (DzMarch809[i, 0] - base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) / (
			
 
				+                        base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] -
			
 
				+                        base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) * (
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) + \
			
 
				+                             base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
			
 
				+                EPLost = np.abs(IdealPower - DzMarch809[i, 1]) / 6
			
 
				+                EPLostLimitTotal = EPLostLimitTotal + EPLost
			
 
				+
			
 
				+                PVLimit[nLimitTotal, :] = DzMarch809[i, :]
			
 
				+                nLimitTotal = nLimitTotal + 1
			
 
				+
			
 
				+        nLimitTotal = nLimitTotal - 1
			
 
				+
			
 
				+        print("nLimitTotal", nLimitTotal)
			
 
				+
			
 
				+        # 欠发和限点损失总和
			
 
				+        EPLostBadLimitTotal = EPLostBadTotal + EPLostLimitTotal
			
 
				+
			
 
				+        # 如果功率曲线未达标损失为正
			
 
				+        if EPLostPerformTotal >= 0:
			
 
				+            EPIdealTotal = EPActualTotal + EPLostStopTotal + EPLostLimitTotal + EPLostBadTotal + EPLostPerformTotal
			
 
				+
			
 
				+        # 如果功率曲线未达标损失为负
			
 
				+        if EPLostPerformTotal < 0:
			
 
				+            EPIdealTotal = EPActualTotal + EPLostStopTotal + EPLostLimitTotal + EPLostBadTotal
			
 
				+
			
 
				+        print("EPIdealTotal", EPIdealTotal)
			
 
				+        # 可以比较求和得到的应发功率EPIdealTotal与理论计算得到的应发功率EPIdealTotalAAA的差别
			
 
				+        # 需要去除的超发功率：（1）功率主带左侧的超发点；（2）额定风速以上的超发点。
			
 
				+        RemoveOverEP = 0
			
 
				+        nType2 = 0
			
 
				+        for i in range(nCounter1):
			
 
				+            if Dzwind_and_power_dfSel[i] == 2:  # 功率主带左侧的超发坏点
			
 
				+                nWhichBin = 0
			
 
				+                for m in range(base_wind_and_power_count - 1):
			
 
				+                    if DzMarch809[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] and DzMarch809[i, 0] <= \
			
 
				+                            base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
			
 
				+                        nWhichBin = m
			
 
				+                        break
			
 
				+
			
 
				+                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
			
 
				+                    continue
			
 
				+
			
 
				+                IdealPower = (DzMarch809[i, 0] - base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) / (
			
 
				+                        base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] - base_wind_and_power_df.loc[
			
 
				+                    nWhichBin, self.rated_wind_speed]) * (
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) + \
			
 
				+                             base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
			
 
				+
			
 
				+                RemoveOverEP = RemoveOverEP + (DzMarch809[i, 1] - IdealPower) / 6
			
 
				+                nType2 = nType2 + 1
			
 
				+
			
 
				+        print("RemoveOverEP", RemoveOverEP)
			
 
				+        print("nType2", nType2)
			
 
				+        # 额定功率以上的超发点
			
 
				+        nTypeOver = 0
			
 
				+        for i in range(nCounter1):
			
 
				+            if DzMarch809[i, 1] > PRated:
			
 
				+                RemoveOverEP = RemoveOverEP + (DzMarch809[i, 1] - PRated) / 6
			
 
				+                nTypeOver = nTypeOver + 1
			
 
				+
			
 
				+        print("RemoveOverEP", RemoveOverEP)
			
 
				+        print("nTypeOver", nTypeOver)
			
 
				+
			
 
				+    def run(self):
			
 
				+        """Entry point: run the identification by calling ``self.identifier()``."""
			
 
				+        # All of the work happens in identifier(); nothing is returned from run().
			
 
				+        self.identifier()
			
 
				+
			
 
				+
			
 
				+# Manual test entry point: build a ClassIdentifier from a local CSV and run it.
			
 
				+if __name__ == '__main__':
			
 
				+    # The input is a hard-coded absolute Windows path.
			
 
				+    # NOTE(review): presumably only valid on the original author's machine - confirm before reuse.
			
 
				+    # The keyword arguments name the CSV columns: '时间' (time), '风速' (wind speed), '功率' (power).
			
 
				+    test = ClassIdentifier('test', r"D:\中能智能\matlib计算相关\好点坏点matlib计算\A01.csv", index='时间',
			
 
				+                           wind_velocity='风速',
			
 
				+                           active_power='功率')
			
 
				+
			
 
				+    test.run()
			
--- a/tmp_file/ClassIdentifier_1.py
+++ b/tmp_file/ClassIdentifier_1.py
@@ -0,0 +1,756 @@
 
				+import numpy as np
			
 
				+from pandas import DataFrame
			
 
				+
			
 
				+from service.plt_service import get_base_wind_and_power
			
 
				+from utils.file.trans_methods import read_file_to_df
			
 
				+
			
 
				+
			
 
				+class ClassIdentifier(object):
			
 
				+
			
 
				+    def __init__(self, wind_turbine_number, file_path: str = None, origin_df: DataFrame = None, index='time_stamp',
			
 
				+                 wind_velocity='wind_velocity',
			
 
				+                 active_power='active_power'):
			
 
				+        """
			
 
				+        Store the column names and load the input data, indexed by ``index``.
			
 
				+
			
 
				+        :param wind_turbine_number: The wind turbine number.
			
 
				+        :param file_path: The file path of the input data. Used whenever it is truthy,
			
 
				+            in which case origin_df is ignored.
			
 
				+        :param origin_df: The pandas DataFrame containing the input data. Used only
			
 
				+            when file_path is falsy.
			
 
				+        :param index: Name of the column that is set as the DataFrame index.
			
 
				+        :param wind_velocity: Name of the wind speed column.
			
 
				+        :param active_power: Name of the active power column.
			
 
				+        :raises ValueError: If both file_path and origin_df are None.
			
 
				+        """
			
 
				+        self.wind_turbine_number = wind_turbine_number
			
 
				+        self.index = index
			
 
				+        self.wind_velocity = wind_velocity
			
 
				+        self.active_power = active_power
			
 
				+
			
 
				+        # Column names that identifier() assigns to the reference power-curve table.
			
 
				+        self.rated_wind_speed = 'rated_wind_speed'
			
 
				+        self.rated_capacity = 'rated_capacity'
			
 
				+
			
 
				+        if file_path is None and origin_df is None:
			
 
				+            raise ValueError("Either file_path or origin_df should be provided.")
			
 
				+
			
 
				+        # NOTE(review): an empty-string file_path with origin_df=None passes the check
			
 
				+        # above but takes the else branch, so self.df is None and set_index below fails.
			
 
				+        if file_path:
			
 
				+            self.df = read_file_to_df(file_path)
			
 
				+        else:
			
 
				+            self.df = origin_df
			
 
				+
			
 
				+        # Re-index by the configured column; the result is rebound to self.df.
			
 
				+        self.df = self.df.set_index(keys=self.index)
			
 
				+
			
 
				+    def identifier(self):
			
 
				+        # 风速 和 有功功率 df
			
 
				+        wind_and_power_df = self.df[[self.wind_velocity, self.active_power]]
			
 
				+        wind_and_power_df.reset_index(inplace=True)
			
 
				+        wind_and_power_df_count = wind_and_power_df.shape[0]
			
 
				+        PowerMax = wind_and_power_df[self.active_power].max()
			
 
				+        PowerRated = np.ceil(PowerMax / 100) * 100
			
 
				+        PRated = 1500  # 额定功率1500kw,可改为2000kw
			
 
				+        VCutOut = 25
			
 
				+        VCutIn = 3
			
 
				+        VRated = 10
			
 
				+        # 网格法确定风速风向分区数量，功率方向分区数量，
			
 
				+        # PNum = (PRated+100)/25  #功率分区间隔25kW
			
 
				+        PNum = int(np.ceil(PowerRated / 25))  # 功率分区间隔25kW
			
 
				+        VNum = int(np.ceil(VCutOut / 0.25))  # 风速分区间隔0.25m/s
			
 
				+
			
 
				+        # 实发电量
			
 
				+        EPActualTotal = 0  # 实发电量
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            if wind_and_power_df.loc[i, self.active_power] >= 0:
			
 
				+                EPActualTotal = EPActualTotal + wind_and_power_df.loc[i, self.active_power] / 6
			
 
				+
			
 
				+        print("EPActualTotal", EPActualTotal)
			
 
				+        # 平均风速
			
 
				+        WindSpeedAvr = 0
			
 
				+        WindSum = 0
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            if wind_and_power_df.loc[i, self.wind_velocity] >= 0:
			
 
				+                WindSum = WindSum + wind_and_power_df.loc[i, self.wind_velocity]
			
 
				+        WindSpeedAvr = WindSum / wind_and_power_df_count
			
 
				+        print("windSpeedAvr", WindSpeedAvr)
			
 
				+        # 用于计算损失电量的标杆功率曲线，可更换为风机设计功率曲线
			
 
				+        # base_wind_and_power_df = get_base_wind_and_power(self.wind_turbine_number)
			
 
				+        base_wind_and_power_df = read_file_to_df(r"D:\中能智能\matlib计算相关\好点坏点matlib计算\A型风机设计功率曲线.csv", header=None)
			
 
				+        base_wind_and_power_df.columns = [self.rated_wind_speed, self.rated_capacity]
			
 
				+        if base_wind_and_power_df.empty:
			
 
				+            raise ValueError("风场编号:" + self.wind_turbine_number + "未查询到风速功率信息")
			
 
				+        base_wind_and_power_count = base_wind_and_power_df.shape[0]
			
 
				+
			
 
				+        # 风机可利用率，计算方法：大于切入风速但发电功率小于0
			
 
				+        TurbineRunRate = 0
			
 
				+        nShouldGP = 0
			
 
				+        nRealGP = 0
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            if wind_and_power_df.loc[i, self.wind_velocity] >= VCutIn:
			
 
				+                nShouldGP = nShouldGP + 1
			
 
				+                if wind_and_power_df.loc[i, self.active_power] > 0:
			
 
				+                    nRealGP = nRealGP + 1
			
 
				+        if nShouldGP > 0:
			
 
				+            TurbineRunRate = nRealGP / nShouldGP * 100
			
 
				+
			
 
				+        print("disp(TurbineRunRate)", TurbineRunRate)
			
 
				+        # 理论电量-
			
 
				+        EPIdealTotalAAA = 0  # 理论电量-
			
 
				+        nWhichBin = 0
			
 
				+        IdealPower = 0
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            # 应发电量-理论
			
 
				+            nWhichBin = 0
			
 
				+            for m in range(base_wind_and_power_count - 1):
			
 
				+                if base_wind_and_power_df.loc[m, self.rated_wind_speed] < wind_and_power_df.loc[
			
 
				+                    i, self.wind_velocity] <= \
			
 
				+                        base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
			
 
				+                    nWhichBin = m
			
 
				+                    break
			
 
				+
			
 
				+            # 插值计算对应设计功率
			
 
				+            if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
			
 
				+                continue
			
 
				+
			
 
				+            IdealPower = (wind_and_power_df.loc[i, self.wind_velocity] - base_wind_and_power_df.loc[nWhichBin,
			
 
				+                                                                                                    self.rated_wind_speed]) / (
			
 
				+                                 base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] -
			
 
				+                                 base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) * (
			
 
				+                                 base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
			
 
				+                                 base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) \
			
 
				+                         + base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
			
 
				+            EPIdealTotalAAA = EPIdealTotalAAA + IdealPower / 6
			
 
				+
			
 
				+        print('EPIdealTotalAAA', EPIdealTotalAAA)
			
 
				+        #
			
 
				+        # 存储功率大于零的运行数据
			
 
				+        DzMarch809 = np.zeros([wind_and_power_df_count, 2], dtype=float)
			
 
				+        nCounter1 = 0
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            if wind_and_power_df.loc[i, self.active_power] > 0:
			
 
				+                DzMarch809[nCounter1, 0] = wind_and_power_df.loc[i, self.wind_velocity]
			
 
				+                DzMarch809[nCounter1, 1] = wind_and_power_df.loc[i, self.active_power]
			
 
				+
			
 
				+                nCounter1 = nCounter1 + 1
			
 
				+
			
 
				+        print('nCounter1', nCounter1)
			
 
				+
			
 
				+        # 统计各网格落入的散点个数
			
 
				+        XBoxNumber = np.ones([PNum, VNum], dtype=int)
			
 
				+        nWhichP = -1
			
 
				+        nWhichV = -1
			
 
				+        for i in range(nCounter1):
			
 
				+            for m in range(PNum):
			
 
				+                if m * 25 < DzMarch809[i, 1] <= (m + 1) * 25:
			
 
				+                    nWhichP = m
			
 
				+                    break
			
 
				+            for n in range(VNum):
			
 
				+                if ((n + 1) * 0.25 - 0.125) < DzMarch809[i, 0] <= ((n + 1) * 0.25 + 0.125):
			
 
				+                    nWhichV = n
			
 
				+                    break
			
 
				+
			
 
				+            if nWhichP > -1 and nWhichV > -1:
			
 
				+                XBoxNumber[nWhichP, nWhichV] = XBoxNumber[nWhichP, nWhichV] + 1
			
 
				+
			
 
				+        for m in range(PNum):
			
 
				+            for n in range(VNum):
			
 
				+                XBoxNumber[m, n] = XBoxNumber[m, n] - 1
			
 
				+
			
 
				+        print('XBoxNumber', XBoxNumber)
			
 
				+        # 在功率方向将网格内散点绝对个数转换为相对百分比，备用
			
 
				+        PBoxPercent = np.zeros([PNum, VNum], dtype=float)
			
 
				+        PBinSum = np.zeros(PNum, dtype=int)
			
 
				+
			
 
				+        for i in range(PNum):
			
 
				+            for m in range(VNum):
			
 
				+                PBinSum[i] = PBinSum[i] + XBoxNumber[i, m]
			
 
				+
			
 
				+            for m in range(VNum):
			
 
				+                if PBinSum[i] > 0:
			
 
				+                    PBoxPercent[i, m] = XBoxNumber[i, m] / PBinSum[i] * 100
			
 
				+
			
 
				+        # 在风速方向将网格内散点绝对个数转换为相对百分比，备用
			
 
				+        VBoxPercent = np.zeros([PNum, VNum], dtype=float)
			
 
				+        VBinSum = np.zeros(VNum, dtype=int)
			
 
				+
			
 
				+        for i in range(VNum):
			
 
				+            for m in range(PNum):
			
 
				+                VBinSum[i] = VBinSum[i] + XBoxNumber[m, i]
			
 
				+
			
 
				+            for m in range(PNum):
			
 
				+                if VBinSum[i] > 0:
			
 
				+                    VBoxPercent[m, i] = XBoxNumber[m, i] / VBinSum[i] * 100
			
 
				+
			
 
				+        # 以水平功率带方向为准，分析每个水平功率带中，功率主带中心，即找百分比最大的网格位置。
			
 
				+        PBoxMaxIndex = np.zeros(PNum, dtype=int)  # 水平功率带最大网格位置索引
			
 
				+        PBoxMaxP = np.zeros(PNum, dtype=int)  # 水平功率带最大网格百分比
			
 
				+
			
 
				+        for m in range(PNum):
			
 
				+            # 确定每一水平功率带的最大网格位置索引即百分比值
			
 
				+            PBoxMaxP[m], PBoxMaxIndex[m] = PBoxPercent[m, :].max(), PBoxPercent[m, :].argmax()
			
 
				+
			
 
				+        # 以垂直风速方向为准，分析每个垂直风速带中，功率主带中心，即找百分比最大的网格位置。
			
 
				+        VBoxMaxIndex = np.zeros(VNum, dtype=int)
			
 
				+        VBoxMaxV = np.zeros(VNum, dtype=int)
			
 
				+
			
 
				+        for m in range(VNum):
			
 
				+            [VBoxMaxV[m], VBoxMaxIndex[m]] = VBoxPercent[:, m].max(), VBoxPercent[:, m].argmax()
			
 
				+
			
 
				+        # 切入风速特殊处理，如果切入风速过于偏右，向左拉回
			
 
				+        if PBoxMaxIndex[0] > 14:
			
 
				+            PBoxMaxIndex[0] = 9
			
 
				+
			
 
				+        # 以水平功率带方向为基准，进行分析
			
 
				+        DotDense = np.zeros(PNum, dtype=int)  # 每一水平功率带的功率主带包含的网格数
			
 
				+        DotDenseLeftRight = np.zeros([PNum, 2], dtype=int)  # 存储每一水平功率带的功率主带以最大网格为中心，向向左，向右扩展的网格数
			
 
				+        DotValve = 90  # 从中心向左右对称扩展网格的散点百分比和的阈值。
			
 
				+        PDotDenseSum = 0
			
 
				+
			
 
				+        iSpreadLeft = 1  # 向左扩展网格计数，初值为1
			
 
				+        iSpreadRight = 1  # 向右扩展网格技术，初值为1
			
 
				+        for i in range(PNum - 6):  # 从最下层水平功率带1开始，向上到第PNum-6个水平功率带（额定功率一下水平功率带），逐一分析
			
 
				+            PDotDenseSum = PBoxMaxP[i]  # 以中心最大水平功率带为基准，向左向右对称扩展网格，累加各网格散点百分比
			
 
				+            iSpreadRight = 1
			
 
				+            iSpreadLeft = 1
			
 
				+            while PDotDenseSum < DotValve:
			
 
				+
			
 
				+                if (PBoxMaxIndex[i] + iSpreadRight) < VNum - 1:
			
 
				+                    PDotDenseSum = PDotDenseSum + PBoxPercent[i, PBoxMaxIndex[i] + iSpreadRight]  # 向右侧扩展
			
 
				+                    iSpreadRight = iSpreadRight + 1
			
 
				+
			
 
				+                if (PBoxMaxIndex[i] + iSpreadRight) > VNum - 1:
			
 
				+                    break
			
 
				+
			
 
				+                if (PBoxMaxIndex[i] - iSpreadLeft) > 0:
			
 
				+                    PDotDenseSum = PDotDenseSum + PBoxPercent[i, PBoxMaxIndex[i] - iSpreadLeft]  # 向左侧扩展
			
 
				+                    iSpreadLeft = iSpreadLeft + 1
			
 
				+
			
 
				+                if (PBoxMaxIndex[i] - iSpreadLeft) <= 0:
			
 
				+                    break
			
 
				+
			
 
				+            iSpreadRight = iSpreadRight - 1
			
 
				+
			
 
				+            iSpreadLeft = iSpreadLeft - 1
			
 
				+            # 向左右对称扩展完毕
			
 
				+
			
 
				+            DotDenseLeftRight[i, 0] = iSpreadLeft
			
 
				+            DotDenseLeftRight[i, 1] = iSpreadRight
			
 
				+            DotDense[i] = iSpreadLeft + iSpreadRight + 1
			
 
				+
			
 
				+        # 各行功率主带右侧宽度的中位数最具有代表性
			
 
				+        DotDenseWidthLeft = np.zeros([PNum - 6, 1], dtype=int)
			
 
				+        for i in range(PNum - 6):
			
 
				+            DotDenseWidthLeft[i] = DotDenseLeftRight[i, 1]
			
 
				+
			
 
				+        MainBandRight = np.median(DotDenseWidthLeft)
			
 
				+
			
 
				+        # 散点向右显著延展分布的水平功率带为限功率水平带
			
 
				+        PowerLimit = np.zeros([PNum, 1], dtype=int)  # 各水平功率带是否为限功率标识，==1：是；==0：不是
			
 
				+        WidthAverage = 0  # 功率主带平均宽度
			
 
				+        WidthVar = 0  # 功率主带方差
			
 
				+        # PowerLimitValve = 6    #限功率主带判别阈值
			
 
				+        PowerLimitValve = np.ceil(MainBandRight) + 3  # 限功率主带判别阈值
			
 
				+
			
 
				+        nCounterLimit = 0
			
 
				+        nCounter = 0
			
 
				+
			
 
				+        for i in range(PNum - 6):
			
 
				+            if DotDenseLeftRight[i, 1] > PowerLimitValve and PBinSum[i] > 20:  # 如果向右扩展网格数大于阈值，且该水平功率带点总数>20，是
			
 
				+                PowerLimit[i] = 1
			
 
				+                nCounterLimit = nCounterLimit + 1
			
 
				+
			
 
				+            if DotDenseLeftRight[i, 1] <= PowerLimitValve:
			
 
				+                WidthAverage = WidthAverage + DotDenseLeftRight[i, 1]  # 统计正常水平功率带右侧宽度
			
 
				+                nCounter = nCounter + 1
			
 
				+
			
 
				+        WidthAverage = WidthAverage / nCounter  # 功率主带平均宽度
			
 
				+
			
 
				+        print("WidthAverage", WidthAverage)
			
 
				+
			
 
				+        # 各水平功率带的功率主带宽度的方差，反映从下到上宽度是否一致，或是否下宽上窄等异常情况
			
 
				+        for i in range(PNum - 6):
			
 
				+            if DotDenseLeftRight[i, 1] <= PowerLimitValve:
			
 
				+                WidthVar = WidthVar + (DotDenseLeftRight[i, 1] - WidthAverage) * (
			
 
				+                        DotDenseLeftRight[i, 1] - WidthAverage)
			
 
				+
			
 
				+        WidthVar = np.sqrt(WidthVar / nCounter)
			
 
				+
			
 
				+        # 各水平功率带，功率主带的风速范围，右侧扩展网格数*2*0.25
			
 
				+        PowerBandWidth = WidthAverage * 2 * 0.25
			
 
				+
			
 
				+        # 对限负荷水平功率带的最大网格较下面相邻层显著偏右，拉回
			
 
				+        for i in range(1, PNum - 6):
			
 
				+            if PowerLimit[i] == 1 and abs(PBoxMaxIndex[i] - PBoxMaxIndex[i - 1]) > 5:
			
 
				+                PBoxMaxIndex[i] = PBoxMaxIndex[i - 1] + 1
			
 
				+
			
 
				+        # 输出各层功率主带的左右边界网格索引
			
 
				+        DotDenseInverse = np.zeros([PNum, 2], dtype=int)
			
 
				+
			
 
				+        for i in range(PNum):
			
 
				+            DotDenseInverse[i, :] = DotDenseLeftRight[PNum - i - 1, :]
			
 
				+
			
 
				+        # print('DotDenseInverse', DotDenseInverse)
			
 
				+
			
 
				+        # 功率主带的右边界
			
 
				+        CurveWidthR = int(np.ceil(WidthAverage) + 2)
			
 
				+
			
 
				+        # CurveWidthL = 6    #功率主带的左边界
			
 
				+        CurveWidthL = CurveWidthR
			
 
				+
			
 
				+        BBoxLimit = np.zeros([PNum, VNum], dtype=int)  # 网格是否为限功率网格的标识，如果为限功率水平功率带，从功率主带右侧边缘向右的网格为限功率网格
			
 
				+        for i in range(2, PNum - 6):
			
 
				+            if PowerLimit[i] == 1:
			
 
				+                for j in range(PBoxMaxIndex[i] + CurveWidthR, VNum):
			
 
				+                    BBoxLimit[i, j] = 1
			
 
				+
			
 
				+        BBoxRemove = np.zeros([PNum, VNum], dtype=int)  # 数据异常需要剔除的网格标识，标识==1：功率主带右侧的欠发网格；==2：功率主带左侧的超发网格
			
 
				+        for m in range(PNum - 6):
			
 
				+            for n in range(PBoxMaxIndex[m] + CurveWidthR - 1, VNum):
			
 
				+                BBoxRemove[m, n] = 1
			
 
				+
			
 
				+            for n in range(PBoxMaxIndex[m] - CurveWidthL - 1, 0, -1):
			
 
				+                BBoxRemove[m, n] = 2
			
 
				+
			
 
				+        # 确定功率主带的左上拐点，即额定风速位置的网格索引
			
 
				+        CurveTop = np.zeros(2, dtype=int)
			
 
				+        CurveTopValve = 3  # 网格的百分比阈值
			
 
				+        BTopFind = 0
			
 
				+        for m in range(PNum - 4 - 1, 0, -1):
			
 
				+            for n in range(VNum):
			
 
				+                if VBoxPercent[m, n] > CurveTopValve and XBoxNumber[m, n] >= 10:  # 如左上角网格的百分比和散点个数大于阈值。
			
 
				+                    CurveTop[0] = m
			
 
				+                    CurveTop[1] = n
			
 
				+                    BTopFind = 1
			
 
				+                    break
			
 
				+
			
 
				+            if BTopFind == 1:
			
 
				+                break
			
 
				+
			
 
				+        IsolateValve = 3
			
 
				+        for m in range(PNum - 6):
			
 
				+            for n in range(PBoxMaxIndex[m] + CurveWidthR - 1, VNum):
			
 
				+                if PBoxPercent[m, n] < IsolateValve:
			
 
				+                    BBoxRemove[m, n] = 1
			
 
				+
			
 
				+        # 功率主带顶部宽度
			
 
				+        CurveWidthT = 2
			
 
				+        for m in range(PNum - CurveWidthT - 1, PNum):
			
 
				+            for n in range(VNum):
			
 
				+                BBoxRemove[m, n] = 3  # 网格为额定功率以上的超发点
			
 
				+
			
 
				+        # 功率主带拐点左侧的欠发网格标识
			
 
				+        for m in range(PNum - 5 - 1, PNum):
			
 
				+            for n in range(CurveTop[1] - 2 - 1):
			
 
				+                BBoxRemove[m, n] = 2
			
 
				+
			
 
				+        # 以网格的标识，决定该网格内数据的标识。Dzwind_and_power_dfSel功率非零数据的标识位。散点在哪个网格，此网格的标识即为该点的标识
			
 
				+        Dzwind_and_power_dfSel = np.zeros(nCounter1, dtype=int)  # is ==1,欠发功率点；==2，超发功率点；==3，额定风速以上的超发功率点 ==4, 限电
			
 
				+        nWhichP = 0
			
 
				+        nWhichV = 0
			
 
				+        nBadA = 0
			
 
				+
			
 
				+        for i in range(nCounter1):
			
 
				+            for m in range(PNum):
			
 
				+                if DzMarch809[i, 1] > (m - 1) * 25 and DzMarch809[i, 1] <= m * 25:
			
 
				+                    nWhichP = m
			
 
				+                    break
			
 
				+
			
 
				+            for n in range(VNum):
			
 
				+                if DzMarch809[i, 0] > (n * 0.25 - 0.125) and DzMarch809[i, 0] <= (n * 0.25 + 0.125):
			
 
				+                    nWhichV = n
			
 
				+                    break
			
 
				+
			
 
				+            if nWhichP > 0 and nWhichV > 0:
			
 
				+
			
 
				+                if BBoxRemove[nWhichP, nWhichV] == 1:
			
 
				+                    Dzwind_and_power_dfSel[i] = 1
			
 
				+                    nBadA = nBadA + 1
			
 
				+
			
 
				+                if BBoxRemove[nWhichP, nWhichV] == 2:
			
 
				+                    Dzwind_and_power_dfSel[i] = 2
			
 
				+
			
 
				+                if BBoxRemove[nWhichP, nWhichV] == 3:
			
 
				+                    Dzwind_and_power_dfSel[i] = 0  # 3  # 额定风速以上的超发功率点认为是正常点，不再标识。
			
 
				+
			
 
				+                if BBoxLimit[nWhichP, nWhichV] == 1 and nWhichP>16:
			
 
				+                    Dzwind_and_power_dfSel[i] = 4
			
 
				+
			
 
				+        print("nWhichP", nWhichP)
			
 
				+        print("nWhichV", nWhichV)
			
 
				+        print("nBadA", nBadA)
			
 
				+
			
 
				+        # 限负荷数据标识方法2：把数据切割为若干个窗口。对每一窗口，以第一个点为基准，连续nWindowLength个数据的功率在方差范围内，呈现显著水平分布的点
			
 
				+        PVLimit = np.zeros([nCounter1, 2], dtype=int)  # 存储限负荷数据
			
 
				+        nLimitTotal = 0
			
 
				+        nWindowLength = 3
			
 
				+        LimitWindow = np.zeros(nWindowLength, dtype=int)
			
 
				+        UpLimit = 0  # 上限
			
 
				+        LowLimit = 0  # 下限
			
 
				+        PowerStd = 15  # 功率波动方差
			
 
				+        bAllInUpLow = 1  # ==1:窗口内所有数据均在方差上下限之内，限负荷==0,不满足条件
			
 
				+        bAllInAreas = 1  # ==1：窗口所有数据均在200~PRated-300kW范围内；==0：不满足此条件
			
 
				+        nWindowNum = int(np.floor(nCounter1 / nWindowLength))
			
 
				+        PowerLimitUp = PRated - 300
			
 
				+        PowerLimitLow = 200
			
 
				+        for i in range(nWindowNum):
			
 
				+            for j in range(nWindowLength):
			
 
				+                LimitWindow[j] = DzMarch809[i * nWindowLength + j, 1]
			
 
				+
			
 
				+            bAllInAreas = 1
			
 
				+            for j in range(nWindowLength):
			
 
				+                if LimitWindow[j] < PowerLimitLow or LimitWindow[j] > PowerLimitUp:
			
 
				+                    bAllInAreas = 0
			
 
				+
			
 
				+            if bAllInAreas == 0:
			
 
				+                continue
			
 
				+
			
 
				+            UpLimit = LimitWindow[0] + PowerStd
			
 
				+            LowLimit = LimitWindow[0] - PowerStd
			
 
				+            bAllInUpLow = 1
			
 
				+            for j in range(1, nWindowLength):
			
 
				+                if LimitWindow[j] < LowLimit or LimitWindow[j] > UpLimit:
			
 
				+                    bAllInUpLow = 0
			
 
				+
			
 
				+            if bAllInUpLow == 1:
			
 
				+                for j in range(nWindowLength):
			
 
				+                    Dzwind_and_power_dfSel[i * nWindowLength + j] = 4  # 标识窗口内的数据为限负荷数据
			
 
				+
			
 
				+                for j in range(nWindowLength):
			
 
				+                    PVLimit[nLimitTotal, :] = DzMarch809[i * nWindowLength + j, :]
			
 
				+                    nLimitTotal = nLimitTotal + 1
			
 
				+
			
 
				+        print("nLimitTotal", nLimitTotal)
			
 
				+
			
 
				+        # 相邻水平功率主带的锯齿平滑
			
 
				+        PVLeftDown = np.zeros(2, dtype=int)
			
 
				+        PVRightUp = np.zeros(2, dtype=int)
			
 
				+        nSmooth = 0
			
 
				+        for i in range(PNum - 6 - 1):
			
 
				+            PVLeftDown = np.zeros(2, dtype=int)
			
 
				+            PVRightUp = np.zeros(2, dtype=int)
			
 
				+
			
 
				+            if (PBoxMaxIndex[i + 1] - PBoxMaxIndex[i]) >= 1:
			
 
				+                PVLeftDown[0] = (PBoxMaxIndex[i] + CurveWidthR) * 0.25 - 0.125
			
 
				+                PVLeftDown[1] = (i - 1) * 25
			
 
				+
			
 
				+                PVRightUp[0] = (PBoxMaxIndex[i + 1] + CurveWidthR) * 0.25 - 0.125
			
 
				+                PVRightUp[1] = (i + 1 - 1) * 25
			
 
				+
			
 
				+                for m in range(nCounter1):
			
 
				+                    if DzMarch809[m, 0] > PVLeftDown[0] and DzMarch809[m, 0] < PVRightUp[0] and PVLeftDown[1] < \
			
 
				+                            DzMarch809[m, 1] < PVRightUp[1]:  # 在该锯齿中
			
 
				+                        if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (
			
 
				+                                PVRightUp[1] - PVLeftDown[1]) / (
			
 
				+                                PVRightUp[0] - PVLeftDown[0]):  # 斜率大于对角连线，则在锯齿左上三角形中，选中
			
 
				+                            Dzwind_and_power_dfSel[m] = 0
			
 
				+                            nSmooth = nSmooth + 1
			
 
				+
			
 
				+        print("nSmooth", nSmooth)
			
 
				+
			
 
				+        # 存储好点
			
 
				+        nCounterPV = 0
			
 
				+        PVDot = np.zeros([nCounter1, 2], dtype=int)
			
 
				+        for i in range(nCounter1):
			
 
				+            if Dzwind_and_power_dfSel[i] == 0:
			
 
				+                PVDot[nCounterPV, :] = DzMarch809[i, :]
			
 
				+                nCounterPV = nCounterPV + 1
			
 
				+
			
 
				+        nCounterVP = nCounterPV
			
 
				+        print("nCounterVP", nCounterVP)
			
 
				+
			
 
				+        # 存储坏点
			
 
				+        nCounterBad = 0
			
 
				+        PVBad = np.zeros([nCounter1, 2], dtype=int)
			
 
				+        for i in range(nCounter1):
			
 
				+            if Dzwind_and_power_dfSel[i] == 1 or Dzwind_and_power_dfSel[i] == 2 or Dzwind_and_power_dfSel[i] == 3:
			
 
				+                PVBad[nCounterBad, :] = DzMarch809[i, :]
			
 
				+                nCounterBad = nCounterBad + 1
			
 
				+
			
 
				+        print("nCounterBad", nCounterBad)
			
 
				+
			
 
				+        # 用功率主带中的好点绘制实测功率曲
			
 
				+        XBinNumber = np.ones(50, dtype=int)
			
 
				+        PCurve = np.zeros([50, 2], dtype=int)
			
 
				+        PCurve[:, 0] = [i / 2 for i in range(1, 51)]
			
 
				+        XBinSum = np.zeros([50, 2], dtype=int)
			
 
				+        nWhichBin = 0
			
 
				+
			
 
				+        for i in range(nCounterVP):
			
 
				+            nWhichBin = 0
			
 
				+
			
 
				+            for b in range(50):
			
 
				+                if PVDot[i, 0] > (b * 0.5 - 0.25) and PVDot[i, 0] <= (b * 0.5 + 0.25):
			
 
				+                    nWhichBin = b
			
 
				+                    break
			
 
				+
			
 
				+            if nWhichBin > 0:
			
 
				+                XBinSum[nWhichBin, 0] = XBinSum[nWhichBin, 0] + PVDot[i, 0]  # wind speed
			
 
				+                XBinSum[nWhichBin, 1] = XBinSum[nWhichBin, 1] + PVDot[i, 1]  # Power
			
 
				+                XBinNumber[nWhichBin] = XBinNumber[nWhichBin] + 1
			
 
				+
			
 
				+        for b in range(50):
			
 
				+            XBinNumber[b] = XBinNumber[b] - 1
			
 
				+
			
 
				+        for b in range(50):
			
 
				+            if XBinNumber[b] > 0:
			
 
				+                PCurve[b, 0] = XBinSum[b, 0] / XBinNumber[b]
			
 
				+                PCurve[b, 1] = XBinSum[b, 1] / XBinNumber[b]
			
 
				+
			
 
				+        # 对额定风速以上的功率直接赋额定功率
			
 
				+        VRatedNum = int(VRated / 0.5)
			
 
				+        for m in range(VRatedNum, 50):
			
 
				+            if PCurve[m, 1] == 0:
			
 
				+                PCurve[m, 1] = PRated
			
 
				+
			
 
				+        # print("PCurve", PCurve)
			
 
				+
			
 
				+        # 绘制标准正则功率曲线，以0.5m/s标准为间隔
			
 
				+        # 15m/s以上为额定功率，15m/s以下为计算得到
			
 
				+        PCurveNorm = np.zeros([50, 2], dtype=int)
			
 
				+        for i in range(30, 50):
			
 
				+            PCurveNorm[i, 0] = i * 0.5
			
 
				+            PCurveNorm[i, 1] = PRated
			
 
				+
			
 
				+        # 15m/s一下正则功率曲线
			
 
				+        CurveData = np.zeros([30, 2], dtype=int)
			
 
				+        for i in range(30):
			
 
				+            CurveData[i, :] = PCurve[i, :]
			
 
				+
			
 
				+        CurveNorm = np.zeros([30, 2], dtype=int)
			
 
				+        VSpeed = [i / 2 for i in range(1, 31)]
			
 
				+
			
 
				+        WhichBin = 0
			
 
				+
			
 
				+        K = 0
			
 
				+        a = 0
			
 
				+        for m in range(30):
			
 
				+            K = 0
			
 
				+            a = 0
			
 
				+
			
 
				+            for n in range(30):
			
 
				+                if abs(CurveData[n, 0] - VSpeed[m]) < 0.1:
			
 
				+                    WhichBin = n
			
 
				+                    break
			
 
				+
			
 
				+            if WhichBin > 1:
			
 
				+                if CurveData[WhichBin, 0] - CurveData[WhichBin - 1, 0] > 0:
			
 
				+                    K = (CurveData[WhichBin, 1] - CurveData[WhichBin - 1, 1]) / (
			
 
				+                            CurveData[WhichBin, 0] - CurveData[WhichBin - 1, 0])
			
 
				+                    a = CurveData[WhichBin, 1] - K * CurveData[WhichBin, 0]
			
 
				+
			
 
				+            CurveNorm[m, 0] = VSpeed[m]
			
 
				+            CurveNorm[m, 1] = a + K * VSpeed[m]
			
 
				+
			
 
				+        for i in range(30):
			
 
				+            PCurveNorm[i, :] = CurveNorm[i, :]
			
 
				+
			
 
				+        # 子模块3：损失电量计算及发电性能评价
			
 
				+        CC = len(PCurve[:, 0])
			
 
				+        EPIdealTotal = 0
			
 
				+        # 计算停机损失
			
 
				+        EPLostStopTotal = 0
			
 
				+        EPLost = 0
			
 
				+
			
 
				+        nWhichBin = 0
			
 
				+        IdealPower = 0
			
 
				+        nStopTotal = 0
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            if wind_and_power_df.loc[i, self.active_power] <= 0:
			
 
				+                nWhichBin = 0
			
 
				+                for m in range(base_wind_and_power_count - 1):
			
 
				+                    if wind_and_power_df.loc[i, self.wind_velocity] > base_wind_and_power_df.loc[
			
 
				+                        m, self.rated_wind_speed] and wind_and_power_df.loc[i, self.wind_velocity] <= \
			
 
				+                            base_wind_and_power_df.loc[
			
 
				+                                m + 1, self.rated_wind_speed]:
			
 
				+                        nWhichBin = m
			
 
				+                        break
			
 
				+
			
 
				+                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
			
 
				+                    continue
			
 
				+
			
 
				+                IdealPower = (wind_and_power_df.loc[i, self.wind_velocity] - base_wind_and_power_df.loc[
			
 
				+                    nWhichBin, self.rated_wind_speed]) / (
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] -
			
 
				+                                     base_wind_and_power_df.loc[
			
 
				+                                         nWhichBin, self.rated_wind_speed]) * (
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity]
			
 
				+                                     - base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) \
			
 
				+                             + base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
			
 
				+
			
 
				+                EPLost = IdealPower / 6
			
 
				+                EPLostStopTotal = EPLostStopTotal + EPLost
			
 
				+                nStopTotal = nStopTotal + 1
			
 
				+
			
 
				+        print("EPLost", EPLost)
			
 
				+        print("nStopTotal", nStopTotal)
			
 
				+        print("EPLostStopTotal", EPLostStopTotal)
			
 
				+
			
 
				+        nWhichP = 0
			
 
				+        nWhichV = 0
			
 
				+        nWhichBin = 0
			
 
				+        IdealPower = 0
			
 
				+
			
 
				+        # 计算欠发损失，此欠发损失已不包括限电损失，限电点在前面已经从欠发点中去除。
			
 
				+        EPLostBadTotal = 0
			
 
				+        EPLost = 0
			
 
				+
			
 
				+        nBadTotal = 0
			
 
				+
			
 
				+        LostBadPercent = 0
			
 
				+
			
 
				+        EPOverTotal = 0
			
 
				+        EPOver = 0
			
 
				+        nOverTotal = 0
			
 
				+
			
 
				+        for i in range(nCounter1):
			
 
				+            if Dzwind_and_power_dfSel[i] == 1:
			
 
				+                nWhichBin = 0
			
 
				+                for m in range(base_wind_and_power_count - 1):
			
 
				+                    if DzMarch809[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] \
			
 
				+                            and DzMarch809[i, 0] <= base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
			
 
				+                        nWhichBin = m
			
 
				+                        break
			
 
				+
			
 
				+                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
			
 
				+                    continue
			
 
				+
			
 
				+                IdealPower = (DzMarch809[i, 0] - base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) / (
			
 
				+                        base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] - base_wind_and_power_df.loc[
			
 
				+                    nWhichBin, self.rated_wind_speed]) * (
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) + \
			
 
				+                             base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
			
 
				+                EPLost = abs(IdealPower - DzMarch809[i, 1]) / 6
			
 
				+                EPLostBadTotal = EPLostBadTotal + EPLost
			
 
				+                nBadTotal = nBadTotal + 1
			
 
				+
			
 
				+            # 额定风速以上超发电量
			
 
				+            if Dzwind_and_power_dfSel[i] == 3:
			
 
				+                EPOver = (DzMarch809[i, 1] - PRated) / 6
			
 
				+                EPOverTotal = EPOverTotal + EPOver
			
 
				+                nOverTotal = nOverTotal + 1
			
 
				+
			
 
				+        print("EPLost", EPLost)
			
 
				+        print("nBadTotal", nBadTotal)
			
 
				+        print("EPLostBadTotal", EPLostBadTotal)
			
 
				+        print("EPOverTotal", EPOverTotal)
			
 
				+        print("nOverTotal", nOverTotal)
			
 
				+
			
 
				+        # 功率曲线未达标损失
			
 
				+        EPLostPerformTotal = 0
			
 
				+        nWhichBinI = 0
			
 
				+        IdealPower = 0
			
 
				+
			
 
				+        for i in range(nCounterVP):
			
 
				+
			
 
				+            for m in range(base_wind_and_power_count - 1):
			
 
				+                if PVDot[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] and PVDot[i, 0] <= \
			
 
				+                        base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
			
 
				+                    nWhichBinI = m
			
 
				+                    break
			
 
				+
			
 
				+            if nWhichBinI > base_wind_and_power_count - 1 or nWhichBinI == 0:
			
 
				+                continue
			
 
				+
			
 
				+            IdealPower = (PVDot[i, 0] - base_wind_and_power_df.loc[nWhichBinI, self.rated_wind_speed]) / (
			
 
				+                    base_wind_and_power_df.loc[nWhichBinI + 1, self.rated_wind_speed] - base_wind_and_power_df.loc[
			
 
				+                nWhichBinI, self.rated_wind_speed]) * \
			
 
				+                         (base_wind_and_power_df.loc[nWhichBinI + 1, self.rated_capacity] -
			
 
				+                          base_wind_and_power_df.loc[nWhichBinI, self.rated_capacity]) + \
			
 
				+                         base_wind_and_power_df.loc[nWhichBinI, self.rated_capacity]
			
 
				+
			
 
				+            EPLostPerformTotal = EPLostPerformTotal + (IdealPower - PVDot[i, 1]) / 6
			
 
				+
			
 
				+        print("EPLostPerformTotal", EPLostPerformTotal)
			
 
				+
			
 
				+        # 限电损失
			
 
				+        EPLostLimitTotal = 0
			
 
				+        EPLost = 0
			
 
				+        nLimitTotal = 0
			
 
				+
			
 
				+        PVLimit = np.zeros([nCounter1, 2])
			
 
				+
			
 
				+        for i in range(nCounter1):
			
 
				+            if Dzwind_and_power_dfSel[i] == 4:
			
 
				+                nWhichBin = 0
			
 
				+                for m in range(base_wind_and_power_count - 1):
			
 
				+                    if DzMarch809[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] and DzMarch809[i, 0] <= \
			
 
				+                            base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
			
 
				+                        nWhichBin = m
			
 
				+                        break
			
 
				+
			
 
				+                # 插值计算对应设计功率
			
 
				+                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
			
 
				+                    continue
			
 
				+
			
 
				+                IdealPower = (DzMarch809[i, 0] - base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) / (
			
 
				+                        base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] -
			
 
				+                        base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) * (
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) + \
			
 
				+                             base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
			
 
				+                EPLost = np.abs(IdealPower - DzMarch809[i, 1]) / 6
			
 
				+                EPLostLimitTotal = EPLostLimitTotal + EPLost
			
 
				+
			
 
				+                PVLimit[nLimitTotal, :] = DzMarch809[i, :]
			
 
				+                nLimitTotal = nLimitTotal + 1
			
 
				+
			
 
				+        nLimitTotal = nLimitTotal - 1
			
 
				+
			
 
				+        print("nLimitTotal", nLimitTotal)
			
 
				+
			
 
				+        # 欠发和限点损失总和
			
 
				+        EPLostBadLimitTotal = EPLostBadTotal + EPLostLimitTotal
			
 
				+
			
 
				+        # 如果功率曲线未达标损失为正
			
 
				+        if EPLostPerformTotal >= 0:
			
 
				+            EPIdealTotal = EPActualTotal + EPLostStopTotal + EPLostLimitTotal + EPLostBadTotal + EPLostPerformTotal
			
 
				+
			
 
				+        # 如果功率曲线未达标损失为负
			
 
				+        if EPLostPerformTotal < 0:
			
 
				+            EPIdealTotal = EPActualTotal + EPLostStopTotal + EPLostLimitTotal + EPLostBadTotal
			
 
				+
			
 
				+        print("EPIdealTotal", EPIdealTotal)
			
 
				+        # 可以比较求和得到的应发功率EPIdealTotal与理论计算得到的应发功率EPIdealTotalAAA的差别
			
 
				+        # 需要去除的超发功率：（1）功率主带左侧的超发点；（2）额定风速以上的超发点。
			
 
				+        RemoveOverEP = 0
			
 
				+        nType2 = 0
			
 
				+        for i in range(nCounter1):
			
 
				+            if Dzwind_and_power_dfSel[i] == 2:  # 功率主带左侧的超发坏点
			
 
				+                nWhichBin = 0
			
 
				+                for m in range(base_wind_and_power_count - 1):
			
 
				+                    if DzMarch809[i, 0] > base_wind_and_power_df.loc[m, self.rated_wind_speed] and DzMarch809[i, 0] <= \
			
 
				+                            base_wind_and_power_df.loc[m + 1, self.rated_wind_speed]:
			
 
				+                        nWhichBin = m
			
 
				+                        break
			
 
				+
			
 
				+                if nWhichBin > base_wind_and_power_count - 1 or nWhichBin == 0:
			
 
				+                    continue
			
 
				+
			
 
				+                IdealPower = (DzMarch809[i, 0] - base_wind_and_power_df.loc[nWhichBin, self.rated_wind_speed]) / (
			
 
				+                        base_wind_and_power_df.loc[nWhichBin + 1, self.rated_wind_speed] - base_wind_and_power_df.loc[
			
 
				+                    nWhichBin, self.rated_wind_speed]) * (
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin + 1, self.rated_capacity] -
			
 
				+                                     base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]) + \
			
 
				+                             base_wind_and_power_df.loc[nWhichBin, self.rated_capacity]
			
 
				+
			
 
				+                RemoveOverEP = RemoveOverEP + (DzMarch809[i, 1] - IdealPower) / 6
			
 
				+                nType2 = nType2 + 1
			
 
				+
			
 
				+        print("RemoveOverEP", RemoveOverEP)
			
 
				+        print("nType2", nType2)
			
 
				+        # 额定功率以上的超发点
			
 
				+        nTypeOver = 0
			
 
				+        for i in range(nCounter1):
			
 
				+            if DzMarch809[i, 1] > PRated:
			
 
				+                RemoveOverEP = RemoveOverEP + (DzMarch809[i, 1] - PRated) / 6
			
 
				+                nTypeOver = nTypeOver + 1
			
 
				+
			
 
				+        print("RemoveOverEP", RemoveOverEP)
			
 
				+        print("nTypeOver", nTypeOver)
			
 
				+
			
 
				+    def run(self):
			
 
				+        # Entry point: delegates to self.identifier(); run() itself returns nothing.
			
 
				+        self.identifier()
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    # Manual run: builds a ClassIdentifier from a hard-coded Windows CSV path, passing the
			
 
				+    # Chinese column names used for index / wind_velocity / active_power, then calls run().
			
 
				+    # NOTE(review): the path looks developer-local; confirm before relying on this block.
			
 
				+    test = ClassIdentifier('test', r"D:\中能智能\matlib计算相关\好点坏点matlib计算\A01.csv", index='时间',
			
 
				+                           wind_velocity='风速',
			
 
				+                           active_power='功率')
			
 
				+
			
 
				+    test.run()
			
--- a/tmp_file/ClassIdentifier_2.py
+++ b/tmp_file/ClassIdentifier_2.py
@@ -0,0 +1,371 @@
 
				+import os
			
 
				+
			
 
				+import numpy as np
			
 
				+from pandas import DataFrame
			
 
				+
			
 
				+from service.plt_service import get_base_wind_and_power
			
 
				+from utils.draw.draw_file import scatter
			
 
				+from utils.file.trans_methods import read_file_to_df
			
 
				+
			
 
				+
			
 
				+class ClassIdentifier(object):
			
 
				+
			
 
				+    def __init__(self, wind_turbine_number, file_path: str = None, origin_df: DataFrame = None, index='time_stamp',
			
 
				+                 wind_velocity='wind_velocity',
			
 
				+                 active_power='active_power'):
			
 
				+        """
			
 
				+        Store the column names and load the input data, indexed by the ``index`` column.
			
 
				+
			
 
				+        :param wind_turbine_number: The wind turbine number.
			
 
				+        :param file_path: Path of the input data file; used whenever it is non-empty.
			
 
				+        :param origin_df: DataFrame holding the input data; used when ``file_path`` is empty or None.
			
 
				+        :param index: Name of the column that becomes the DataFrame index.
			
 
				+        :param wind_velocity: Name of the wind-speed column.
			
 
				+        :param active_power: Name of the active-power column.
			
 
				+        :raises ValueError: If neither a non-empty ``file_path`` nor an ``origin_df`` is given.
			
 
				+        """
			
 
				+        self.wind_turbine_number = wind_turbine_number
			
 
				+        self.index = index
			
 
				+        self.wind_velocity = wind_velocity
			
 
				+        self.active_power = active_power
			
 
				+
			
 
				+        # Column names presumably used for the reference power-curve table - confirm in identifier().
			
 
				+        self.rated_wind_speed = 'rated_wind_speed'
			
 
				+        self.rated_capacity = 'rated_capacity'
			
 
				+
			
 
				+        # Validate with the same truthiness test the branch below uses: an empty file_path
			
 
				+        # together with origin_df=None used to slip through and fail later on None.set_index.
			
 
				+        if not file_path and origin_df is None:
			
 
				+            raise ValueError("Either file_path or origin_df should be provided.")
			
 
				+
			
 
				+        # A non-empty file_path takes precedence over origin_df when both are supplied.
			
 
				+        if file_path:
			
 
				+            self.df = read_file_to_df(file_path)
			
 
				+        else:
			
 
				+            self.df = origin_df
			
 
				+
			
 
				+        # set_index returns a new frame, so the caller's origin_df is left untouched.
			
 
				+        self.df = self.df.set_index(keys=self.index)
			
 
				+
			
 
				+    def identifier(self):
			
 
				+        # 风速 和 有功功率 df
			
 
				+        wind_and_power_df = self.df[[self.wind_velocity, self.active_power]]
			
 
				+        wind_and_power_df.reset_index(inplace=True)
			
 
				+        wind_and_power_df_count = wind_and_power_df.shape[0]
			
 
				+        PowerMax = wind_and_power_df[self.active_power].max()
			
 
				+        PowerRated = np.ceil(PowerMax / 100) * 100
			
 
				+        PRated = 1500  # 额定功率1500kw,可改为2000kw
			
 
				+        VCutOut = 25
			
 
				+        # 网格法确定风速风向分区数量，功率方向分区数量，
			
 
				+        # PNum = (PRated+100)/25  #功率分区间隔25kW
			
 
				+        PNum = int(np.ceil(PowerRated / 25))  # 功率分区间隔25kW
			
 
				+        VNum = int(np.ceil(VCutOut / 0.25))  # 风速分区间隔0.25m/s
			
 
				+
			
 
				+        # 存储功率大于零的运行数据
			
 
				+        DzMarch809 = np.zeros([wind_and_power_df_count, 2], dtype=float)
			
 
				+        nCounter1 = 0
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            if wind_and_power_df.loc[i, self.active_power] > 0:
			
 
				+                DzMarch809[nCounter1, 0] = wind_and_power_df.loc[i, self.wind_velocity]
			
 
				+                DzMarch809[nCounter1, 1] = wind_and_power_df.loc[i, self.active_power]
			
 
				+
			
 
				+                nCounter1 = nCounter1 + 1
			
 
				+
			
 
				+        # 统计各网格落入的散点个数
			
 
				+        if VNum == 1:
			
 
				+            XBoxNumber = np.ones([PNum], dtype=int)
			
 
				+        else:
			
 
				+            XBoxNumber = np.ones([PNum, VNum], dtype=int)
			
 
				+        nWhichP = -1
			
 
				+        nWhichV = -1
			
 
				+        for i in range(nCounter1):
			
 
				+            for m in range(PNum):
			
 
				+                if m * 25 < DzMarch809[i, 1] <= (m + 1) * 25:
			
 
				+                    nWhichP = m
			
 
				+                    break
			
 
				+            for n in range(VNum):
			
 
				+                if ((n + 1) * 0.25 - 0.125) < DzMarch809[i, 0] <= ((n + 1) * 0.25 + 0.125):
			
 
				+                    nWhichV = n
			
 
				+                    break
			
 
				+
			
 
				+            if nWhichP > -1 and nWhichV > -1:
			
 
				+                XBoxNumber[nWhichP, nWhichV] = XBoxNumber[nWhichP, nWhichV] + 1
			
 
				+
			
 
				+        for m in range(PNum):
			
 
				+            for n in range(VNum):
			
 
				+                XBoxNumber[m, n] = XBoxNumber[m, n] - 1
			
 
				+
			
 
				+        # 在功率方向将网格内散点绝对个数转换为相对百分比，备用
			
 
				+        PBoxPercent = np.zeros([PNum, VNum], dtype=float)
			
 
				+        PBinSum = np.zeros(PNum, dtype=int)
			
 
				+
			
 
				+        for i in range(PNum):
			
 
				+            for m in range(VNum):
			
 
				+                PBinSum[i] = PBinSum[i] + XBoxNumber[i, m]
			
 
				+
			
 
				+            for m in range(VNum):
			
 
				+                if PBinSum[i] > 0:
			
 
				+                    PBoxPercent[i, m] = XBoxNumber[i, m] / PBinSum[i] * 100
			
 
				+
			
 
				+        # 在风速方向将网格内散点绝对个数转换为相对百分比，备用
			
 
				+        VBoxPercent = np.zeros([PNum, VNum], dtype=float)
			
 
				+        VBinSum = np.zeros(VNum, dtype=int)
			
 
				+
			
 
				+        for i in range(VNum):
			
 
				+            for m in range(PNum):
			
 
				+                VBinSum[i] = VBinSum[i] + XBoxNumber[m, i]
			
 
				+
			
 
				+            for m in range(PNum):
			
 
				+                if VBinSum[i] > 0:
			
 
				+                    VBoxPercent[m, i] = XBoxNumber[m, i] / VBinSum[i] * 100
			
 
				+
			
 
				+        # 以水平功率带方向为准，分析每个水平功率带中，功率主带中心，即找百分比最大的网格位置。
			
 
				+        PBoxMaxIndex = np.zeros(PNum, dtype=int)  # 水平功率带最大网格位置索引
			
 
				+        PBoxMaxP = np.zeros(PNum, dtype=int)  # 水平功率带最大网格百分比
			
 
				+
			
 
				+        for m in range(PNum):
			
 
				+            # 确定每一水平功率带的最大网格位置索引即百分比值
			
 
				+            PBoxMaxP[m], PBoxMaxIndex[m] = PBoxPercent[m, :].max(), PBoxPercent[m, :].argmax()
			
 
				+
			
 
				+        # 以垂直风速方向为准，分析每个垂直风速带中，功率主带中心，即找百分比最大的网格位置。
			
 
				+        VBoxMaxIndex = np.zeros(VNum, dtype=int)
			
 
				+        VBoxMaxV = np.zeros(VNum, dtype=int)
			
 
				+
			
 
				+        for m in range(VNum):
			
 
				+            [VBoxMaxV[m], VBoxMaxIndex[m]] = VBoxPercent[:, m].max(), VBoxPercent[:, m].argmax()
			
 
				+
			
 
				+        # 切入风速特殊处理，如果切入风速过于偏右，向左拉回
			
 
				+        if PBoxMaxIndex[0] > 14:
			
 
				+            PBoxMaxIndex[0] = 9
			
 
				+
			
 
				+        # 以水平功率带方向为基准，进行分析
			
 
				+        DotDense = np.zeros(PNum, dtype=int)  # 每一水平功率带的功率主带包含的网格数
			
 
				+        DotDenseLeftRight = np.zeros([PNum, 2], dtype=int)  # 存储每一水平功率带的功率主带以最大网格为中心，向向左，向右扩展的网格数
			
 
				+        DotValve = 90  # 从中心向左右对称扩展网格的散点百分比和的阈值。
			
 
				+
			
 
				+        for i in range(PNum - 6):  # 从最下层水平功率带1开始，向上到第PNum-6个水平功率带（额定功率一下水平功率带），逐一分析
			
 
				+            PDotDenseSum = PBoxMaxP[i]  # 以中心最大水平功率带为基准，向左向右对称扩展网格，累加各网格散点百分比
			
 
				+            iSpreadRight = 1
			
 
				+            iSpreadLeft = 1
			
 
				+            while PDotDenseSum < DotValve:
			
 
				+
			
 
				+                if (PBoxMaxIndex[i] + iSpreadRight) < VNum - 1:
			
 
				+                    PDotDenseSum = PDotDenseSum + PBoxPercent[i, PBoxMaxIndex[i] + iSpreadRight]  # 向右侧扩展
			
 
				+                    iSpreadRight = iSpreadRight + 1
			
 
				+
			
 
				+                if (PBoxMaxIndex[i] + iSpreadRight) > VNum - 1:
			
 
				+                    break
			
 
				+
			
 
				+                if (PBoxMaxIndex[i] - iSpreadLeft) > 0:
			
 
				+                    PDotDenseSum = PDotDenseSum + PBoxPercent[i, PBoxMaxIndex[i] - iSpreadLeft]  # 向左侧扩展
			
 
				+                    iSpreadLeft = iSpreadLeft + 1
			
 
				+
			
 
				+                if (PBoxMaxIndex[i] - iSpreadLeft) <= 0:
			
 
				+                    break
			
 
				+
			
 
				+            iSpreadRight = iSpreadRight - 1
			
 
				+            iSpreadLeft = iSpreadLeft - 1
			
 
				+            # 向左右对称扩展完毕
			
 
				+
			
 
				+            DotDenseLeftRight[i, 0] = iSpreadLeft
			
 
				+            DotDenseLeftRight[i, 1] = iSpreadRight
			
 
				+            DotDense[i] = iSpreadLeft + iSpreadRight + 1
			
 
				+
			
 
				+        # 各行功率主带右侧宽度的中位数最具有代表性
			
 
				+        DotDenseWidthLeft = np.zeros([PNum - 6, 1], dtype=int)
			
 
				+        for i in range(PNum - 6):
			
 
				+            DotDenseWidthLeft[i] = DotDenseLeftRight[i, 1]
			
 
				+
			
 
				+        MainBandRight = np.median(DotDenseWidthLeft)
			
 
				+
			
 
				+        # 散点向右显著延展分布的水平功率带为限功率水平带
			
 
				+        PowerLimit = np.zeros([PNum, 1], dtype=int)  # 各水平功率带是否为限功率标识，==1：是；==0：不是
			
 
				+        WidthAverage = 0  # 功率主带平均宽度
			
 
				+        WidthVar = 0  # 功率主带方差
			
 
				+        # PowerLimitValve = 6    #限功率主带判别阈值
			
 
				+        PowerLimitValve = np.ceil(MainBandRight) + 3  # 限功率主带判别阈值
			
 
				+
			
 
				+        nCounterLimit = 0
			
 
				+        nCounter = 0
			
 
				+
			
 
				+        for i in range(PNum - 6):
			
 
				+            if DotDenseLeftRight[i, 1] > PowerLimitValve and PBinSum[i] > 20:  # 如果向右扩展网格数大于阈值，且该水平功率带点总数>20，是
			
 
				+                PowerLimit[i] = 1
			
 
				+                nCounterLimit = nCounterLimit + 1
			
 
				+
			
 
				+            if DotDenseLeftRight[i, 1] <= PowerLimitValve:
			
 
				+                WidthAverage = WidthAverage + DotDenseLeftRight[i, 1]  # 统计正常水平功率带右侧宽度
			
 
				+                nCounter = nCounter + 1
			
 
				+
			
 
				+        WidthAverage = WidthAverage / nCounter  # 功率主带平均宽度
			
 
				+
			
 
				+        # 各水平功率带的功率主带宽度的方差，反映从下到上宽度是否一致，或是否下宽上窄等异常情况
			
 
				+        for i in range(PNum - 6):
			
 
				+            if DotDenseLeftRight[i, 1] <= PowerLimitValve:
			
 
				+                WidthVar = WidthVar + (DotDenseLeftRight[i, 1] - WidthAverage) * (
			
 
				+                        DotDenseLeftRight[i, 1] - WidthAverage)
			
 
				+
			
 
				+        # 对限负荷水平功率带的最大网格较下面相邻层显著偏右，拉回
			
 
				+        for i in range(1, PNum - 6):
			
 
				+            if PowerLimit[i] == 1 and abs(PBoxMaxIndex[i] - PBoxMaxIndex[i - 1]) > 5:
			
 
				+                PBoxMaxIndex[i] = PBoxMaxIndex[i - 1] + 1
			
 
				+
			
 
				+        # 输出各层功率主带的左右边界网格索引
			
 
				+        DotDenseInverse = np.zeros([PNum, 2], dtype=int)
			
 
				+
			
 
				+        for i in range(PNum):
			
 
				+            DotDenseInverse[i, :] = DotDenseLeftRight[PNum - i - 1, :]
			
 
				+
			
 
				+        # 功率主带的右边界
			
 
				+        CurveWidthR = int(np.ceil(WidthAverage) + 2)
			
 
				+
			
 
				+        # CurveWidthL = 6    #功率主带的左边界
			
 
				+        CurveWidthL = CurveWidthR
			
 
				+
			
 
				+        BBoxLimit = np.zeros([PNum, VNum], dtype=int)  # 网格是否为限功率网格的标识，如果为限功率水平功率带，从功率主带右侧边缘向右的网格为限功率网格
			
 
				+        for i in range(2, PNum - 6):
			
 
				+            if PowerLimit[i] == 1:
			
 
				+                for j in range(PBoxMaxIndex[i] + CurveWidthR, VNum):
			
 
				+                    BBoxLimit[i, j] = 1
			
 
				+
			
 
				+        BBoxRemove = np.zeros([PNum, VNum], dtype=int)  # 数据异常需要剔除的网格标识，标识==1：功率主带右侧的欠发网格；==2：功率主带左侧的超发网格
			
 
				+        for m in range(PNum - 6):
			
 
				+            for n in range(PBoxMaxIndex[m] + CurveWidthR, VNum):
			
 
				+                BBoxRemove[m, n] = 1
			
 
				+
			
 
				+            for n in range(PBoxMaxIndex[m] - CurveWidthL, -1, -1):
			
 
				+                BBoxRemove[m, n] = 2
			
 
				+
			
 
				+        # 确定功率主带的左上拐点，即额定风速位置的网格索引
			
 
				+        CurveTop = np.zeros(2, dtype=int)
			
 
				+        CurveTopValve = 3  # 网格的百分比阈值
			
 
				+        BTopFind = 0
			
 
				+        for m in range(PNum - 4 - 1, -1, -1):
			
 
				+            for n in range(VNum):
			
 
				+                if VBoxPercent[m, n] > CurveTopValve and XBoxNumber[m, n] >= 10:  # 如左上角网格的百分比和散点个数大于阈值。
			
 
				+                    CurveTop[0] = m
			
 
				+                    CurveTop[1] = n
			
 
				+                    BTopFind = 1
			
 
				+                    break
			
 
				+
			
 
				+            if BTopFind == 1:
			
 
				+                break
			
 
				+
			
 
				+        IsolateValve = 3
			
 
				+        for m in range(PNum - 6):
			
 
				+            for n in range(PBoxMaxIndex[m] + CurveWidthR, VNum):
			
 
				+                if PBoxPercent[m, n] < IsolateValve:
			
 
				+                    BBoxRemove[m, n] = 1
			
 
				+
			
 
				+        # 功率主带顶部宽度
			
 
				+        CurveWidthT = 2
			
 
				+        for m in range(PNum - CurveWidthT - 1, PNum):
			
 
				+            for n in range(VNum):
			
 
				+                BBoxRemove[m, n] = 3  # 网格为额定功率以上的超发点
			
 
				+
			
 
				+        # 功率主带拐点左侧的欠发网格标识
			
 
				+        for m in range(PNum - 5 - 1, PNum):
			
 
				+            for n in range(CurveTop[1] - 1):
			
 
				+                BBoxRemove[m, n] = 2
			
 
				+
			
 
				+        # 以网格的标识，决定该网格内数据的标识。Dzwind_and_power_dfSel功率非零数据的标识位。散点在哪个网格，此网格的标识即为该点的标识
			
 
				+        Dzwind_and_power_dfSel = np.zeros(nCounter1, dtype=int)  # -1:停机 0:好点  1:欠发功率点；2:超发功率点；3:额定风速以上的超发功率点 4: 限电
			
 
				+        nWhichP = -1
			
 
				+        nWhichV = -1
			
 
				+        nBadA = 0
			
 
				+
			
 
				+        for i in range(nCounter1):
			
 
				+            for m in range(PNum):
			
 
				+                if m * 25 < DzMarch809[i, 1] <= (m + 1) * 25:
			
 
				+                    nWhichP = m
			
 
				+                    break
			
 
				+
			
 
				+            for n in range(VNum):
			
 
				+                if ((n + 1) * 0.25 - 0.125) < DzMarch809[i, 0] <= ((n + 1) * 0.25 + 0.125):
			
 
				+                    nWhichV = n
			
 
				+                    break
			
 
				+
			
 
				+            if nWhichP > -1 and nWhichV > -1:
			
 
				+
			
 
				+                if BBoxRemove[nWhichP, nWhichV] == 1:
			
 
				+                    Dzwind_and_power_dfSel[i] = 1
			
 
				+                    nBadA = nBadA + 1
			
 
				+
			
 
				+                if BBoxRemove[nWhichP, nWhichV] == 2:
			
 
				+                    Dzwind_and_power_dfSel[i] = 2
			
 
				+
			
 
				+                if BBoxRemove[nWhichP, nWhichV] == 3:
			
 
				+                    Dzwind_and_power_dfSel[i] = 0  # 3  # 额定风速以上的超发功率点认为是正常点，不再标识。
			
 
				+
			
 
				+        # 限负荷数据标识方法2：把数据切割为若干个窗口。对每一窗口，以第一个点为基准，连续nWindowLength个数据的功率在方差范围内，呈现显著水平分布的点
			
 
				+        nWindowLength = 3
			
 
				+        LimitWindow = np.zeros(nWindowLength, dtype=float)
			
 
				+        PowerStd = 15  # 功率波动方差
			
 
				+        nWindowNum = int(np.floor(nCounter1 / nWindowLength))
			
 
				+        PowerLimitUp = PRated - 300
			
 
				+        PowerLimitLow = 200
			
 
				+        for i in range(nWindowNum):
			
 
				+            for j in range(nWindowLength):
			
 
				+                LimitWindow[j] = DzMarch809[i * nWindowLength + j, 1]
			
 
				+
			
 
				+            bAllInAreas = 1
			
 
				+            for j in range(nWindowLength):
			
 
				+                if LimitWindow[j] < PowerLimitLow or LimitWindow[j] > PowerLimitUp:
			
 
				+                    bAllInAreas = 0
			
 
				+
			
 
				+            if bAllInAreas == 0:
			
 
				+                continue
			
 
				+
			
 
				+            UpLimit = LimitWindow[0] + PowerStd
			
 
				+            LowLimit = LimitWindow[0] - PowerStd
			
 
				+            bAllInUpLow = 1
			
 
				+            for j in range(1, nWindowLength):
			
 
				+                if LimitWindow[j] < LowLimit or LimitWindow[j] > UpLimit:
			
 
				+                    bAllInUpLow = 0
			
 
				+
			
 
				+            if bAllInUpLow == 1:
			
 
				+                for j in range(nWindowLength):
			
 
				+                    Dzwind_and_power_dfSel[i * nWindowLength + j] = 4  # 标识窗口内的数据为限负荷数据
			
 
				+
			
 
				+        nSmooth = 0
			
 
				+        for i in range(PNum - 6):
			
 
				+            PVLeftDown = np.zeros(2, dtype=float)
			
 
				+            PVRightUp = np.zeros(2, dtype=float)
			
 
				+
			
 
				+            if (PBoxMaxIndex[i + 1] - PBoxMaxIndex[i]) >= 1:
			
 
				+                PVLeftDown[0] = (PBoxMaxIndex[i] + 1 + CurveWidthR) * 0.25 - 0.125
			
 
				+                PVLeftDown[1] = i * 25
			
 
				+
			
 
				+                PVRightUp[0] = (PBoxMaxIndex[i + 1] + 1 + CurveWidthR) * 0.25 - 0.125
			
 
				+                PVRightUp[1] = (i + 1) * 25
			
 
				+
			
 
				+                for m in range(nCounter1):
			
 
				+                    if DzMarch809[m, 0] > PVLeftDown[0] and DzMarch809[m, 0] < PVRightUp[0] and PVLeftDown[1] < \
			
 
				+                            DzMarch809[m, 1] < PVRightUp[1]:  # 在该锯齿中
			
 
				+                        if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (
			
 
				+                                PVRightUp[1] - PVLeftDown[1]) / (
			
 
				+                                PVRightUp[0] - PVLeftDown[0]):  # 斜率大于对角连线，则在锯齿左上三角形中，选中
			
 
				+                            Dzwind_and_power_dfSel[m] = 0
			
 
				+                            nSmooth = nSmooth + 1
			
 
				+
			
 
				+        print("nSmooth", nSmooth)
			
 
				+
			
 
				+        wind_and_power_df.loc[:, 'marker'] = -1
			
 
				+        wind_and_power_df.loc[
			
 
				+            wind_and_power_df[wind_and_power_df[self.active_power] > 0].index, 'marker'] = Dzwind_and_power_dfSel
			
 
				+        wind_and_power_df.to_csv("test.csv", index=False, encoding='utf-8')
			
 
				+
			
 
				+        # wind_and_power_df = wind_and_power_df[wind_and_power_df['marker'] == 0]
			
 
				+        color_map = {-1: 'red', 0: 'green', 1: 'blue', 2: 'black', 3: 'orange', 4: 'magenta'}
			
 
				+        c = wind_and_power_df['marker'].map(color_map)
			
 
				+
			
 
				+        # -1:停机 0:好点  1:欠发功率点；2:超发功率点；3:额定风速以上的超发功率点 4: 限电
			
 
				+        legend_map = {"停机": 'red', "好点": 'green', "欠发": 'blue', "超发": 'black', "额定风速以上的超发": 'orange', "限电": 'magenta'}
			
 
				+
			
 
				+        scatter("测试matlab结果", x_label='风速', y_label='有功功率', x_values=wind_and_power_df[self.wind_velocity].values,
			
 
				+                y_values=wind_and_power_df[self.active_power].values, color=c, col_map=legend_map,
			
 
				+                save_file_path=os.path.dirname(__file__) + os.sep + '测试matlab结果均值.png')
			
 
				+
			
 
    def run(self):
        """Run the identification by delegating to ``self.identifier()``; returns nothing."""
        self.identifier()
			
 
				+
			
 
				+
			
 
if __name__ == '__main__':
    # Ad-hoc local run against a hard-coded Windows CSV path.
    # The keyword arguments map the CSV's Chinese column names (time, wind speed, power)
    # onto the constructor's index / wind_velocity / active_power parameters.
    # NOTE(review): the constructor is not visible in this part of the file - presumably it
    # loads the CSV from the given path; confirm before relying on it.
    test = ClassIdentifier('test', r"D:\中能智能\matlib计算相关\好点坏点matlib计算\A01.csv", index='时间',
                           wind_velocity='风速',
                           active_power='功率')

    test.run()
			
--- a/tmp_file/ClassIdentifier_3.py
+++ b/tmp_file/ClassIdentifier_3.py
@@ -0,0 +1,389 @@
 
				+import os
			
 
				+
			
 
				+import numpy as np
			
 
				+from pandas import DataFrame
			
 
				+
			
 
				+from service.plt_service import get_base_wind_and_power
			
 
				+from utils.draw.draw_file import scatter
			
 
				+from utils.file.trans_methods import read_file_to_df
			
 
				+
			
 
				+
			
 
				+class ClassIdentifier(object):
			
 
				+
			
 
				+    def __init__(self, wind_turbine_number, file_path: str = None, origin_df: DataFrame = None, index='time_stamp',
			
 
				+                 wind_velocity='wind_velocity',
			
 
				+                 active_power='active_power'):
			
 
				+        """
			
 
				+        :param wind_turbine_number: The wind turbine number.
			
 
				+        :param file_path: The file path of the input data.
			
 
				+        :param origin_df: The pandas DataFrame containing the input data.
			
 
				+        :param index: 索引字段
			
 
				+        :param wind_velocity: 风速字段
			
 
				+        :param active_power: 有功功率字段
			
 
				+        """
			
 
				+        self.wind_turbine_number = wind_turbine_number
			
 
				+        self.index = index
			
 
				+        self.wind_velocity = wind_velocity
			
 
				+        self.active_power = active_power
			
 
				+
			
 
				+        self.rated_wind_speed = 'rated_wind_speed'
			
 
				+        self.rated_capacity = 'rated_capacity'
			
 
				+
			
 
				+        if file_path is None and origin_df is None:
			
 
				+            raise ValueError("Either file_path or origin_df should be provided.")
			
 
				+
			
 
				+        if file_path:
			
 
				+            self.df = read_file_to_df(file_path)
			
 
				+        else:
			
 
				+            self.df = origin_df
			
 
				+
			
 
				+        self.df = self.df.set_index(keys=self.index)
			
 
				+
			
 
    def identifier(self):
        """
        Label every row as shutdown / good / under-power / over-power / curtailed and plot it.

        Steps, in the order implemented below:

        1. Copy the rows with active power > 0 into an array and count them on a grid with
           25-wide power bands and 0.25-wide wind-speed columns.
        2. For each power band, find the densest cell and expand left and right from it until
           ``DotValve`` percent of the band's points are covered (the "main band").
        3. Flag grid cells to the right of the main band as 1, to the left as 2, and the top
           rows as 3 (3 is later mapped back to 0, i.e. treated as good).
        4. Flag fixed windows of 3 consecutive points with nearly constant power as 4.
        5. Reset to 0 the points lying in the upper-left triangle of each "stair step" of the
           main band's right edge.
        6. Write the flags into a ``marker`` column (-1 for rows with power <= 0), relabel some
           marker-1 rows to 4 based on power and the "叶片角度" column, and save a scatter
           plot next to this module.

        Nothing is returned and nothing is stored on ``self``; the labelled frame is a local
        variable that is only used for the plot.
        """
        # Wind speed / active power / blade-angle frame.
        # NOTE(review): the blade-angle column name is hard-coded; the input must contain it.
        wind_and_power_df = self.df[[self.wind_velocity, self.active_power, "叶片角度"]]
        # NOTE(review): in-place reset on a column selection of self.df; this may emit
        # pandas' SettingWithCopyWarning - confirm and consider an explicit copy.
        wind_and_power_df.reset_index(inplace=True)
        wind_and_power_df_count = wind_and_power_df.shape[0]
        PowerMax = wind_and_power_df[self.active_power].max()
        PowerRated = np.ceil(PowerMax / 100) * 100  # max observed power rounded up to a multiple of 100
        PRated = 1500  # rated power, hard-coded to 1500 kW (original note: may be changed to 2000 kW)
        VCutOut = 25
        # Grid method: number of partitions along the wind-speed axis and the power axis.
        # Earlier variant: PNum = (PRated+100)/25  (power bin width 25 kW)
        PNum = int(np.ceil(PowerRated / 25))  # power bin width 25 kW
        VNum = int(np.ceil(VCutOut / 0.25))  # wind-speed bin width 0.25 m/s

        # Store the operating data whose power is greater than zero (col 0: wind speed, col 1: power).
        DzMarch809 = np.zeros([wind_and_power_df_count, 2], dtype=float)
        nCounter1 = 0
        for i in range(wind_and_power_df_count):
            if wind_and_power_df.loc[i, self.active_power] > 0:
                DzMarch809[nCounter1, 0] = wind_and_power_df.loc[i, self.wind_velocity]
                DzMarch809[nCounter1, 1] = wind_and_power_df.loc[i, self.active_power]

                nCounter1 = nCounter1 + 1

        # Count the scatter points falling into each grid cell.
        # Counts start at 1 and are decremented by 1 after the loop.
        if VNum == 1:
            XBoxNumber = np.ones([PNum], dtype=int)
        else:
            XBoxNumber = np.ones([PNum, VNum], dtype=int)
        # NOTE(review): nWhichP / nWhichV are not reset per point, so a point that matches no
        # bin (e.g. wind speed <= 0.125) is counted in the previous point's cell - confirm intent.
        nWhichP = -1
        nWhichV = -1
        for i in range(nCounter1):
            for m in range(PNum):
                if m * 25 < DzMarch809[i, 1] <= (m + 1) * 25:
                    nWhichP = m
                    break
            for n in range(VNum):
                if ((n + 1) * 0.25 - 0.125) < DzMarch809[i, 0] <= ((n + 1) * 0.25 + 0.125):
                    nWhichV = n
                    break

            if nWhichP > -1 and nWhichV > -1:
                XBoxNumber[nWhichP, nWhichV] = XBoxNumber[nWhichP, nWhichV] + 1

        for m in range(PNum):
            for n in range(VNum):
                XBoxNumber[m, n] = XBoxNumber[m, n] - 1

        # Along the power direction: convert absolute counts per cell into a percentage of
        # the points in the same horizontal power band.
        PBoxPercent = np.zeros([PNum, VNum], dtype=float)
        PBinSum = np.zeros(PNum, dtype=int)

        for i in range(PNum):
            for m in range(VNum):
                PBinSum[i] = PBinSum[i] + XBoxNumber[i, m]

            for m in range(VNum):
                if PBinSum[i] > 0:
                    PBoxPercent[i, m] = XBoxNumber[i, m] / PBinSum[i] * 100

        # Along the wind-speed direction: convert absolute counts per cell into a percentage
        # of the points in the same vertical wind-speed column.
        VBoxPercent = np.zeros([PNum, VNum], dtype=float)
        VBinSum = np.zeros(VNum, dtype=int)

        for i in range(VNum):
            for m in range(PNum):
                VBinSum[i] = VBinSum[i] + XBoxNumber[m, i]

            for m in range(PNum):
                if VBinSum[i] > 0:
                    VBoxPercent[m, i] = XBoxNumber[m, i] / VBinSum[i] * 100

        # Per horizontal power band: find the centre of the main band, i.e. the cell with the
        # largest percentage.
        PBoxMaxIndex = np.zeros(PNum, dtype=int)  # column index of the densest cell in each power band
        # NOTE(review): integer dtype truncates the fractional part of the percentage stored here.
        PBoxMaxP = np.zeros(PNum, dtype=int)  # percentage of the densest cell in each power band

        for m in range(PNum):
            # Densest cell of this power band: its percentage and its column index.
            PBoxMaxP[m], PBoxMaxIndex[m] = PBoxPercent[m, :].max(), PBoxPercent[m, :].argmax()

        # Per vertical wind-speed column: find the cell with the largest percentage.
        # NOTE(review): VBoxMaxIndex / VBoxMaxV are not read again in this method.
        VBoxMaxIndex = np.zeros(VNum, dtype=int)
        VBoxMaxV = np.zeros(VNum, dtype=int)

        for m in range(VNum):
            [VBoxMaxV[m], VBoxMaxIndex[m]] = VBoxPercent[:, m].max(), VBoxPercent[:, m].argmax()

        # Special handling of the cut-in band: if its densest cell is too far right, pull it back left.
        if PBoxMaxIndex[0] > 14:
            PBoxMaxIndex[0] = 9

        # Analysis based on the horizontal power bands.
        DotDense = np.zeros(PNum, dtype=int)  # number of cells in each band's main band (not read again below)
        DotDenseLeftRight = np.zeros([PNum, 2], dtype=int)  # cells expanded to the left (col 0) / right (col 1) of the densest cell
        DotValve = 90  # threshold for the summed percentage while expanding outwards from the centre

        for i in range(PNum - 6):  # from the lowest power band up to band PNum-6 (bands below rated power)
            PDotDenseSum = PBoxMaxP[i]  # start from the densest cell and accumulate percentages while expanding
            iSpreadRight = 1
            iSpreadLeft = 1
            while PDotDenseSum < DotValve:

                if (PBoxMaxIndex[i] + iSpreadRight) < VNum - 1:
                    PDotDenseSum = PDotDenseSum + PBoxPercent[i, PBoxMaxIndex[i] + iSpreadRight]  # expand to the right
                    iSpreadRight = iSpreadRight + 1

                if (PBoxMaxIndex[i] + iSpreadRight) > VNum - 1:
                    break

                if (PBoxMaxIndex[i] - iSpreadLeft) > 0:
                    PDotDenseSum = PDotDenseSum + PBoxPercent[i, PBoxMaxIndex[i] - iSpreadLeft]  # expand to the left
                    iSpreadLeft = iSpreadLeft + 1

                if (PBoxMaxIndex[i] - iSpreadLeft) <= 0:
                    break

            # The spreads started at 1, so subtract 1 to get the number of cells actually added.
            iSpreadRight = iSpreadRight - 1
            iSpreadLeft = iSpreadLeft - 1
            # Left/right expansion finished.

            DotDenseLeftRight[i, 0] = iSpreadLeft
            DotDenseLeftRight[i, 1] = iSpreadRight
            DotDense[i] = iSpreadLeft + iSpreadRight + 1

        # The median of the right-side widths is taken as the representative main-band width.
        # (Despite its name, DotDenseWidthLeft holds the right-side widths, column 1.)
        DotDenseWidthLeft = np.zeros([PNum - 6, 1], dtype=int)
        for i in range(PNum - 6):
            DotDenseWidthLeft[i] = DotDenseLeftRight[i, 1]

        MainBandRight = np.median(DotDenseWidthLeft)

        # Power bands whose points extend markedly to the right are treated as power-limited bands.
        PowerLimit = np.zeros([PNum, 1], dtype=int)  # per-band power-limit flag: 1 = yes, 0 = no
        WidthAverage = 0  # mean right-side width of the main band
        WidthVar = 0  # sum of squared width deviations (accumulated below, not read afterwards)
        # Earlier variant: PowerLimitValve = 6
        PowerLimitValve = np.ceil(MainBandRight) + 3  # threshold for classifying a band as power-limited

        nCounterLimit = 0
        nCounter = 0

        for i in range(PNum - 6):
            if DotDenseLeftRight[i, 1] > PowerLimitValve and PBinSum[i] > 20:  # right spread above threshold and more than 20 points in the band
                PowerLimit[i] = 1
                nCounterLimit = nCounterLimit + 1

            if DotDenseLeftRight[i, 1] <= PowerLimitValve:
                WidthAverage = WidthAverage + DotDenseLeftRight[i, 1]  # accumulate right-side width of normal bands
                nCounter = nCounter + 1

        # NOTE(review): divides by zero if no band satisfied the condition above (nCounter == 0).
        WidthAverage = WidthAverage / nCounter  # mean right-side width of the main band

        # Spread of the main-band width across bands; indicates whether the width is consistent
        # from bottom to top.
        for i in range(PNum - 6):
            if DotDenseLeftRight[i, 1] <= PowerLimitValve:
                WidthVar = WidthVar + (DotDenseLeftRight[i, 1] - WidthAverage) * (
                        DotDenseLeftRight[i, 1] - WidthAverage)

        # For power-limited bands whose densest cell differs by more than 5 columns from the
        # band directly below, pull it back to one column right of that band's densest cell.
        for i in range(1, PNum - 6):
            if PowerLimit[i] == 1 and abs(PBoxMaxIndex[i] - PBoxMaxIndex[i - 1]) > 5:
                PBoxMaxIndex[i] = PBoxMaxIndex[i - 1] + 1

        # Left/right spreads in reversed band order (not read again below).
        DotDenseInverse = np.zeros([PNum, 2], dtype=int)

        for i in range(PNum):
            DotDenseInverse[i, :] = DotDenseLeftRight[PNum - i - 1, :]

        # Right boundary offset of the main band.
        CurveWidthR = int(np.ceil(WidthAverage) + 2)

        # Earlier variant: CurveWidthL = 6  (left boundary offset of the main band)
        CurveWidthL = CurveWidthR

        # Per-cell power-limit flag: in a power-limited band, cells from the right edge of the
        # main band onwards are flagged. (BBoxLimit is not read again below.)
        BBoxLimit = np.zeros([PNum, VNum], dtype=int)
        for i in range(2, PNum - 6):
            if PowerLimit[i] == 1:
                for j in range(PBoxMaxIndex[i] + CurveWidthR, VNum):
                    BBoxLimit[i, j] = 1

        # Per-cell removal flag: 1 = cell right of the main band (under-power),
        # 2 = cell left of the main band (over-power).
        BBoxRemove = np.zeros([PNum, VNum], dtype=int)
        for m in range(PNum - 6):
            for n in range(PBoxMaxIndex[m] + CurveWidthR, VNum):
                BBoxRemove[m, n] = 1

            for n in range(PBoxMaxIndex[m] - CurveWidthL, -1, -1):
                BBoxRemove[m, n] = 2

        # Locate the upper-left knee of the main band (the original comment calls it the
        # rated-wind-speed position): scan bands from the top down and take the first cell
        # that passes both thresholds.
        CurveTop = np.zeros(2, dtype=int)
        CurveTopValve = 3  # percentage threshold for a cell
        BTopFind = 0
        for m in range(PNum - 4 - 1, -1, -1):
            for n in range(VNum):
                if VBoxPercent[m, n] > CurveTopValve and XBoxNumber[m, n] >= 10:  # cell percentage and point count both above threshold
                    CurveTop[0] = m
                    CurveTop[1] = n
                    BTopFind = 1
                    break

            if BTopFind == 1:
                break

        # Flag sparse cells (below IsolateValve percent) right of the main band as 1.
        # These cells were already set to 1 by the loop above, so this does not change BBoxRemove.
        IsolateValve = 3
        for m in range(PNum - 6):
            for n in range(PBoxMaxIndex[m] + CurveWidthR, VNum):
                if PBoxPercent[m, n] < IsolateValve:
                    BBoxRemove[m, n] = 1

        # Top of the main band: flag the topmost CurveWidthT + 1 bands across all columns.
        CurveWidthT = 2
        for m in range(PNum - CurveWidthT - 1, PNum):
            for n in range(VNum):
                BBoxRemove[m, n] = 3  # cell holds over-power points above rated power

        # Cells left of the knee column in the top 6 bands are flagged 2
        # (this overwrites the 3 set just above for those columns).
        for m in range(PNum - 5 - 1, PNum):
            for n in range(CurveTop[1] - 1):
                BBoxRemove[m, n] = 2

        # Each point takes the flag of the grid cell it falls into.
        # Dzwind_and_power_dfSel holds the flag for every power > 0 point.
        # -1: shutdown, 0: good, 1: under-power, 2: over-power,
        # 3: over-power above rated wind speed, 4: curtailed
        Dzwind_and_power_dfSel = np.zeros(nCounter1, dtype=int)
        # NOTE(review): as in the counting loop, the bin indices are not reset per point, so a
        # point outside the grid takes the previous point's cell - confirm intent.
        nWhichP = -1
        nWhichV = -1
        nBadA = 0  # count of points flagged 1 (not read again below)

        for i in range(nCounter1):
            for m in range(PNum):
                if m * 25 < DzMarch809[i, 1] <= (m + 1) * 25:
                    nWhichP = m
                    break

            for n in range(VNum):
                if ((n + 1) * 0.25 - 0.125) < DzMarch809[i, 0] <= ((n + 1) * 0.25 + 0.125):
                    nWhichV = n
                    break

            if nWhichP > -1 and nWhichV > -1:

                if BBoxRemove[nWhichP, nWhichV] == 1:
                    Dzwind_and_power_dfSel[i] = 1
                    nBadA = nBadA + 1

                if BBoxRemove[nWhichP, nWhichV] == 2:
                    Dzwind_and_power_dfSel[i] = 2

                if BBoxRemove[nWhichP, nWhichV] == 3:
                    Dzwind_and_power_dfSel[i] = 0  # was 3: over-power points above rated wind speed are treated as normal

        # Curtailment detection, method 2: split the data into consecutive windows of
        # nWindowLength points. A window is flagged when all its powers lie within
        # [PowerLimitLow, PowerLimitUp] and every later point stays within +/- PowerStd of the
        # window's first point.
        nWindowLength = 3
        LimitWindow = np.zeros(nWindowLength, dtype=float)
        PowerStd = 15  # allowed power deviation from the window's first point
        nWindowNum = int(np.floor(nCounter1 / nWindowLength))
        PowerLimitUp = PRated - 300
        PowerLimitLow = 200
        for i in range(nWindowNum):
            for j in range(nWindowLength):
                LimitWindow[j] = DzMarch809[i * nWindowLength + j, 1]

            bAllInAreas = 1
            for j in range(nWindowLength):
                if LimitWindow[j] < PowerLimitLow or LimitWindow[j] > PowerLimitUp:
                    bAllInAreas = 0

            if bAllInAreas == 0:
                continue

            UpLimit = LimitWindow[0] + PowerStd
            LowLimit = LimitWindow[0] - PowerStd
            bAllInUpLow = 1
            for j in range(1, nWindowLength):
                if LimitWindow[j] < LowLimit or LimitWindow[j] > UpLimit:
                    bAllInUpLow = 0

            if bAllInUpLow == 1:
                for j in range(nWindowLength):
                    Dzwind_and_power_dfSel[i * nWindowLength + j] = 4  # flag every point in the window as curtailed

        # Smooth the stair-stepped right edge of the main band: where the densest cell moves
        # right from band i to band i + 1, points in the upper-left triangle of that step are
        # reset to 0.
        nSmooth = 0
        for i in range(PNum - 6):
            PVLeftDown = np.zeros(2, dtype=float)
            PVRightUp = np.zeros(2, dtype=float)

            if (PBoxMaxIndex[i + 1] - PBoxMaxIndex[i]) >= 1:
                PVLeftDown[0] = (PBoxMaxIndex[i] + 1 + CurveWidthR) * 0.25 - 0.125
                PVLeftDown[1] = i * 25

                PVRightUp[0] = (PBoxMaxIndex[i + 1] + 1 + CurveWidthR) * 0.25 - 0.125
                PVRightUp[1] = (i + 1) * 25

                for m in range(nCounter1):
                    if DzMarch809[m, 0] > PVLeftDown[0] and DzMarch809[m, 0] < PVRightUp[0] and PVLeftDown[1] < \
                            DzMarch809[m, 1] < PVRightUp[1]:  # point lies inside this step's rectangle
                        if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (
                                PVRightUp[1] - PVLeftDown[1]) / (
                                PVRightUp[0] - PVLeftDown[0]):  # slope steeper than the diagonal: upper-left triangle, select it
                            Dzwind_and_power_dfSel[m] = 0
                            nSmooth = nSmooth + 1

        print("nSmooth", nSmooth)

        # Default every row to -1, then assign the computed flags to the power > 0 rows.
        # The assignment is positional: the flags are in the same row order used to fill DzMarch809.
        wind_and_power_df.loc[:, 'marker'] = -1
        wind_and_power_df.loc[
            wind_and_power_df[wind_and_power_df[self.active_power] > 0].index, 'marker'] = Dzwind_and_power_dfSel

        # Relabel part of the under-power points (marker 1) as curtailed (marker 4) when the
        # blade angle exceeds a threshold while the power is below a matching level.
        # NOTE(review): 1100 / 1250 / 1400 are fixed values - presumably tuned for the
        # 1500 kW rated power above; confirm before using on other turbine types.

        # Build the condition masks.
        cond1 = (wind_and_power_df['marker'] == 1) & (
                (wind_and_power_df[self.active_power] < 1100) & (wind_and_power_df['叶片角度'] > 0.5)
        )
        cond2 = (wind_and_power_df['marker'] == 1) & (
                (wind_and_power_df[self.active_power] < 1250) & (wind_and_power_df['叶片角度'] > 1.5)
        )
        cond3 = (wind_and_power_df['marker'] == 1) & (
                (wind_and_power_df[self.active_power] < 1400) & (wind_and_power_df['叶片角度'] > 2.5)
        )

        # Combine the masks with logical OR.
        combined_condition = cond1 | cond2 | cond3

        # Update the 'marker' column of the matching rows.
        wind_and_power_df.loc[combined_condition, 'marker'] = 4

        color_map = {-1: 'red', 0: 'green', 1: 'blue', 2: 'black', 3: 'orange', 4: 'magenta'}
        c = wind_and_power_df['marker'].map(color_map)

        # -1: shutdown, 0: good, 1: under-power, 2: over-power,
        # 3: over-power above rated wind speed, 4: curtailed
        legend_map = {"停机": 'red', "好点": 'green', "欠发": 'blue', "超发": 'black', "额定风速以上的超发": 'orange', "限电": 'magenta'}

        # Scatter plot of wind speed vs. active power, coloured by marker, saved beside this module.
        scatter("测试matlab结果", x_label='风速', y_label='有功功率', x_values=wind_and_power_df[self.wind_velocity].values,
                y_values=wind_and_power_df[self.active_power].values, color=c, col_map=legend_map,
                save_file_path=os.path.dirname(__file__) + os.sep + '平陆测试matlab结果均值.png')
			
 
				+
			
 
    def run(self):
        """Run the identification by delegating to ``self.identifier()``; returns nothing."""
        self.identifier()
			
 
				+
			
 
				+
			
 
if __name__ == '__main__':
    # Ad-hoc local run: the constructor reads the CSV at this hard-coded Windows path via
    # read_file_to_df and indexes it by the '时间' column.
    # The keyword arguments map the CSV's Chinese column names (time, wind speed, power)
    # onto the constructor's index / wind_velocity / active_power parameters.
    # identifier() additionally selects a "叶片角度" column, so the CSV must contain it.
    test = ClassIdentifier('test', r"D:\中能智能\matlib计算相关\好点坏点matlib计算\A01.csv", index='时间',
                           wind_velocity='风速',
                           active_power='功率')

    test.run()
			
--- a/tmp_file/ClassIdentifier_4.py
+++ b/tmp_file/ClassIdentifier_4.py
@@ -0,0 +1,386 @@
 
				+import os
			
 
				+
			
 
				+import numpy as np
			
 
				+from pandas import DataFrame
			
 
				+
			
 
				+from utils.draw.draw_file import scatter
			
 
				+from utils.file.trans_methods import read_file_to_df
			
 
				+
			
 
				+
			
 
				+class ClassIdentifier(object):
			
 
				+
			
 
				+    def __init__(self, wind_turbine_number, file_path: str = None, origin_df: DataFrame = None,
			
 
				+                 wind_velocity='wind_velocity',
			
 
				+                 active_power='active_power',
			
 
				+                 pitch_angle_blade='pitch_angle_blade_1',
			
 
				+                 rated_power=1500):
			
 
				+        """
			
 
				+        :param wind_turbine_number: The wind turbine number.
			
 
				+        :param file_path: The file path of the input data.
			
 
				+        :param origin_df: The pandas DataFrame containing the input data.
			
 
				+        :param wind_velocity: 风速字段
			
 
				+        :param active_power: 有功功率字段
			
 
				+        :param pitch_angle_blade: 桨距角
			
 
				+        :param rated_power: 额定功率
			
 
				+        """
			
 
				+        self.wind_turbine_number = wind_turbine_number
			
 
				+        self.wind_velocity = wind_velocity
			
 
				+        self.active_power = active_power
			
 
				+        self.pitch_angle_blade = pitch_angle_blade
			
 
				+        self.rated_power = rated_power  # 额定功率1500kw,可改为2000kw
			
 
				+
			
 
				+        if file_path is None and origin_df is None:
			
 
				+            raise ValueError("Either file_path or origin_df should be provided.")
			
 
				+
			
 
				+        if file_path:
			
 
				+            self.df = read_file_to_df(file_path)
			
 
				+        else:
			
 
				+            self.df = origin_df
			
 
				+
			
 
				+    def identifier(self):
			
 
				+        # 风速 和 有功功率 df
			
 
				+        wind_and_power_df = self.df[[self.wind_velocity, self.active_power, "pitch_angle_blade_1"]]
			
 
				+        wind_and_power_df.reset_index(inplace=True)
			
 
				+        wind_and_power_df_count = wind_and_power_df.shape[0]
			
 
				+        power_max = wind_and_power_df[self.active_power].max()
			
 
				+        power_rated = np.ceil(power_max / 100) * 100
			
 
				+        v_cut_out = 25
			
 
				+        # 网格法确定风速风向分区数量，功率方向分区数量，
			
 
				+        p_num = int(np.ceil(power_rated / 25))  # 功率分区间隔25kW
			
 
				+        v_num = int(np.ceil(v_cut_out / 0.25))  # 风速分区间隔0.25m/s
			
 
				+
			
 
				+        # 存储功率大于零的运行数据
			
 
				+        dz_march = np.zeros([wind_and_power_df_count, 2], dtype=float)
			
 
				+        n_counter1 = 0
			
 
				+        for i in range(wind_and_power_df_count):
			
 
				+            if wind_and_power_df.loc[i, self.active_power] > 0:
			
 
				+                dz_march[n_counter1, 0] = wind_and_power_df.loc[i, self.wind_velocity]
			
 
				+                dz_march[n_counter1, 1] = wind_and_power_df.loc[i, self.active_power]
			
 
				+
			
 
				+                n_counter1 = n_counter1 + 1
			
 
				+
			
 
				+        # 统计各网格落入的散点个数
			
 
				+        if v_num == 1:
			
 
				+            x_box_number = np.ones([p_num], dtype=int)
			
 
				+        else:
			
 
				+            x_box_number = np.ones([p_num, v_num], dtype=int)
			
 
				+        n_which_p = -1
			
 
				+        n_which_v = -1
			
 
				+        for i in range(n_counter1):
			
 
				+            for m in range(p_num):
			
 
				+                if m * 25 < dz_march[i, 1] <= (m + 1) * 25:
			
 
				+                    n_which_p = m
			
 
				+                    break
			
 
				+            for n in range(v_num):
			
 
				+                if ((n + 1) * 0.25 - 0.125) < dz_march[i, 0] <= ((n + 1) * 0.25 + 0.125):
			
 
				+                    n_which_v = n
			
 
				+                    break
			
 
				+
			
 
				+            if n_which_p > -1 and n_which_v > -1:
			
 
				+                x_box_number[n_which_p, n_which_v] = x_box_number[n_which_p, n_which_v] + 1
			
 
				+
			
 
				+        for m in range(p_num):
			
 
				+            for n in range(v_num):
			
 
				+                x_box_number[m, n] = x_box_number[m, n] - 1
			
 
				+
			
 
				+        # 在功率方向将网格内散点绝对个数转换为相对百分比，备用
			
 
				+        p_box_percent = np.zeros([p_num, v_num], dtype=float)
			
 
				+        p_bin_sum = np.zeros(p_num, dtype=int)
			
 
				+
			
 
				+        for i in range(p_num):
			
 
				+            for m in range(v_num):
			
 
				+                p_bin_sum[i] = p_bin_sum[i] + x_box_number[i, m]
			
 
				+
			
 
				+            for m in range(v_num):
			
 
				+                if p_bin_sum[i] > 0:
			
 
				+                    p_box_percent[i, m] = x_box_number[i, m] / p_bin_sum[i] * 100
			
 
				+
			
 
				+        # 在风速方向将网格内散点绝对个数转换为相对百分比，备用
			
 
				+        v_box_percent = np.zeros([p_num, v_num], dtype=float)
			
 
				+        v_bin_sum = np.zeros(v_num, dtype=int)
			
 
				+
			
 
				+        for i in range(v_num):
			
 
				+            for m in range(p_num):
			
 
				+                v_bin_sum[i] = v_bin_sum[i] + x_box_number[m, i]
			
 
				+
			
 
				+            for m in range(p_num):
			
 
				+                if v_bin_sum[i] > 0:
			
 
				+                    v_box_percent[m, i] = x_box_number[m, i] / v_bin_sum[i] * 100
			
 
				+
			
 
				+        # 以水平功率带方向为准，分析每个水平功率带中，功率主带中心，即找百分比最大的网格位置。
			
 
				+        p_box_max_index = np.zeros(p_num, dtype=int)  # 水平功率带最大网格位置索引
			
 
				+        p_box_max_p = np.zeros(p_num, dtype=int)  # 水平功率带最大网格百分比
			
 
				+
			
 
				+        for m in range(p_num):
			
 
				+            # 确定每一水平功率带的最大网格位置索引即百分比值
			
 
				+            p_box_max_p[m], p_box_max_index[m] = p_box_percent[m, :].max(), p_box_percent[m, :].argmax()
			
 
				+
			
 
				+        # 以垂直风速方向为准，分析每个垂直风速带中，功率主带中心，即找百分比最大的网格位置。
			
 
				+        v_box_max_index = np.zeros(v_num, dtype=int)
			
 
				+        v_box_max_v = np.zeros(v_num, dtype=int)
			
 
				+
			
 
				+        for m in range(v_num):
			
 
				+            [v_box_max_v[m], v_box_max_index[m]] = v_box_percent[:, m].max(), v_box_percent[:, m].argmax()
			
 
				+
			
 
				+        # 切入风速特殊处理，如果切入风速过于偏右，向左拉回
			
 
				+        if p_box_max_index[0] > 14:
			
 
				+            p_box_max_index[0] = 9
			
 
				+
			
 
				+        # 以水平功率带方向为基准，进行分析
			
 
				+        dot_dense = np.zeros(p_num, dtype=int)  # 每一水平功率带的功率主带包含的网格数
			
 
				+        dot_dense_left_right = np.zeros([p_num, 2], dtype=int)  # 存储每一水平功率带的功率主带以最大网格为中心，向向左，向右扩展的网格数
			
 
				+        dot_valve = 90  # 从中心向左右对称扩展网格的散点百分比和的阈值。
			
 
				+
			
 
				+        for i in range(p_num - 6):  # 从最下层水平功率带1开始，向上到第PNum-6个水平功率带（额定功率一下水平功率带），逐一分析
			
 
				+            p_dot_dense_sum = p_box_max_p[i]  # 以中心最大水平功率带为基准，向左向右对称扩展网格，累加各网格散点百分比
			
 
				+            i_spread_right = 1
			
 
				+            i_spread_left = 1
			
 
				+            while p_dot_dense_sum < dot_valve:
			
 
				+
			
 
				+                if (p_box_max_index[i] + i_spread_right) < v_num - 1:
			
 
				+                    p_dot_dense_sum = p_dot_dense_sum + p_box_percent[i, p_box_max_index[i] + i_spread_right]  # 向右侧扩展
			
 
				+                    i_spread_right = i_spread_right + 1
			
 
				+
			
 
				+                if (p_box_max_index[i] + i_spread_right) > v_num - 1:
			
 
				+                    break
			
 
				+
			
 
				+                if (p_box_max_index[i] - i_spread_left) > 0:
			
 
				+                    p_dot_dense_sum = p_dot_dense_sum + p_box_percent[i, p_box_max_index[i] - i_spread_left]  # 向左侧扩展
			
 
				+                    i_spread_left = i_spread_left + 1
			
 
				+
			
 
				+                if (p_box_max_index[i] - i_spread_left) <= 0:
			
 
				+                    break
			
 
				+
			
 
				+            i_spread_right = i_spread_right - 1
			
 
				+            i_spread_left = i_spread_left - 1
			
 
				+            # 向左右对称扩展完毕
			
 
				+
			
 
				+            dot_dense_left_right[i, 0] = i_spread_left
			
 
				+            dot_dense_left_right[i, 1] = i_spread_right
			
 
				+            dot_dense[i] = i_spread_left + i_spread_right + 1
			
 
				+
			
 
				+        # 各行功率主带右侧宽度的中位数最具有代表性
			
 
				+        dot_dense_width_left = np.zeros([p_num - 6, 1], dtype=int)
			
 
				+        for i in range(p_num - 6):
			
 
				+            dot_dense_width_left[i] = dot_dense_left_right[i, 1]
			
 
				+
			
 
				+        main_band_right = np.median(dot_dense_width_left)
			
 
				+
			
 
				+        # 散点向右显著延展分布的水平功率带为限功率水平带
			
 
				+        power_limit = np.zeros([p_num, 1], dtype=int)  # 各水平功率带是否为限功率标识，==1：是；==0：不是
			
 
				+        width_average = 0  # 功率主带平均宽度
			
 
				+        width_var = 0  # 功率主带方差
			
 
				+        # power_limit_valve = 6    #限功率主带判别阈值
			
 
				+        power_limit_valve = np.ceil(main_band_right) + 3  # 限功率主带判别阈值
			
 
				+
			
 
				+        n_counter_limit = 0
			
 
				+        n_counter = 0
			
 
				+
			
 
				+        for i in range(p_num - 6):
			
 
				+            if dot_dense_left_right[i, 1] > power_limit_valve and p_bin_sum[i] > 20:  # 如果向右扩展网格数大于阈值，且该水平功率带点总数>20，是
			
 
				+                power_limit[i] = 1
			
 
				+                n_counter_limit = n_counter_limit + 1
			
 
				+
			
 
				+            if dot_dense_left_right[i, 1] <= power_limit_valve:
			
 
				+                width_average = width_average + dot_dense_left_right[i, 1]  # 统计正常水平功率带右侧宽度
			
 
				+                n_counter = n_counter + 1
			
 
				+
			
 
				+        width_average = width_average / n_counter  # 功率主带平均宽度
			
 
				+
			
 
				+        # 各水平功率带的功率主带宽度的方差，反映从下到上宽度是否一致，或是否下宽上窄等异常情况
			
 
				+        for i in range(p_num - 6):
			
 
				+            if dot_dense_left_right[i, 1] <= power_limit_valve:
			
 
				+                width_var = width_var + (dot_dense_left_right[i, 1] - width_average) * (
			
 
				+                        dot_dense_left_right[i, 1] - width_average)
			
 
				+
			
 
				+        # 对限负荷水平功率带的最大网格较下面相邻层显著偏右，拉回
			
 
				+        for i in range(1, p_num - 6):
			
 
				+            if power_limit[i] == 1 and abs(p_box_max_index[i] - p_box_max_index[i - 1]) > 5:
			
 
				+                p_box_max_index[i] = p_box_max_index[i - 1] + 1
			
 
				+
			
 
				+        # 输出各层功率主带的左右边界网格索引
			
 
				+        dot_dense_inverse = np.zeros([p_num, 2], dtype=int)
			
 
				+
			
 
				+        for i in range(p_num):
			
 
				+            dot_dense_inverse[i, :] = dot_dense_left_right[p_num - i - 1, :]
			
 
				+
			
 
				+        # 功率主带的右边界
			
 
				+        curve_width_r = int(np.ceil(width_average) + 2)
			
 
				+
			
 
				+        # curve_width_l = 6    #功率主带的左边界
			
 
				+        curve_width_l = curve_width_r
			
 
				+
			
 
				+        b_box_limit = np.zeros([p_num, v_num], dtype=int)  # 网格是否为限功率网格的标识，如果为限功率水平功率带，从功率主带右侧边缘向右的网格为限功率网格
			
 
				+        for i in range(2, p_num - 6):
			
 
				+            if power_limit[i] == 1:
			
 
				+                for j in range(p_box_max_index[i] + curve_width_r, v_num):
			
 
				+                    b_box_limit[i, j] = 1
			
 
				+
			
 
				+        b_box_remove = np.zeros([p_num, v_num], dtype=int)  # 数据异常需要剔除的网格标识，标识==1：功率主带右侧的欠发网格；==2：功率主带左侧的超发网格
			
 
				+        for m in range(p_num - 6):
			
 
				+            for n in range(p_box_max_index[m] + curve_width_r, v_num):
			
 
				+                b_box_remove[m, n] = 1
			
 
				+
			
 
				+            for n in range(p_box_max_index[m] - curve_width_l, -1, -1):
			
 
				+                b_box_remove[m, n] = 2
			
 
				+
			
 
				+        # 确定功率主带的左上拐点，即额定风速位置的网格索引
			
 
				+        curve_top = np.zeros(2, dtype=int)
			
 
				+        curve_top_valve = 3  # 网格的百分比阈值
			
 
				+        b_top_find = 0
			
 
				+        for m in range(p_num - 4 - 1, -1, -1):
			
 
				+            for n in range(v_num):
			
 
				+                if v_box_percent[m, n] > curve_top_valve and x_box_number[m, n] >= 10:  # 如左上角网格的百分比和散点个数大于阈值。
			
 
				+                    curve_top[0] = m
			
 
				+                    curve_top[1] = n
			
 
				+                    b_top_find = 1
			
 
				+                    break
			
 
				+
			
 
				+            if b_top_find == 1:
			
 
				+                break
			
 
				+
			
 
				+        isolate_valve = 3
			
 
				+        for m in range(p_num - 6):
			
 
				+            for n in range(p_box_max_index[m] + curve_width_r, v_num):
			
 
				+                if p_box_percent[m, n] < isolate_valve:
			
 
				+                    b_box_remove[m, n] = 1
			
 
				+
			
 
				+        # 功率主带顶部宽度
			
 
				+        curve_width_t = 2
			
 
				+        for m in range(p_num - curve_width_t - 1, p_num):
			
 
				+            for n in range(v_num):
			
 
				+                b_box_remove[m, n] = 3  # 网格为额定功率以上的超发点
			
 
				+
			
 
				+        # 功率主带拐点左侧的欠发网格标识
			
 
				+        for m in range(p_num - 5 - 1, p_num):
			
 
				+            for n in range(curve_top[1] - 1):
			
 
				+                b_box_remove[m, n] = 2
			
 
				+
			
 
				+        # 以网格的标识，决定该网格内数据的标识。dzwind_and_power_sel。散点在哪个网格，此网格的标识即为该点的标识
			
 
				+        dzwind_and_power_sel = np.zeros(n_counter1, dtype=int)  # -1:停机 0:好点  1:欠发功率点；2:超发功率点；3:额定风速以上的超发功率点 4: 限电
			
 
				+        n_which_p = -1
			
 
				+        n_which_v = -1
			
 
				+        n_bad_a = 0
			
 
				+
			
 
				+        for i in range(n_counter1):
			
 
				+            for m in range(p_num):
			
 
				+                if m * 25 < dz_march[i, 1] <= (m + 1) * 25:
			
 
				+                    n_which_p = m
			
 
				+                    break
			
 
				+
			
 
				+            for n in range(v_num):
			
 
				+                if ((n + 1) * 0.25 - 0.125) < dz_march[i, 0] <= ((n + 1) * 0.25 + 0.125):
			
 
				+                    n_which_v = n
			
 
				+                    break
			
 
				+
			
 
				+            if n_which_p > -1 and n_which_v > -1:
			
 
				+
			
 
				+                if b_box_remove[n_which_p, n_which_v] == 1:
			
 
				+                    dzwind_and_power_sel[i] = 1
			
 
				+                    n_bad_a = n_bad_a + 1
			
 
				+
			
 
				+                if b_box_remove[n_which_p, n_which_v] == 2:
			
 
				+                    dzwind_and_power_sel[i] = 2
			
 
				+
			
 
				+                if b_box_remove[n_which_p, n_which_v] == 3:
			
 
				+                    dzwind_and_power_sel[i] = 0  # 3  # 额定风速以上的超发功率点认为是正常点，不再标识。
			
 
				+
			
 
				+        # 限负荷数据标识方法2：把数据切割为若干个窗口。对每一窗口，以第一个点为基准，连续nWindowLength个数据的功率在方差范围内，呈现显著水平分布的点
			
 
				+        n_window_length = 3
			
 
				+        limit_window = np.zeros(n_window_length, dtype=float)
			
 
				+        power_std = 15  # 功率波动方差
			
 
				+        n_window_num = int(np.floor(n_counter1 / n_window_length))
			
 
				+        power_limit_up = self.rated_power - 300
			
 
				+        power_limit_low = 200
			
 
				+        for i in range(n_window_num):
			
 
				+            for j in range(n_window_length):
			
 
				+                limit_window[j] = dz_march[i * n_window_length + j, 1]
			
 
				+
			
 
				+            b_all_in_areas = 1
			
 
				+            for j in range(n_window_length):
			
 
				+                if limit_window[j] < power_limit_low or limit_window[j] > power_limit_up:
			
 
				+                    b_all_in_areas = 0
			
 
				+
			
 
				+            if b_all_in_areas == 0:
			
 
				+                continue
			
 
				+
			
 
				+            up_limit = limit_window[0] + power_std
			
 
				+            low_limit = limit_window[0] - power_std
			
 
				+            b_all_in_up_low = 1
			
 
				+            for j in range(1, n_window_length):
			
 
				+                if limit_window[j] < low_limit or limit_window[j] > up_limit:
			
 
				+                    b_all_in_up_low = 0
			
 
				+
			
 
				+            if b_all_in_up_low == 1:
			
 
				+                for j in range(n_window_length):
			
 
				+                    dzwind_and_power_sel[i * n_window_length + j] = 4  # 标识窗口内的数据为限负荷数据
			
 
				+
			
 
				+        for i in range(p_num - 6):
			
 
				+            pv_left_down = np.zeros(2, dtype=float)
			
 
				+            pv_right_up = np.zeros(2, dtype=float)
			
 
				+
			
 
				+            if (p_box_max_index[i + 1] - p_box_max_index[i]) >= 1:
			
 
				+                pv_left_down[0] = (p_box_max_index[i] + 1 + curve_width_r) * 0.25 - 0.125
			
 
				+                pv_left_down[1] = i * 25
			
 
				+
			
 
				+                pv_right_up[0] = (p_box_max_index[i + 1] + 1 + curve_width_r) * 0.25 - 0.125
			
 
				+                pv_right_up[1] = (i + 1) * 25
			
 
				+
			
 
				+                for m in range(n_counter1):
			
 
				+                    if pv_left_down[0] < dz_march[m, 0] < pv_right_up[0] and pv_left_down[1] < \
			
 
				+                            dz_march[m, 1] < pv_right_up[1]:  # 在该锯齿中
			
 
				+                        if (dz_march[m, 1] - pv_left_down[1]) / (dz_march[m, 0] - pv_left_down[0]) > (
			
 
				+                                pv_right_up[1] - pv_left_down[1]) / (
			
 
				+                                pv_right_up[0] - pv_left_down[0]):  # 斜率大于对角连线，则在锯齿左上三角形中，选中
			
 
				+                            dzwind_and_power_sel[m] = 0
			
 
				+
			
 
				+        wind_and_power_df.loc[:, 'marker'] = -1
			
 
				+        wind_and_power_df.loc[
			
 
				+            wind_and_power_df[wind_and_power_df[self.active_power] > 0].index, 'marker'] = dzwind_and_power_sel
			
 
				+
			
 
				+        # 把部分欠发的优化为限电
			
 
				+        # 构建条件表达式
			
 
				+        cond1 = (wind_and_power_df['marker'] == 1) & (
			
 
				+                (wind_and_power_df[self.active_power] < self.rated_power * 0.75) &
			
 
				+                (wind_and_power_df[self.pitch_angle_blade] > 0.5)
			
 
				+        )
			
 
				+        cond2 = (wind_and_power_df['marker'] == 1) & (
			
 
				+                (wind_and_power_df[self.active_power] < self.rated_power * 0.85) &
			
 
				+                (wind_and_power_df[self.pitch_angle_blade] > 1.5)
			
 
				+        )
			
 
				+        cond3 = (wind_and_power_df['marker'] == 1) & (
			
 
				+                (wind_and_power_df[self.active_power] < self.rated_power * 0.9) &
			
 
				+                (wind_and_power_df[self.pitch_angle_blade] > 2.5)
			
 
				+        )
			
 
				+
			
 
				+        # 使用逻辑或操作符|合并条件
			
 
				+        combined_condition = cond1 | cond2 | cond3
			
 
				+        wind_and_power_df.loc[combined_condition, 'marker'] = 4
			
 
				+
			
 
				+        return wind_and_power_df
			
 
				+
			
 
				+    def run(self):
			
 
				+        # Implement your class identification logic here
			
 
				+        return self.identifier()
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    test = ClassIdentifier('test', r"D:\中能智能\matlib计算相关\好点坏点matlib计算\WOG00436.csv",
			
 
				+                           wind_velocity='wind_velocity',
			
 
				+                           active_power='active_power',
			
 
				+                           pitch_angle_blade='pitch_angle_blade_1',
			
 
				+                           rated_power=1500
			
 
				+                           )
			
 
				+
			
 
				+    df = test.run()
			
 
				+
			
 
				+    df.to_csv("tet.csv", encoding="utf8")
			
 
				+
			
 
				+    color_map = {-1: 'red', 0: 'green', 1: 'blue', 2: 'black', 3: 'orange', 4: 'magenta'}
			
 
				+    c = df['marker'].map(color_map)
			
 
				+
			
 
				+    # -1:停机 0:好点  1:欠发功率点；2:超发功率点；3:额定风速以上的超发功率点 4: 限电
			
 
				+    legend_map = {"停机": 'red', "好点": 'green', "欠发": 'blue', "超发": 'black', "额定风速以上的超发": 'orange', "限电": 'magenta'}
			
 
				+    scatter("测试matlab结果", x_label='风速', y_label='有功功率', x_values=df[test.wind_velocity].values,
			
 
				+            y_values=df[test.active_power].values, color=c, col_map=legend_map,
			
 
				+            save_file_path=os.path.dirname(__file__) + os.sep + '元梁山测试matlab结果均值.png')
			
--- a/tmp_file/__init__.py
+++ b/tmp_file/__init__.py
--- a/tmp_file/power_derating.py
+++ b/tmp_file/power_derating.py
@@ -0,0 +1,90 @@
 
				+import multiprocessing
			
 
				+import os
			
 
				+
			
 
				+import matplotlib
			
 
				+matplotlib.use('Agg')
			
 
				+matplotlib.rcParams['font.family'] = 'SimHei'
			
 
				+matplotlib.rcParams['font.sans-serif'] = ['SimHei']
			
 
				+
			
 
				+import numpy as np
			
 
				+from matplotlib import pyplot as plt
			
 
				+
			
 
				+from utils.file.trans_methods import read_file_to_df
			
 
				+from utils.file.trans_methods import read_excel_files
			
 
				+import pandas as pd
			
 
				+
			
 
				+
			
 
				+def select_data(file, curve_wv, curve_ap, save_path):
			
 
				+    name = os.path.basename(file).split("@")[0]
			
 
				+    try:
			
 
				+        df = read_file_to_df(file)
			
 
				+        df.dropna(subset=['有功功率 kW均值', '风速 m/s均值', '有功功率设定 kW均值'], inplace=True)
			
 
				+        ap_gt_0_df = df[df['有功功率 kW均值'] > 0]
			
 
				+        ap_le_0_df = df[df['有功功率 kW均值'] <= 0]
			
 
				+        ap_le_0_df["marker"] = -1
			
 
				+
			
 
				+        ap = ap_gt_0_df['有功功率 kW均值'].values
			
 
				+        wv = ap_gt_0_df['风速 m/s均值'].values
			
 
				+        ap_set = ap_gt_0_df['有功功率设定 kW均值'].values
			
 
				+
			
 
				+        ap_gt_0_in = [0] * ap_gt_0_df.shape[0]
			
 
				+
			
 
				+        for i in range(len(ap_set)):
			
 
				+            wind_speed = wv[i]
			
 
				+            active_power = ap[i]
			
 
				+            active_power_set = ap_set[i]
			
 
				+
			
 
				+            if active_power >= 2200 - 200:
			
 
				+                ap_gt_0_in[i] = 1
			
 
				+            else:
			
 
				+                diffs = np.abs(curve_wv - wind_speed)
			
 
				+                # 找到差值最小的索引和对应的差值
			
 
				+                minDiff, idx = np.min(diffs), np.argmin(diffs)
			
 
				+
			
 
				+                # 使用找到的索引获取对应的值
			
 
				+                closestValue = curve_ap[idx]
			
 
				+                if active_power - closestValue >= -100:
			
 
				+                    ap_gt_0_in[i] = 1
			
 
				+
			
 
				+        ap_gt_0_df['marker'] = ap_gt_0_in
			
 
				+        df = pd.concat([ap_gt_0_df, ap_le_0_df])
			
 
				+
			
 
				+        df.to_csv(os.path.join(save_path, name + '.csv'), index=False, encoding='utf-8')
			
 
				+
			
 
				+        df = df[['时间', '风速 m/s均值', '有功功率 kW均值', '有功功率设定 kW均值', 'marker']]
			
 
				+
			
 
				+        df = df[df['marker'] == 1]
			
 
				+
			
 
				+        x = df['风速 m/s均值'].values
			
 
				+        y = df['有功功率 kW均值'].values
			
 
				+        # 使用scatter函数绘制散点图
			
 
				+        if not df.empty:
			
 
				+            plt.scatter(x, y, s=10, c='blue')
			
 
				+
			
 
				+            # 添加标题和坐标轴标签
			
 
				+            plt.title(name)
			
 
				+            plt.xlabel('风速均值')
			
 
				+            plt.ylabel('有功功率均值')
			
 
				+
			
 
				+            # 保存
			
 
				+            plt.savefig(os.path.join(save_path, name + '均值.png'))
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(os.path.basename(file), "出错", str(e))
			
 
				+        raise e
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    wind_power_df = read_file_to_df(r"D:\中能智能\matlib计算相关\标记derating\PV_Curve.csv")
			
 
				+    curve_wv = wind_power_df["风速"].values
			
 
				+    curve_ap = wind_power_df["功率"].values
			
 
				+
			
 
				+    all_files = read_excel_files(r"Z:\collection_data\1进行中\诺木洪风电场-甘肃-华电\清理数据\min-666")
			
 
				+    save_path = r"D:\trans_data\诺木洪\清理数据\min-666-derating"
			
 
				+
			
 
				+    # save_path = r"Z:\collection_data\1进行中\诺木洪风电场-甘肃-华电\清理数据\min-666-marker"
			
 
				+
			
 
				+    # for file in all_files:
			
 
				+
			
 
				+    with multiprocessing.Pool(10) as pool:
			
 
				+        pool.starmap(select_data, [(i, curve_wv, curve_ap, save_path) for i in all_files])
			
--- a/tmp_file/power_derating_biaozhun.py
+++ b/tmp_file/power_derating_biaozhun.py
@@ -0,0 +1,91 @@
 
				+import os
			
 
				+
			
 
				+import matplotlib
			
 
				+import numpy as np
			
 
				+from matplotlib import pyplot as plt
			
 
				+
			
 
				+from utils.draw.draw_file import scatter
			
 
				+
			
 
				+matplotlib.use('Agg')
			
 
				+matplotlib.rcParams['font.family'] = 'SimHei'  # 或者 'Microsoft YaHei'
			
 
				+matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # 或者 ['Microsoft YaHei']
			
 
				+
			
 
				+from utils.file.trans_methods import read_file_to_df
			
 
				+from utils.file.trans_methods import read_excel_files
			
 
				+import pandas as pd
			
 
				+
			
 
				+
			
 
				+class ContractPowerCurve(object):
			
 
				+
			
 
				+    def __init__(self, df: pd.DataFrame, wind_velocity='风速', active_power='功率'):
			
 
				+        self.df = df
			
 
				+        self.wind_velocity = wind_velocity
			
 
				+        self.active_power = active_power
			
 
				+
			
 
				+
			
 
				+def marker_active_power(contract_power_curve_class: ContractPowerCurve, df: pd.DataFrame, active_power='有功功率 kW均值',
			
 
				+                        wind_velocity='风速 m/s均值'):
			
 
				+    """
			
 
				+    标记有功功率为正的记录
			
 
				+    :param contract_power_curve_class: 合同功率曲线
			
 
				+    :param df: 原始数据
			
 
				+    :return: 标记有功功率为正的原始数据
			
 
				+    """
			
 
				+    contract_power_curve_df = contract_power_curve_class.df
			
 
				+    curve_wv = contract_power_curve_df[contract_power_curve_class.wind_velocity].values
			
 
				+    curve_ap = contract_power_curve_df[contract_power_curve_class.active_power].values
			
 
				+
			
 
				+    df.dropna(subset=[active_power, wind_velocity], inplace=True)
			
 
				+    ap_gt_0_df = df[df[active_power] > 0]
			
 
				+    ap_le_0_df = df[df[active_power] <= 0]
			
 
				+    ap_le_0_df["marker"] = -1
			
 
				+
			
 
				+    active_power_values = ap_gt_0_df[active_power].values
			
 
				+    wind_speed_values = ap_gt_0_df[wind_velocity].values
			
 
				+    ap_gt_0_in = [0] * ap_gt_0_df.shape[0]
			
 
				+
			
 
				+    for i in range(len(ap_gt_0_in)):
			
 
				+        wind_speed = wind_speed_values[i]
			
 
				+        active_power = active_power_values[i]
			
 
				+
			
 
				+        # if active_power >= 2200 - 200:
			
 
				+        #     ap_gt_0_in[i] = 1
			
 
				+        # else:
			
 
				+        diffs = np.abs(curve_wv - wind_speed)
			
 
				+        # 找到差值最小的索引和对应的差值
			
 
				+        minDiff, idx = np.min(diffs), np.argmin(diffs)
			
 
				+
			
 
				+        # 使用找到的索引获取对应的值
			
 
				+        closestValue = curve_ap[idx]
			
 
				+        if active_power - closestValue >= -100:
			
 
				+            ap_gt_0_in[i] = 1
			
 
				+
			
 
				+    ap_gt_0_df['marker'] = ap_gt_0_in
			
 
				+    return pd.concat([ap_gt_0_df, ap_le_0_df])
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    wind_power_df = read_file_to_df(r"D:\中能智能\matlib计算相关\标记derating\PV_Curve.csv")
			
 
				+
			
 
				+    all_files = read_excel_files(r"Z:\collection_data\1进行中\诺木洪风电场-甘肃-华电\清理数据\min-666")
			
 
				+    save_path = r"D:\trans_data\诺木洪\清理数据\min-666-derating"
			
 
				+
			
 
				+    wind_power_df_class = ContractPowerCurve(wind_power_df)
			
 
				+
			
 
				+    for file in all_files:
			
 
				+        name = os.path.basename(file).split("@")[0]
			
 
				+        try:
			
 
				+            df = read_file_to_df(file)
			
 
				+            df = marker_active_power(wind_power_df_class, df)
			
 
				+            df = df[df['marker'] == 1]
			
 
				+            df.to_csv(os.path.join(save_path, name + '.csv'), index=False, encoding='utf-8')
			
 
				+
			
 
				+            # 使用scatter函数绘制散点图
			
 
				+            if not df.empty:
			
 
				+                scatter(name, x_label='风速均值', y_label='有功功率均值', x_values=df['风速 m/s均值'].values,
			
 
				+                        y_values=df['有功功率 kW均值'].values, color='green',
			
 
				+                        save_file_path=os.path.join(save_path, name + '均值.png'))
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(os.path.basename(file), "出错", str(e))
			
 
				+            raise e
			
--- a/tmp_file/power_derating_for_chunlin.py
+++ b/tmp_file/power_derating_for_chunlin.py
@@ -0,0 +1,213 @@
 
				+import os
			
 
				+
			
 
				+import matplotlib
			
 
				+import numpy as np
			
 
				+from matplotlib import pyplot as plt
			
 
				+
			
 
				+matplotlib.use('Agg')
			
 
				+matplotlib.rcParams['font.family'] = 'SimHei'  # 或者 'Microsoft YaHei'
			
 
				+matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # 或者 ['Microsoft YaHei']
			
 
				+
			
 
				+import pandas as pd
			
 
				+import chardet
			
 
				+import warnings
			
 
				+
			
 
				+warnings.filterwarnings("ignore")
			
 
				+
			
 
				+
			
 
				+# 获取文件编码
			
 
				+def detect_file_encoding(filename):
			
 
				+    # 读取文件的前1000个字节（足够用于大多数编码检测）
			
 
				+    with open(filename, 'rb') as f:
			
 
				+        rawdata = f.read(1000)
			
 
				+    result = chardet.detect(rawdata)
			
 
				+    encoding = result['encoding']
			
 
				+
			
 
				+    if encoding is None:
			
 
				+        encoding = 'gb18030'
			
 
				+
			
 
				+    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
			
 
				+        encoding = 'gb18030'
			
 
				+    return encoding
			
 
				+
			
 
				+
			
 
				+def del_blank(df=pd.DataFrame(), cols=list()):
			
 
				+    for col in cols:
			
 
				+        if df[col].dtype == object:
			
 
				+            df[col] = df[col].str.strip()
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+# 切割数组到多个数组
			
 
				+def split_array(array, num):
			
 
				+    return [array[i:i + num] for i in range(0, len(array), num)]
			
 
				+
			
 
				+
			
 
				+# 读取数据到df
			
 
				+def read_file_to_df(file_path, read_cols=list(), header=0):
			
 
				+    try:
			
 
				+        df = pd.DataFrame()
			
 
				+        if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
			
 
				+            encoding = detect_file_encoding(file_path)
			
 
				+            end_with_gz = str(file_path).lower().endswith("gz")
			
 
				+            if read_cols:
			
 
				+                if end_with_gz:
			
 
				+                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
			
 
				+                else:
			
 
				+                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header,
			
 
				+                                     on_bad_lines='warn')
			
 
				+            else:
			
 
				+
			
 
				+                if end_with_gz:
			
 
				+                    df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
			
 
				+                else:
			
 
				+                    df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
			
 
				+
			
 
				+        else:
			
 
				+            xls = pd.ExcelFile(file_path)
			
 
				+            # 获取所有的sheet名称
			
 
				+            sheet_names = xls.sheet_names
			
 
				+            for sheet in sheet_names:
			
 
				+                if read_cols:
			
 
				+                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)
			
 
				+                else:
			
 
				+                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header)
			
 
				+
			
 
				+                df = pd.concat([df, now_df])
			
 
				+
			
 
				+        print('文件读取成功', file_path, '文件数量', df.shape)
			
 
				+    except Exception as e:
			
 
				+        print('读取文件出错', file_path, str(e))
			
 
				+        message = '文件:' + os.path.basename(file_path) + ',' + str(e)
			
 
				+        raise ValueError(message)
			
 
				+
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+def __build_directory_dict(directory_dict, path, filter_types=None):
			
 
				+    # 遍历目录下的所有项
			
 
				+    for item in os.listdir(path):
			
 
				+        item_path = os.path.join(path, item)
			
 
				+        if os.path.isdir(item_path):
			
 
				+            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
			
 
				+        elif os.path.isfile(item_path):
			
 
				+            if path not in directory_dict:
			
 
				+                directory_dict[path] = []
			
 
				+
			
 
				+            if filter_types is None or len(filter_types) == 0:
			
 
				+                directory_dict[path].append(item_path)
			
 
				+            elif str(item_path).split(".")[-1] in filter_types:
			
 
				+                if str(item_path).count("~$") == 0:
			
 
				+                    directory_dict[path].append(item_path)
			
 
				+
			
 
				+    # 读取所有文件
			
 
				+
			
 
				+
			
 
				+# 读取路径下所有的excel文件
			
 
				+def read_excel_files(read_path):
			
 
				+    directory_dict = {}
			
 
				+    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
			
 
				+
			
 
				+    return [path for paths in directory_dict.values() for path in paths if path]
			
 
				+
			
 
				+
			
 
				+class ContractPowerCurve(object):
			
 
				+
			
 
				+    def __init__(self, df: pd.DataFrame, wind_velocity='风速', active_power='功率'):
			
 
				+        self.df = df
			
 
				+        self.wind_velocity = wind_velocity
			
 
				+        self.active_power = active_power
			
 
				+
			
 
				+
			
 
				+# 创建路径
			
 
				+def create_file_path(path, is_file_path=False):
			
 
				+    if is_file_path:
			
 
				+        path = os.path.dirname(path)
			
 
				+
			
 
				+    if not os.path.exists(path):
			
 
				+        os.makedirs(path, exist_ok=True)
			
 
				+
			
 
				+
			
 
				+def scatter(title, x_label, y_label, x_values, y_values, color='blue', size=10, save_file_path=''):
			
 
				+    if save_file_path:
			
 
				+        create_file_path(save_file_path, True)
			
 
				+    else:
			
 
				+        save_file_path = title + '.png'
			
 
				+
			
 
				+    plt.figure(figsize=(8, 6))
			
 
				+    plt.title(title, fontsize=16)
			
 
				+    plt.xlabel(x_label, fontsize=14)
			
 
				+    plt.ylabel(y_label, fontsize=14)
			
 
				+    plt.scatter(x_values, y_values, s=size, c=color)
			
 
				+    plt.savefig(save_file_path)
			
 
				+    plt.close()
			
 
				+
			
 
				+
			
 
				+def marker_active_power(contract_power_curve_class: ContractPowerCurve, df: pd.DataFrame, active_power='有功功率 kW均值',
			
 
				+                        wind_velocity='风速 m/s均值'):
			
 
				+    """
			
 
				+    标记有功功率为正的记录
			
 
				+    :param contract_power_curve_class: 合同功率曲线
			
 
				+    :param df: 原始数据
			
 
				+    :return: 标记有功功率为正的原始数据
			
 
				+    """
			
 
				+    contract_power_curve_df = contract_power_curve_class.df
			
 
				+    curve_wv = contract_power_curve_df[contract_power_curve_class.wind_velocity].values
			
 
				+    curve_ap = contract_power_curve_df[contract_power_curve_class.active_power].values
			
 
				+
			
 
				+    df.dropna(subset=[active_power, wind_velocity], inplace=True)
			
 
				+    ap_gt_0_df = df[df[active_power] > 0]
			
 
				+    ap_le_0_df = df[df[active_power] <= 0]
			
 
				+    ap_le_0_df["marker"] = -1
			
 
				+
			
 
				+    active_power_values = ap_gt_0_df[active_power].values
			
 
				+    wind_speed_values = ap_gt_0_df[wind_velocity].values
			
 
				+    ap_gt_0_in = [0] * ap_gt_0_df.shape[0]
			
 
				+
			
 
				+    for i in range(len(ap_gt_0_in)):
			
 
				+        wind_speed = wind_speed_values[i]
			
 
				+        active_power = active_power_values[i]
			
 
				+
			
 
				+        # if active_power >= 2200 - 200:
			
 
				+        #     ap_gt_0_in[i] = 1
			
 
				+        # else:
			
 
				+        diffs = np.abs(curve_wv - wind_speed)
			
 
				+        # 找到差值最小的索引和对应的差值
			
 
				+        minDiff, idx = np.min(diffs), np.argmin(diffs)
			
 
				+
			
 
				+        # 使用找到的索引获取对应的值
			
 
				+        closestValue = curve_ap[idx]
			
 
				+        if active_power - closestValue >= -100:
			
 
				+            ap_gt_0_in[i] = 1
			
 
				+
			
 
				+    ap_gt_0_df['marker'] = ap_gt_0_in
			
 
				+    return pd.concat([ap_gt_0_df, ap_le_0_df])
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    wind_power_df = read_file_to_df(r"D:\中能智能\matlib计算相关\标记derating\PV_Curve.csv")
			
 
				+
			
 
				+    all_files = read_excel_files(r"Z:\collection_data\1进行中\诺木洪风电场-甘肃-华电\清理数据\min-666")
			
 
				+    save_path = r"D:\trans_data\诺木洪\清理数据\min-666-derating"
			
 
				+
			
 
				+    wind_power_df_class = ContractPowerCurve(wind_power_df)
			
 
				+
			
 
				+    for file in all_files:
			
 
				+        name = os.path.basename(file).split("@")[0]
			
 
				+        try:
			
 
				+            df = read_file_to_df(file)
			
 
				+            df = marker_active_power(wind_power_df_class, df)
			
 
				+            df = df[df['marker'] == 1]
			
 
				+            # 保存筛选后数据
			
 
				+            name = name.replace('HD', 'HD2')
			
 
				+            df.to_csv(os.path.join(save_path, name + '.csv'), index=False, encoding='utf-8')
			
 
				+
			
 
				+            # 使用scatter函数绘制散点图
			
 
				+            if not df.empty:
			
 
				+                scatter(name, x_label='风速均值', y_label='有功功率均值', x_values=df['风速 m/s均值'].values,
			
 
				+                        y_values=df['有功功率 kW均值'].values, color='green',
			
 
				+                        save_file_path=os.path.join(save_path, name + '均值.png'))
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(os.path.basename(file), "出错", str(e))
			
 
				+            raise e
			
--- a/tmp_file/pv_youxiaoxing.py
+++ b/tmp_file/pv_youxiaoxing.py
@@ -0,0 +1,264 @@
 
				+import multiprocessing
			
 
				+import os
			
 
				+
			
 
				+import matplotlib
			
 
				+import numpy as np
			
 
				+from matplotlib import pyplot as plt
			
 
				+
			
 
				+matplotlib.use('Agg')
			
 
				+matplotlib.rcParams['font.family'] = 'SimHei'  # 或者 'Microsoft YaHei'
			
 
				+matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # 或者 ['Microsoft YaHei']
			
 
				+
			
 
				+import chardet
			
 
				+import warnings
			
 
				+
			
 
				+warnings.filterwarnings("ignore")
			
 
				+
			
 
				+import datetime
			
 
				+
			
 
				+import pandas as pd
			
 
				+
			
 
				+
			
 
				+def get_time_space(df, time_str):
			
 
				+    """
			
 
				+    :return: 查询时间间隔
			
 
				+    """
			
 
				+    begin = datetime.datetime.now()
			
 
				+    df1 = pd.DataFrame(df[time_str])
			
 
				+    df1[time_str] = pd.to_datetime(df1[time_str], errors='coerce')
			
 
				+    df1.sort_values(by=time_str, inplace=True)
			
 
				+    df1['chazhi'] = df1[time_str].shift(-1) - df1[time_str]
			
 
				+    result = df1.sample(int(df1.shape[0] / 100))['chazhi'].value_counts().idxmax().seconds
			
 
				+    del df1
			
 
				+    print(datetime.datetime.now() - begin)
			
 
				+    return abs(result)
			
 
				+
			
 
				+
			
 
				+def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
			
 
				+    """
			
 
				+    获取俩个时间之间的个数
			
 
				+    :return: 查询时间间隔
			
 
				+    """
			
 
				+    delta = end_time - start_time
			
 
				+    total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
			
 
				+
			
 
				+    return abs(int(total_seconds / time_space)) + 1
			
 
				+
			
 
				+
			
 
				+# 获取文件编码
			
 
				+def detect_file_encoding(filename):
			
 
				+    # 读取文件的前1000个字节（足够用于大多数编码检测）
			
 
				+    with open(filename, 'rb') as f:
			
 
				+        rawdata = f.read(1000)
			
 
				+    result = chardet.detect(rawdata)
			
 
				+    encoding = result['encoding']
			
 
				+
			
 
				+    if encoding is None:
			
 
				+        encoding = 'gb18030'
			
 
				+
			
 
				+    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
			
 
				+        encoding = 'gb18030'
			
 
				+    return encoding
			
 
				+
			
 
				+
			
 
				+def del_blank(df=pd.DataFrame(), cols=list()):
			
 
				+    for col in cols:
			
 
				+        if df[col].dtype == object:
			
 
				+            df[col] = df[col].str.strip()
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+# 切割数组到多个数组
			
 
				+def split_array(array, num):
			
 
				+    return [array[i:i + num] for i in range(0, len(array), num)]
			
 
				+
			
 
				+
			
 
				+# 读取数据到df
			
 
				+def read_file_to_df(file_path, read_cols=list(), header=0):
			
 
				+    try:
			
 
				+        df = pd.DataFrame()
			
 
				+        if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
			
 
				+            encoding = detect_file_encoding(file_path)
			
 
				+            end_with_gz = str(file_path).lower().endswith("gz")
			
 
				+            if read_cols:
			
 
				+                if end_with_gz:
			
 
				+                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
			
 
				+                else:
			
 
				+                    df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header,
			
 
				+                                     on_bad_lines='warn')
			
 
				+            else:
			
 
				+
			
 
				+                if end_with_gz:
			
 
				+                    df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
			
 
				+                else:
			
 
				+                    df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
			
 
				+
			
 
				+        else:
			
 
				+            xls = pd.ExcelFile(file_path)
			
 
				+            # 获取所有的sheet名称
			
 
				+            sheet_names = xls.sheet_names
			
 
				+            for sheet in sheet_names:
			
 
				+                if read_cols:
			
 
				+                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)
			
 
				+                else:
			
 
				+                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header)
			
 
				+
			
 
				+                df = pd.concat([df, now_df])
			
 
				+
			
 
				+        print('文件读取成功', file_path, '文件数量', df.shape)
			
 
				+    except Exception as e:
			
 
				+        print('读取文件出错', file_path, str(e))
			
 
				+        message = '文件:' + os.path.basename(file_path) + ',' + str(e)
			
 
				+        raise ValueError(message)
			
 
				+
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+def __build_directory_dict(directory_dict, path, filter_types=None):
			
 
				+    # 遍历目录下的所有项
			
 
				+    for item in os.listdir(path):
			
 
				+        item_path = os.path.join(path, item)
			
 
				+        if os.path.isdir(item_path):
			
 
				+            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
			
 
				+        elif os.path.isfile(item_path):
			
 
				+            if path not in directory_dict:
			
 
				+                directory_dict[path] = []
			
 
				+
			
 
				+            if filter_types is None or len(filter_types) == 0:
			
 
				+                directory_dict[path].append(item_path)
			
 
				+            elif str(item_path).split(".")[-1] in filter_types:
			
 
				+                if str(item_path).count("~$") == 0:
			
 
				+                    directory_dict[path].append(item_path)
			
 
				+
			
 
				+    # 读取所有文件
			
 
				+
			
 
				+
			
 
				+# 读取路径下所有的excel文件
			
 
				+def read_excel_files(read_path):
			
 
				+    directory_dict = {}
			
 
				+    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
			
 
				+
			
 
				+    return [path for paths in directory_dict.values() for path in paths if path]
			
 
				+
			
 
				+
			
 
				+# 创建路径
			
 
				+def create_file_path(path, is_file_path=False):
			
 
				+    if is_file_path:
			
 
				+        path = os.path.dirname(path)
			
 
				+
			
 
				+    if not os.path.exists(path):
			
 
				+        os.makedirs(path, exist_ok=True)
			
 
				+
			
 
				+
			
 
				+def time_biaozhun(df):
			
 
				+    time_space = get_time_space(df, '时间')
			
 
				+    query_df = df[['时间']]
			
 
				+    query_df['时间'] = pd.to_datetime(df['时间'], errors="coerce")
			
 
				+    query_df = query_df.dropna(subset=['时间'])
			
 
				+    total = get_time_space_count(query_df['时间'].min(), query_df['时间'].max(), time_space)
			
 
				+    return total, save_percent(1 - query_df.shape[0] / total), save_percent(1 - df.shape[0] / total)
			
 
				+
			
 
				+
			
 
				+def save_percent(value, save_decimal=7):
			
 
				+    return round(value, save_decimal) * 100
			
 
				+
			
 
				+
			
 
				+def calc(df, file_name):
			
 
				+    error_dict = {}
			
 
				+    lose_dict = {}
			
 
				+    error_dict['箱变'] = "".join(file_name.split(".")[:-1])
			
 
				+    lose_dict['箱变'] = "".join(file_name.split(".")[:-1])
			
 
				+
			
 
				+    total, lose_time, error_time = time_biaozhun(df)
			
 
				+    error_dict['时间'] = error_time
			
 
				+    lose_dict['时间'] = lose_time
			
 
				+
			
 
				+    error_df = pd.DataFrame()
			
 
				+    lose_df = pd.DataFrame()
			
 
				+
			
 
				+    try:
			
 
				+        df.columns = ["".join(["逆变器" + "".join(col.split("逆变器")[1:])]) if col.find("逆变器") > -1 else col for col in
			
 
				+                      df.columns]
			
 
				+
			
 
				+        for col in df.columns:
			
 
				+            if col == '时间':
			
 
				+                continue
			
 
				+            query_df = df[[col]]
			
 
				+            query_df[col] = pd.to_numeric(query_df[col], errors="coerce")
			
 
				+            query_df = query_df.dropna(subset=[col])
			
 
				+            lose_dict[col] = save_percent(1 - query_df.shape[0] / total)
			
 
				+
			
 
				+            if col.find('电压') > -1:
			
 
				+                error_dict[col] = save_percent(query_df[query_df[col] < 0].shape[0] / total)
			
 
				+
			
 
				+            if col.find('电流') > -1:
			
 
				+                error_dict[col] = save_percent(query_df[query_df[col] < -0.1].shape[0] / total)
			
 
				+
			
 
				+            if col.find('逆变器效率') > -1:
			
 
				+                error_dict[col] = save_percent(query_df[(query_df[col] <= 0) | (query_df[col] >= 100)].shape[0] / total)
			
 
				+
			
 
				+            if col.find('温度') > -1:
			
 
				+                error_dict[col] = save_percent(query_df[(query_df[col] < 0) | (query_df[col] > 100)].shape[0] / total)
			
 
				+
			
 
				+            if col.find('功率因数') > -1:
			
 
				+                error_dict[col] = save_percent(query_df[(query_df[col] < 0) | (query_df[col] > 1)].shape[0] / total)
			
 
				+
			
 
				+        total, count = 0, 0
			
 
				+        for k, v in error_dict.items():
			
 
				+            if k != '箱变':
			
 
				+                total = total + error_dict[k]
			
 
				+                count = count + 1
			
 
				+
			
 
				+        error_dict['平均异常率'] = save_percent(total / count / 100)
			
 
				+
			
 
				+        total, count = 0, 0
			
 
				+        for k, v in lose_dict.items():
			
 
				+            if k != '箱变':
			
 
				+                total = total + lose_dict[k]
			
 
				+                count = count + 1
			
 
				+
			
 
				+        lose_dict['平均缺失率'] = save_percent(total / count / 100)
			
 
				+
			
 
				+        error_df = pd.concat([error_df, pd.DataFrame(error_dict, index=[0])])
			
 
				+        lose_df = pd.concat([lose_df, pd.DataFrame(lose_dict, index=[0])])
			
 
				+
			
 
				+        error_df_cols = ['箱变', '平均异常率']
			
 
				+        for col in error_df.columns:
			
 
				+            if col not in error_df_cols:
			
 
				+                error_df_cols.append(col)
			
 
				+
			
 
				+        lose_df_cols = ['箱变', '平均缺失率']
			
 
				+        for col in lose_df.columns:
			
 
				+            if col not in lose_df_cols:
			
 
				+                lose_df_cols.append(col)
			
 
				+
			
 
				+        error_df = error_df[error_df_cols]
			
 
				+        lose_df = lose_df[lose_df_cols]
			
 
				+    except Exception as e:
			
 
				+        print("异常文件", os.path.basename(file_name))
			
 
				+        raise e
			
 
				+
			
 
				+    return error_df, lose_df
			
 
				+
			
 
				+
			
 
				+def run(file_path):
			
 
				+    df = read_file_to_df(file_path)
			
 
				+    return calc(df, os.path.basename(file_path))
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    # read_path = r'/data/download/大唐玉湖性能分析离线分析/05整理数据/逆变器数据'
			
 
				+    # save_path = r'/data/download/大唐玉湖性能分析离线分析/06整理数据/逆变器数据'
			
 
				+
			
 
				+    read_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\yuanshi'
			
 
				+    save_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\zhengli'
			
 
				+    all_files = read_excel_files(read_path)
			
 
				+
			
 
				+    with multiprocessing.Pool(2) as pool:
			
 
				+        df_arrys = pool.starmap(run, [(file,) for file in all_files])
			
 
				+
			
 
				+    error_df = pd.concat([df[0] for df in df_arrys])
			
 
				+    lose_df = pd.concat([df[1] for df in df_arrys])
			
 
				+    with pd.ExcelWriter(os.path.join(save_path, "玉湖光伏数据统计.xlsx")) as writer:
			
 
				+        error_df.to_excel(writer, sheet_name='error_percent', index=False)
			
 
				+        lose_df.to_excel(writer, sheet_name='lose_percent', index=False)
			
--- a/tmp_file/qinghai-nuomuhong-guifan.py
+++ b/tmp_file/qinghai-nuomuhong-guifan.py
@@ -0,0 +1,135 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Spyder 编辑器
			
 
				+
			
 
				+这是一个临时脚本文件。
			
 
				+"""
			
 
				+import copy
			
 
				+import datetime
			
 
				+import multiprocessing
			
 
				+import os
			
 
				+
			
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+
			
 
				+dianjian_str = """
			
 
				+wind_turbine_number		
			
 
				+time_stamp		时间
			
 
				+active_power		有功功率 kW
			
 
				+rotor_speed		风轮转速 rpm
			
 
				+generator_speed		发电机转速 rpm
			
 
				+wind_velocity		风速 m/s
			
 
				+pitch_angle_blade_1		叶片1角度 °
			
 
				+pitch_angle_blade_2		叶片2角度 °
			
 
				+pitch_angle_blade_3		叶片3角度 °
			
 
				+cabin_position		机舱位置 °
			
 
				+true_wind_direction		
			
 
				+yaw_error1		风向 °
			
 
				+twisted_cable_angle		
			
 
				+main_bearing_temperature		主轴温度 ℃
			
 
				+gearbox_oil_temperature		齿轮箱温度 ℃
			
 
				+gearbox_low_speed_shaft_bearing_temperature		齿轮箱轴承温度 ℃
			
 
				+gearboxmedium_speed_shaftbearing_temperature		
			
 
				+gearbox_high_speed_shaft_bearing_temperature		齿轮箱轴承温度2 ℃
			
 
				+generatordrive_end_bearing_temperature		发电机驱动侧轴承温度 ℃
			
 
				+generatornon_drive_end_bearing_temperature		发电机非驱动侧轴承温度 ℃
			
 
				+cabin_temperature		机舱温度 ℃
			
 
				+outside_cabin_temperature		舱外温度 ℃
			
 
				+generator_winding1_temperature		
			
 
				+generator_winding2_temperature		
			
 
				+generator_winding3_temperature		
			
 
				+front_back_vibration_of_the_cabin		
			
 
				+side_to_side_vibration_of_the_cabin		
			
 
				+required_gearbox_speed		
			
 
				+inverter_speed_master_control		
			
 
				+actual_torque		
			
 
				+given_torque		
			
 
				+clockwise_yaw_count		
			
 
				+counterclockwise_yaw_count		
			
 
				+unusable		
			
 
				+power_curve_available		
			
 
				+set_value_of_active_power		有功功率设定 kW
			
 
				+wind_turbine_status		
			
 
				+wind_turbine_status2		
			
 
				+turbulence_intensity		
			
 
				+"""
			
 
				+
			
 
				+datas = [i for i in dianjian_str.split("\n") if i]
			
 
				+
			
 
				+dianjian_dict = dict()
			
 
				+
			
 
				+for data in datas:
			
 
				+    ds = data.split("\t")
			
 
				+
			
 
				+    if len(ds) == 3:
			
 
				+        dianjian_dict[ds[0]] = ds[2]
			
 
				+    else:
			
 
				+        dianjian_dict[ds[0]] = ''
			
 
				+
			
 
				+
			
 
				+def read_df(file_path):
			
 
				+    df = pd.read_csv(file_path, header=[0, 1])
			
 
				+
			
 
				+    col_nams_map = dict()
			
 
				+    pre_col = ""
			
 
				+    for tuple_col in df.columns:
			
 
				+        col1 = tuple_col[0]
			
 
				+        col2 = tuple_col[1]
			
 
				+        if str(col1).startswith("Unnamed"):
			
 
				+            if pre_col:
			
 
				+                col1 = pre_col
			
 
				+                pre_col = ''
			
 
				+            else:
			
 
				+                col1 = ''
			
 
				+        else:
			
 
				+            pre_col = col1
			
 
				+
			
 
				+        if str(col2).startswith("Unnamed"):
			
 
				+            col2 = ''
			
 
				+
			
 
				+        col_nams_map[str(tuple_col)] = ''.join([col1, col2])
			
 
				+    # print(col_nams_map)
			
 
				+    # for k, v in col_nams_map.items():
			
 
				+    #     if str(v).endswith('采样值'):
			
 
				+    #         col_nams_map[k] = str(v)[:-3]
			
 
				+
			
 
				+    df.columns = [str(col) for col in df.columns]
			
 
				+    df.rename(columns=col_nams_map, inplace=True)
			
 
				+
			
 
				+    # for col, name in dianjian_dict.items():
			
 
				+    #     if name in df.columns:
			
 
				+    #         df.rename(columns={name: col}, inplace=True)
			
 
				+
			
 
				+    # for col in df.columns:
			
 
				+    #     if col not in dianjian_dict.keys():
			
 
				+    #         del df[col]
			
 
				+
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+def get_wind_name_files(path):
			
 
				+    files = os.listdir(path)
			
 
				+    return files
			
 
				+
			
 
				+
			
 
				+def combine_df(save_path, file):
			
 
				+    begin = datetime.datetime.now()
			
 
				+    df = read_df(file)
			
 
				+    print("读取", file, df.shape)
			
 
				+    df.replace("-", np.nan,inplace=True)
			
 
				+    df.to_csv(os.path.join(save_path, os.path.basename(file)), encoding='utf-8', index=False)
			
 
				+
			
 
				+    print('整理完成', '耗时:', (datetime.datetime.now() - begin).seconds)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    read_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/min-666'
			
 
				+    save_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/清理数据/min-666'
			
 
				+
			
 
				+    # read_path = r'D:\trans_data\诺木洪\收资数据\min-666'
			
 
				+    # save_path = r'D:\trans_data\诺木洪\清理数据\min-666'
			
 
				+    if not os.path.exists(save_path):
			
 
				+        os.makedirs(save_path, exist_ok=True)
			
 
				+
			
 
				+    with multiprocessing.Pool(20) as pool:
			
 
				+        pool.starmap(combine_df, [(save_path, read_path + os.sep + file) for file in os.listdir(read_path)])
			
--- a/tmp_file/qinghai-nuomuhong.py
+++ b/tmp_file/qinghai-nuomuhong.py
@@ -0,0 +1,162 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Spyder 编辑器
			
 
				+
			
 
				+这是一个临时脚本文件。
			
 
				+"""
			
 
				+import copy
			
 
				+import datetime
			
 
				+import multiprocessing
			
 
				+import os
			
 
				+
			
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+
			
 
				+dianjian_str = """
			
 
				+wind_turbine_number		
			
 
				+time_stamp		时间
			
 
				+active_power		有功功率 kW
			
 
				+rotor_speed		风轮转速 rpm
			
 
				+generator_speed		发电机转速 rpm
			
 
				+wind_velocity		风速 m/s
			
 
				+pitch_angle_blade_1		叶片1角度 °
			
 
				+pitch_angle_blade_2		叶片2角度 °
			
 
				+pitch_angle_blade_3		叶片3角度 °
			
 
				+cabin_position		机舱位置 °
			
 
				+true_wind_direction		
			
 
				+yaw_error1		风向 °
			
 
				+twisted_cable_angle		
			
 
				+main_bearing_temperature		主轴温度 ℃
			
 
				+gearbox_oil_temperature		齿轮箱温度 ℃
			
 
				+gearbox_low_speed_shaft_bearing_temperature		齿轮箱轴承温度 ℃
			
 
				+gearboxmedium_speed_shaftbearing_temperature		
			
 
				+gearbox_high_speed_shaft_bearing_temperature		齿轮箱轴承温度2 ℃
			
 
				+generatordrive_end_bearing_temperature		发电机驱动侧轴承温度 ℃
			
 
				+generatornon_drive_end_bearing_temperature		发电机非驱动侧轴承温度 ℃
			
 
				+cabin_temperature		机舱温度 ℃
			
 
				+outside_cabin_temperature		舱外温度 ℃
			
 
				+generator_winding1_temperature		
			
 
				+generator_winding2_temperature		
			
 
				+generator_winding3_temperature		
			
 
				+front_back_vibration_of_the_cabin		
			
 
				+side_to_side_vibration_of_the_cabin		
			
 
				+required_gearbox_speed		
			
 
				+inverter_speed_master_control		
			
 
				+actual_torque		
			
 
				+given_torque		
			
 
				+clockwise_yaw_count		
			
 
				+counterclockwise_yaw_count		
			
 
				+unusable		
			
 
				+power_curve_available		
			
 
				+set_value_of_active_power		有功功率设定 kW
			
 
				+wind_turbine_status		
			
 
				+wind_turbine_status2		
			
 
				+turbulence_intensity		
			
 
				+"""
			
 
				+
			
 
				+datas = [i for i in dianjian_str.split("\n") if i]
			
 
				+
			
 
				+dianjian_dict = dict()
			
 
				+
			
 
				+for data in datas:
			
 
				+    ds = data.split("\t")
			
 
				+
			
 
				+    if len(ds) == 3:
			
 
				+        dianjian_dict[ds[0]] = ds[2]
			
 
				+    else:
			
 
				+        dianjian_dict[ds[0]] = ''
			
 
				+
			
 
				+
			
 
				+def read_df(file_path):
			
 
				+    df = pd.read_csv(file_path, header=[0, 1])
			
 
				+
			
 
				+    col_nams_map = dict()
			
 
				+    pre_col = ""
			
 
				+    for tuple_col in df.columns:
			
 
				+        col1 = tuple_col[0]
			
 
				+        col2 = tuple_col[1]
			
 
				+        if str(col1).startswith("Unnamed"):
			
 
				+            if pre_col:
			
 
				+                col1 = pre_col
			
 
				+                pre_col = ''
			
 
				+            else:
			
 
				+                col1 = ''
			
 
				+        else:
			
 
				+            pre_col = col1
			
 
				+
			
 
				+        if str(col2).startswith("Unnamed"):
			
 
				+            col2 = ''
			
 
				+
			
 
				+        col_nams_map[str(tuple_col)] = ''.join([col1, col2])
			
 
				+    print(col_nams_map)
			
 
				+    for k, v in col_nams_map.items():
			
 
				+        if str(v).endswith('采样值'):
			
 
				+            col_nams_map[k] = str(v)[:-3]
			
 
				+
			
 
				+    df.columns = [str(col) for col in df.columns]
			
 
				+    df.rename(columns=col_nams_map, inplace=True)
			
 
				+
			
 
				+    for col, name in dianjian_dict.items():
			
 
				+        if name in df.columns:
			
 
				+            df.rename(columns={name: col}, inplace=True)
			
 
				+
			
 
				+    for col in df.columns:
			
 
				+        if col not in dianjian_dict.keys():
			
 
				+            del df[col]
			
 
				+
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+def get_wind_name_files(path):
			
 
				+    files = os.listdir(path)
			
 
				+
			
 
				+    wind_files_map = dict()
			
 
				+    for file in files:
			
 
				+        full_file = os.path.join(path, file)
			
 
				+        file_datas = str(file).split("@")
			
 
				+        key = file_datas[0].replace("HD", "HD2")
			
 
				+        if key in wind_files_map.keys():
			
 
				+            wind_files_map[key].append(full_file)
			
 
				+        else:
			
 
				+            wind_files_map[key] = [full_file]
			
 
				+
			
 
				+    return wind_files_map
			
 
				+
			
 
				+
			
 
				+def combine_df(save_path, wind_name, files):
			
 
				+    begin = datetime.datetime.now()
			
 
				+    df = pd.DataFrame()
			
 
				+    for file in files:
			
 
				+        query_df = read_df(file)
			
 
				+        print("读取", file, query_df.shape)
			
 
				+        query_df['time_stamp'] = pd.to_datetime(query_df['time_stamp'])
			
 
				+        query_df.set_index(keys='time_stamp', inplace=True)
			
 
				+        query_df = query_df[~query_df.index.duplicated(keep='first')]
			
 
				+        if df.empty:
			
 
				+            df = copy.deepcopy(query_df)
			
 
				+        else:
			
 
				+            df = pd.concat([df, query_df], join='inner')
			
 
				+    df.reset_index(inplace=True)
			
 
				+    df['wind_turbine_number'] = wind_name
			
 
				+    for col, name in dianjian_dict.items():
			
 
				+        if col not in df.columns:
			
 
				+            df[col] = np.nan
			
 
				+
			
 
				+    df = df[dianjian_dict.keys()]
			
 
				+    df.to_csv(os.path.join(save_path, wind_name + ".csv"), encoding='utf-8', index=False)
			
 
				+
			
 
				+    print(wind_name, '整理完成', '耗时:', (datetime.datetime.now() - begin).seconds)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    read_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/sec'
			
 
				+    save_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/sec_采样值'
			
 
				+
			
 
				+    # read_path = r'D:\trans_data\诺木洪\收资数据\min'
			
 
				+    # save_path = r'D:\trans_data\诺木洪\清理数据\min'
			
 
				+    if not os.path.exists(save_path):
			
 
				+        os.makedirs(save_path, exist_ok=True)
			
 
				+    wind_files_map = get_wind_name_files(read_path)
			
 
				+
			
 
				+    with multiprocessing.Pool(20) as pool:
			
 
				+        pool.starmap(combine_df, [(save_path, wind_name, files) for wind_name, files in wind_files_map.items()])
			
--- a/tmp_file/光伏箱体.py
+++ b/tmp_file/光伏箱体.py
@@ -0,0 +1,155 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Created on Tue Jul  9 16:28:48 2024
			
 
				+
			
 
				+@author: Administrator
			
 
				+"""
			
 
				+
			
 
				+import os
			
 
				+from datetime import datetime, timedelta
			
 
				+import pandas as pd
			
 
				+import chardet
			
 
				+from multiprocessing import Pool
			
 
				+
			
 
				+
			
 
				+# 获取文件编码
			
 
				+def detect_file_encoding(filename):
			
 
				+    # 读取文件的前1000个字节（足够用于大多数编码检测）
			
 
				+    with open(filename, 'rb') as f:
			
 
				+        rawdata = f.read(1000)
			
 
				+    result = chardet.detect(rawdata)
			
 
				+    encoding = result['encoding']
			
 
				+
			
 
				+    if encoding is None:
			
 
				+        encoding = 'gb18030'
			
 
				+
			
 
				+    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
			
 
				+        encoding = 'gb18030'
			
 
				+    return encoding
			
 
				+
			
 
				+
			
 
				+# 读取数据到df
			
 
				+def read_file_to_df(file_path, read_cols=list(), header=0):
			
 
				+    df = pd.DataFrame()
			
 
				+    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
			
 
				+        encoding = detect_file_encoding(file_path)
			
 
				+        end_with_gz = str(file_path).lower().endswith("gz")
			
 
				+        if read_cols:
			
 
				+            if end_with_gz:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
			
 
				+            else:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
			
 
				+        else:
			
 
				+
			
 
				+            if end_with_gz:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
			
 
				+            else:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
			
 
				+
			
 
				+    else:
			
 
				+        xls = pd.ExcelFile(file_path)
			
 
				+        # 获取所有的sheet名称
			
 
				+        sheet_names = xls.sheet_names
			
 
				+        for sheet in sheet_names:
			
 
				+            if read_cols:
			
 
				+                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
			
 
				+            else:
			
 
				+                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
			
 
				+
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+def __build_directory_dict(directory_dict, path, filter_types=None):
			
 
				+    # 遍历目录下的所有项
			
 
				+    for item in os.listdir(path):
			
 
				+        item_path = os.path.join(path, item)
			
 
				+        if os.path.isdir(item_path):
			
 
				+            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
			
 
				+        elif os.path.isfile(item_path):
			
 
				+            if path not in directory_dict:
			
 
				+                directory_dict[path] = []
			
 
				+
			
 
				+            if filter_types is None or len(filter_types) == 0:
			
 
				+                directory_dict[path].append(item_path)
			
 
				+            elif str(item_path).split(".")[-1] in filter_types:
			
 
				+                if str(item_path).count("~$") == 0:
			
 
				+                    directory_dict[path].append(item_path)
			
 
				+
			
 
				+    # 读取所有文件
			
 
				+
			
 
				+
			
 
				+# 读取路径下所有的excel文件
			
 
				+def read_excel_files(read_path):
			
 
				+    directory_dict = {}
			
 
				+    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
			
 
				+
			
 
				+    return [path for paths in directory_dict.values() for path in paths if path]
			
 
				+
			
 
				+
			
 
				+# 创建路径
			
 
				+def create_file_path(path, is_file_path=False):
			
 
				+    if is_file_path:
			
 
				+        path = os.path.dirname(path)
			
 
				+
			
 
				+    if not os.path.exists(path):
			
 
				+        os.makedirs(path, exist_ok=True)
			
 
				+
			
 
				+
			
 
				+def read_and_save_csv(file_path):
			
 
				+    begin = datetime.now()
			
 
				+    base_name = os.path.basename(file_path)
			
 
				+    print('开始', base_name)
			
 
				+
			
 
				+    df1 = read_file_to_df(file_path + "箱变（1-8号逆变器）数据1.xls")
			
 
				+    del df1['Unnamed: 0']
			
 
				+    df1['时间'] = pd.to_datetime(df1['时间'])
			
 
				+    df1.set_index(keys='时间', inplace=True)
			
 
				+
			
 
				+    df2 = read_file_to_df(file_path + "箱变（9-16号逆变器）数据1.xls")
			
 
				+    del df2['Unnamed: 0']
			
 
				+    df2['时间'] = pd.to_datetime(df2['时间'])
			
 
				+    df2.set_index(keys='时间', inplace=True)
			
 
				+
			
 
				+    df3 = read_file_to_df(file_path + "箱变（1-8号逆变器）数据2.xls")
			
 
				+    del df3['Unnamed: 0']
			
 
				+    df3['时间'] = pd.to_datetime(df3['时间'])
			
 
				+    df3.set_index(keys='时间', inplace=True)
			
 
				+
			
 
				+    df4 = read_file_to_df(file_path + "箱变（9-16号逆变器）数据2.xls")
			
 
				+    del df4['Unnamed: 0']
			
 
				+    df4['时间'] = pd.to_datetime(df4['时间'])
			
 
				+    df4.set_index(keys='时间', inplace=True)
			
 
				+
			
 
				+    df = pd.concat([df1, df2, df3, df4], axis=1)
			
 
				+    df.reset_index(inplace=True)
			
 
				+    columns = list(df.columns)
			
 
				+    columns.sort()
			
 
				+
			
 
				+    print(df.columns)
			
 
				+
			
 
				+    df = df[columns]
			
 
				+    df.sort_values(by='时间', inplace=True)
			
 
				+
			
 
				+    df.to_csv(os.path.join(r'D:\trans_data\大唐玉湖性能分析离线分析', '05整理数据', base_name + '_箱变.csv'), encoding='utf-8',
			
 
				+              index=False)
			
 
				+    print('结束', base_name, '耗时:' + str(datetime.now() - begin))
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+
			
 
				+    path = r'D:\trans_data\大唐玉湖性能分析离线分析\test'
			
 
				+    all_files = read_excel_files(path)
			
 
				+
			
 
				+    all_paths = set()
			
 
				+    for file in all_files:
			
 
				+        base_name = os.path.basename(file).split("箱变")[0]
			
 
				+        base_path = os.path.dirname(file)
			
 
				+        if base_name not in all_paths:
			
 
				+            all_paths.add(os.path.join(base_path, base_name))
			
 
				+
			
 
				+    all_datas = list(all_paths)
			
 
				+    all_datas.sort()
			
 
				+
			
 
				+    print(all_datas)
			
 
				+    # with Pool(1) as pool:
			
 
				+    #     pool.starmap(read_and_save_csv, [(i,) for i in all_datas])
			
--- a/tmp_file/玉湖光伏-标准化.py
+++ b/tmp_file/玉湖光伏-标准化.py
@@ -0,0 +1,157 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Created on Tue Jul  9 16:28:48 2024
			
 
				+
			
 
				+@author: Administrator
			
 
				+"""
			
 
				+import multiprocessing
			
 
				+import os
			
 
				+from datetime import datetime, timedelta
			
 
				+import pandas as pd
			
 
				+import chardet
			
 
				+
			
 
				+pd.options.mode.copy_on_write = True
			
 
				+
			
 
				+
			
 
				+# 获取文件编码
			
 
				+def detect_file_encoding(filename):
			
 
				+    # 读取文件的前1000个字节（足够用于大多数编码检测）
			
 
				+    with open(filename, 'rb') as f:
			
 
				+        rawdata = f.read(1000)
			
 
				+    result = chardet.detect(rawdata)
			
 
				+    encoding = result['encoding']
			
 
				+
			
 
				+    if encoding is None:
			
 
				+        encoding = 'gb18030'
			
 
				+
			
 
				+    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
			
 
				+        encoding = 'gb18030'
			
 
				+    return encoding
			
 
				+
			
 
				+
			
 
				+# 读取数据到df
			
 
				+def read_file_to_df(file_path, read_cols=list(), header=0):
			
 
				+    df = pd.DataFrame()
			
 
				+    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
			
 
				+        encoding = detect_file_encoding(file_path)
			
 
				+        end_with_gz = str(file_path).lower().endswith("gz")
			
 
				+        if read_cols:
			
 
				+            if end_with_gz:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
			
 
				+            else:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
			
 
				+        else:
			
 
				+
			
 
				+            if end_with_gz:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
			
 
				+            else:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
			
 
				+
			
 
				+    else:
			
 
				+        xls = pd.ExcelFile(file_path)
			
 
				+        # 获取所有的sheet名称
			
 
				+        sheet_names = xls.sheet_names
			
 
				+        for sheet in sheet_names:
			
 
				+            if read_cols:
			
 
				+                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
			
 
				+            else:
			
 
				+                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
			
 
				+
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+def __build_directory_dict(directory_dict, path, filter_types=None):
			
 
				+    # 遍历目录下的所有项
			
 
				+    for item in os.listdir(path):
			
 
				+        item_path = os.path.join(path, item)
			
 
				+        if os.path.isdir(item_path):
			
 
				+            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
			
 
				+        elif os.path.isfile(item_path):
			
 
				+            if path not in directory_dict:
			
 
				+                directory_dict[path] = []
			
 
				+
			
 
				+            if filter_types is None or len(filter_types) == 0:
			
 
				+                directory_dict[path].append(item_path)
			
 
				+            elif str(item_path).split(".")[-1] in filter_types:
			
 
				+                if str(item_path).count("~$") == 0:
			
 
				+                    directory_dict[path].append(item_path)
			
 
				+
			
 
				+    # 读取所有文件
			
 
				+
			
 
				+
			
 
				+# 读取路径下所有的excel文件
			
 
				+def read_excel_files(read_path):
			
 
				+    directory_dict = {}
			
 
				+    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
			
 
				+
			
 
				+    return [path for paths in directory_dict.values() for path in paths if path]
			
 
				+
			
 
				+
			
 
				+# 创建路径
			
 
				+def create_file_path(path, is_file_path=False):
			
 
				+    if is_file_path:
			
 
				+        path = os.path.dirname(path)
			
 
				+
			
 
				+    if not os.path.exists(path):
			
 
				+        os.makedirs(path, exist_ok=True)
			
 
				+
			
 
				+
			
 
				+def generate_df(pv_df, col):
			
 
				+    if col != '时间':
			
 
				+        xiangbian = col.split("逆变器")[0].replace("#", "")
			
 
				+        nibianqi = col.split("-")[0].split('逆变器')[1]
			
 
				+        pv_index = col.split("-")[1].replace("PV", "")
			
 
				+        now_df = pv_df[['时间', col + '输入电流()', col + '输入电压()']]
			
 
				+        now_df.loc[:, '箱变'] = xiangbian
			
 
				+        now_df.loc[:, '逆变器'] = nibianqi
			
 
				+        now_df.loc[:, 'PV'] = pv_index
			
 
				+        now_df.columns = [df_col.replace(col, "").replace("()", "") for df_col in now_df.columns]
			
 
				+        now_df['输入电流'] = now_df['输入电流'].astype(float)
			
 
				+        now_df['输入电压'] = now_df['输入电压'].astype(float)
			
 
				+
			
 
				+        print(xiangbian, nibianqi, pv_index, now_df.shape)
			
 
				+        return now_df
			
 
				+    return pd.DataFrame()
			
 
				+
			
 
				+
			
 
				+def read_and_save_csv(file_path, save_path):
			
 
				+    begin = datetime.now()
			
 
				+    base_name = os.path.basename(file_path)
			
 
				+    print('开始', base_name)
			
 
				+
			
 
				+    df = read_file_to_df(file_path)
			
 
				+    df['时间'] = pd.to_datetime(df['时间'])
			
 
				+    # df.set_index(keys='时间', inplace=True)
			
 
				+
			
 
				+    pv_df_cols = [col for col in df.columns if col.find('输入电') > -1]
			
 
				+    pv_df_cols.append('时间')
			
 
				+    pv_df = df[pv_df_cols]
			
 
				+    shuru_cols = set([col.split("输入电")[0] for col in pv_df.columns])
			
 
				+
			
 
				+    with multiprocessing.Pool(6) as pool:
			
 
				+        dfs = pool.starmap(generate_df, [(pv_df, col) for col in shuru_cols])
			
 
				+
			
 
				+    saved_pv_df = pd.concat(dfs)
			
 
				+    saved_pv_df.sort_values(by=['箱变', '逆变器', 'PV', '时间'], inplace=True)
			
 
				+    save_file = os.path.join(save_path, os.path.basename(file_path).split(".")[0], 'PV.csv')
			
 
				+    create_file_path(save_file, True)
			
 
				+
			
 
				+    saved_pv_df.to_csv(save_file, encoding='utf-8', index=False)
			
 
				+
			
 
				+    print('结束', base_name, '耗时:' + str(datetime.now() - begin))
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\yuanshi'
			
 
				+    save_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\zhengli'
			
 
				+    all_files = read_excel_files(path)
			
 
				+
			
 
				+    all_datas = list(all_files)
			
 
				+    all_datas.sort()
			
 
				+    print(all_datas)
			
 
				+
			
 
				+    for file in all_datas:
			
 
				+        read_and_save_csv(file, save_path)
			
 
				+
			
 
				+    # with Pool(1) as pool:
			
 
				+    #     pool.starmap(read_and_save_csv, [(i, save_path) for i in all_datas])
			
--- a/tmp_file/玉湖光伏-标准化_1.py
+++ b/tmp_file/玉湖光伏-标准化_1.py
@@ -0,0 +1,208 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Created on Tue Jul  9 16:28:48 2024
			
 
				+
			
 
				+@author: Administrator
			
 
				+"""
			
 
				+import multiprocessing
			
 
				+import os
			
 
				+from datetime import datetime, timedelta
			
 
				+import pandas as pd
			
 
				+import chardet
			
 
				+
			
 
				+pd.options.mode.copy_on_write = True
			
 
				+
			
 
				+
			
 
				+# 获取文件编码
			
 
				+def detect_file_encoding(filename):
			
 
				+    # 读取文件的前1000个字节（足够用于大多数编码检测）
			
 
				+    with open(filename, 'rb') as f:
			
 
				+        rawdata = f.read(1000)
			
 
				+    result = chardet.detect(rawdata)
			
 
				+    encoding = result['encoding']
			
 
				+
			
 
				+    if encoding is None:
			
 
				+        encoding = 'gb18030'
			
 
				+
			
 
				+    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
			
 
				+        encoding = 'gb18030'
			
 
				+    return encoding
			
 
				+
			
 
				+
			
 
				+# 读取数据到df
			
 
				+def read_file_to_df(file_path, read_cols=list(), header=0):
			
 
				+    df = pd.DataFrame()
			
 
				+    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
			
 
				+        encoding = detect_file_encoding(file_path)
			
 
				+        end_with_gz = str(file_path).lower().endswith("gz")
			
 
				+        if read_cols:
			
 
				+            if end_with_gz:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
			
 
				+            else:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
			
 
				+        else:
			
 
				+
			
 
				+            if end_with_gz:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
			
 
				+            else:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
			
 
				+
			
 
				+    else:
			
 
				+        xls = pd.ExcelFile(file_path)
			
 
				+        # 获取所有的sheet名称
			
 
				+        sheet_names = xls.sheet_names
			
 
				+        for sheet in sheet_names:
			
 
				+            if read_cols:
			
 
				+                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
			
 
				+            else:
			
 
				+                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
			
 
				+
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+def __build_directory_dict(directory_dict, path, filter_types=None):
			
 
				+    # 遍历目录下的所有项
			
 
				+    for item in os.listdir(path):
			
 
				+        item_path = os.path.join(path, item)
			
 
				+        if os.path.isdir(item_path):
			
 
				+            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
			
 
				+        elif os.path.isfile(item_path):
			
 
				+            if path not in directory_dict:
			
 
				+                directory_dict[path] = []
			
 
				+
			
 
				+            if filter_types is None or len(filter_types) == 0:
			
 
				+                directory_dict[path].append(item_path)
			
 
				+            elif str(item_path).split(".")[-1] in filter_types:
			
 
				+                if str(item_path).count("~$") == 0:
			
 
				+                    directory_dict[path].append(item_path)
			
 
				+
			
 
				+    # 读取所有文件
			
 
				+
			
 
				+
			
 
				+# 读取路径下所有的excel文件
			
 
				+def read_excel_files(read_path):
			
 
				+    directory_dict = {}
			
 
				+    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
			
 
				+
			
 
				+    return [path for paths in directory_dict.values() for path in paths if path]
			
 
				+
			
 
				+
			
 
				+# 创建路径
			
 
				+def create_file_path(path, is_file_path=False):
			
 
				+    if is_file_path:
			
 
				+        path = os.path.dirname(path)
			
 
				+
			
 
				+    if not os.path.exists(path):
			
 
				+        os.makedirs(path, exist_ok=True)
			
 
				+
			
 
				+
			
 
				+def generate_df(pv_df, col):
			
 
				+    if col != '时间':
			
 
				+        xiangbian = col.split("逆变器")[0].replace("#", "")
			
 
				+        nibianqi = col.split("-")[0].split('逆变器')[1]
			
 
				+        pv_index = col.split("-")[1].replace("PV", "")
			
 
				+        now_df = pv_df[['时间', col + '输入电流()', col + '输入电压()']]
			
 
				+        now_df.loc[:, '箱变'] = xiangbian
			
 
				+        now_df.loc[:, '逆变器'] = nibianqi
			
 
				+        now_df.loc[:, 'PV'] = pv_index
			
 
				+        now_df.columns = [df_col.replace(col, "").replace("()", "") for df_col in now_df.columns]
			
 
				+        now_df['输入电流'] = now_df['输入电流'].astype(float)
			
 
				+        now_df['输入电压'] = now_df['输入电压'].astype(float)
			
 
				+
			
 
				+        print(xiangbian, nibianqi, pv_index, now_df.shape)
			
 
				+        return now_df
			
 
				+    return pd.DataFrame()
			
 
				+
			
 
				+
			
 
				+def split_index(split_data: str, split_str: str):
			
 
				+    count = split_data.find(split_str)
			
 
				+    if count > -1:
			
 
				+        return split_data[count + len(split_str):]
			
 
				+    else:
			
 
				+        return split_str
			
 
				+
			
 
				+
			
 
				+def replece_col_to_biaozhun(col):
			
 
				+    for k, v in dianjian_dict.items():
			
 
				+        if col.find(k) > -1:
			
 
				+            col = col.replace(k, v)
			
 
				+            return col
			
 
				+
			
 
				+    return col
			
 
				+
			
 
				+
			
 
				+def read_and_save_csv(file_path, save_path):
			
 
				+    begin = datetime.now()
			
 
				+    base_name = os.path.basename(file_path)
			
 
				+    print('开始', base_name)
			
 
				+
			
 
				+    df = read_file_to_df(file_path)
			
 
				+
			
 
				+    for col in df.columns:
			
 
				+        for del_col in del_cols:
			
 
				+            if col.find(del_col) > -1:
			
 
				+                del df[col]
			
 
				+
			
 
				+    df['时间'] = pd.to_datetime(df['时间'])
			
 
				+    xiangbian = [col for col in df.columns if str(col).startswith('#') and str(col).find('逆变器') > -1][0].split("逆变器")[
			
 
				+        0].replace("#", "")
			
 
				+    df.columns = [xiangbian + "_" + split_index(df_col, "逆变器").replace('PV', "").replace("()", "").replace("-",
			
 
				+                                                                                                           "_") if df_col.startswith(
			
 
				+        "#") else df_col for df_col in
			
 
				+                  df.columns]
			
 
				+
			
 
				+    df.columns = [col.replace("输入", "_输入") for col in df.columns]
			
 
				+
			
 
				+    df.columns = [replece_col_to_biaozhun(col) for col in df.columns]
			
 
				+
			
 
				+    # saved_pv_df = pd.concat(dfs)
			
 
				+    df.sort_values(by=['时间'], inplace=True)
			
 
				+    save_file = os.path.join(save_path, os.path.basename(file_path))
			
 
				+    create_file_path(save_file, True)
			
 
				+
			
 
				+    df.to_csv(save_file, encoding='utf-8', index=False)
			
 
				+
			
 
				+    print('结束', base_name, '耗时:' + str(datetime.now() - begin))
			
 
				+
			
 
				+
			
 
				+dianjian_data_str = """
			
 
				+输入电压	支路输出电压
			
 
				+输入电流	支路输出电流
			
 
				+功率因数	
			
 
				+总发电量	逆变器总发电量
			
 
				+无功功率	
			
 
				+有功功率	逆变器输出有功功率
			
 
				+机内温度	逆变器温度
			
 
				+电网AB线电压	交流输出电压
			
 
				+电网A相电流	逆变器输出电流A相
			
 
				+电网BC线电压	
			
 
				+电网B相电流	逆变器输出电流B相
			
 
				+电网CA线电压	
			
 
				+电网C相电流	逆变器输出电流C相
			
 
				+逆变器效率	逆变器转换效率
			
 
				+"""
			
 
				+
			
 
				+dianjian_dict = {}
			
 
				+del_cols = []
			
 
				+for data in dianjian_data_str.split("\n"):
			
 
				+    if data:
			
 
				+        datas = data.split("\t")
			
 
				+        if len(datas) == 2 and datas[1]:
			
 
				+            dianjian_dict[datas[0]] = datas[1]
			
 
				+        else:
			
 
				+            del_cols.append(datas[0])
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    path = r'/data/download/大唐玉湖性能分析离线分析/05整理数据/逆变器数据'
			
 
				+    save_path = r'/data/download/大唐玉湖性能分析离线分析/06整理数据/逆变器数据'
			
 
				+    all_files = read_excel_files(path)
			
 
				+
			
 
				+    all_datas = list(all_files)
			
 
				+    all_datas.sort()
			
 
				+    print(all_datas)
			
 
				+    #
			
 
				+    # for file in all_datas:
			
 
				+    #     read_and_save_csv(file, save_path)
			
 
				+
			
 
				+    with multiprocessing.Pool(20) as pool:
			
 
				+        pool.starmap(read_and_save_csv, [(i, save_path) for i in all_datas])
			
--- a/tmp_file/玉湖光伏-标准化_2.py
+++ b/tmp_file/玉湖光伏-标准化_2.py
@@ -0,0 +1,283 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Created on Tue Jul  9 16:28:48 2024
			
 
				+
			
 
				+@author: Administrator
			
 
				+"""
			
 
				+import multiprocessing
			
 
				+import os
			
 
				+from datetime import datetime, timedelta
			
 
				+
			
 
				+import numpy as np
			
 
				+import pandas as pd
			
 
				+import chardet
			
 
				+
			
 
				+pd.options.mode.copy_on_write = True
			
 
				+
			
 
				+
			
 
				+# 获取文件编码
			
 
				+def detect_file_encoding(filename):
			
 
				+    # 读取文件的前1000个字节（足够用于大多数编码检测）
			
 
				+    with open(filename, 'rb') as f:
			
 
				+        rawdata = f.read(1000)
			
 
				+    result = chardet.detect(rawdata)
			
 
				+    encoding = result['encoding']
			
 
				+
			
 
				+    if encoding is None:
			
 
				+        encoding = 'gb18030'
			
 
				+
			
 
				+    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
			
 
				+        encoding = 'gb18030'
			
 
				+    return encoding
			
 
				+
			
 
				+
			
 
				+# 读取数据到df
			
 
				+def read_file_to_df(file_path, read_cols=list(), header=0):
			
 
				+    df = pd.DataFrame()
			
 
				+    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
			
 
				+        encoding = detect_file_encoding(file_path)
			
 
				+        end_with_gz = str(file_path).lower().endswith("gz")
			
 
				+        if read_cols:
			
 
				+            if end_with_gz:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
			
 
				+            else:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
			
 
				+        else:
			
 
				+
			
 
				+            if end_with_gz:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
			
 
				+            else:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
			
 
				+
			
 
				+    else:
			
 
				+        xls = pd.ExcelFile(file_path)
			
 
				+        # 获取所有的sheet名称
			
 
				+        sheet_names = xls.sheet_names
			
 
				+        for sheet in sheet_names:
			
 
				+            if read_cols:
			
 
				+                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
			
 
				+            else:
			
 
				+                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
			
 
				+
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+def __build_directory_dict(directory_dict, path, filter_types=None):
			
 
				+    # 遍历目录下的所有项
			
 
				+    for item in os.listdir(path):
			
 
				+        item_path = os.path.join(path, item)
			
 
				+        if os.path.isdir(item_path):
			
 
				+            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
			
 
				+        elif os.path.isfile(item_path):
			
 
				+            if path not in directory_dict:
			
 
				+                directory_dict[path] = []
			
 
				+
			
 
				+            if filter_types is None or len(filter_types) == 0:
			
 
				+                directory_dict[path].append(item_path)
			
 
				+            elif str(item_path).split(".")[-1] in filter_types:
			
 
				+                if str(item_path).count("~$") == 0:
			
 
				+                    directory_dict[path].append(item_path)
			
 
				+
			
 
				+    # 读取所有文件
			
 
				+
			
 
				+
			
 
				+# 读取路径下所有的excel文件
			
 
				+def read_excel_files(read_path):
			
 
				+    directory_dict = {}
			
 
				+    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
			
 
				+
			
 
				+    return [path for paths in directory_dict.values() for path in paths if path]
			
 
				+
			
 
				+
			
 
				+# 创建路径
			
 
				+def create_file_path(path, is_file_path=False):
			
 
				+    if is_file_path:
			
 
				+        path = os.path.dirname(path)
			
 
				+
			
 
				+    if not os.path.exists(path):
			
 
				+        os.makedirs(path, exist_ok=True)
			
 
				+
			
 
				+
			
 
				+def split_index(split_data: str, split_str: str):
			
 
				+    count = split_data.find(split_str)
			
 
				+    if count > -1:
			
 
				+        return split_data[count + len(split_str):]
			
 
				+    else:
			
 
				+        return split_str
			
 
				+
			
 
				+
			
 
				+def replece_col_to_biaozhun(col):
			
 
				+    for k, v in dianjian_dict.items():
			
 
				+        if col.find(k) > -1:
			
 
				+            col = col.replace(k, v)
			
 
				+            return col
			
 
				+
			
 
				+    return col
			
 
				+
			
 
				+
			
 
				+def row_to_datas(row, pv_dict, inverter_cols, df_cols):
			
 
				+    row_datas = list(list())
			
 
				+    for xiangbian in pv_dict.keys():
			
 
				+        for nibianqi in pv_dict[xiangbian].keys():
			
 
				+            for pv in pv_dict[xiangbian][nibianqi]:
			
 
				+                datas = [np.nan] * 14
			
 
				+                datas[0] = row['时间']
			
 
				+                datas[1] = xiangbian
			
 
				+                datas[2] = nibianqi
			
 
				+                datas[3] = pv
			
 
				+                datas_4_col = "_".join([str(xiangbian), str(nibianqi), str(pv), '支路输出电压'])
			
 
				+                if datas_4_col in df_cols:
			
 
				+                    datas[4] = row[datas_4_col]
			
 
				+                else:
			
 
				+                    datas[4] = np.nan
			
 
				+
			
 
				+                datas_5_col = "_".join([str(xiangbian), str(nibianqi), str(pv), '支路输出电流'])
			
 
				+                if datas_5_col in df_cols:
			
 
				+                    datas[5] = row[datas_5_col]
			
 
				+                else:
			
 
				+                    datas[5] = np.nan
			
 
				+
			
 
				+                row_datas.append(datas)
			
 
				+
			
 
				+    for xiangbian in pv_dict.keys():
			
 
				+        for nibianqi in pv_dict[xiangbian].keys():
			
 
				+            datas = [np.nan] * 14
			
 
				+            datas[0] = row['时间']
			
 
				+            datas[1] = xiangbian
			
 
				+            datas[2] = nibianqi
			
 
				+            datas[3] = 0
			
 
				+            for index, col_name in enumerate(inverter_cols):
			
 
				+                col = '_'.join([str(xiangbian), str(nibianqi), col_name])
			
 
				+                if col in df_cols:
			
 
				+                    datas[index + 6] = row[col]
			
 
				+                else:
			
 
				+                    datas[index + 6] = np.nan
			
 
				+
			
 
				+            row_datas.append(datas)
			
 
				+
			
 
				+    return row_datas
			
 
				+
			
 
				+
			
 
				+def df_to_biaozhun(df):
			
 
				+    pv_cols = ['支路输出电压', '支路输出电流']
			
 
				+    inverter_cols = ['逆变器总发电量', '逆变器输出有功功率', '逆变器温度', '交流输出电压', '逆变器输出电流A相', '逆变器输出电流B相', '逆变器输出电流C相', '逆变器转换效率']
			
 
				+    # 从列名获取箱变->逆变器->PV等的字典
			
 
				+    pv_dict = dict(dict())
			
 
				+    for col in df.columns:
			
 
				+        for pv_col in pv_cols:
			
 
				+            if str(col).endswith(pv_col):
			
 
				+                datas = col.split("_")
			
 
				+                xiangbian = datas[0]
			
 
				+                nibiangqi = datas[1]
			
 
				+                pv = datas[2]
			
 
				+
			
 
				+                if xiangbian in pv_dict.keys():
			
 
				+                    if nibiangqi in pv_dict[xiangbian]:
			
 
				+                        pv_dict[xiangbian][nibiangqi].add(pv)
			
 
				+                    else:
			
 
				+                        pv_dict[xiangbian][nibiangqi] = set([pv])
			
 
				+                else:
			
 
				+                    pv_dict[xiangbian] = {nibiangqi: set([pv])}
			
 
				+
			
 
				+    results = df.apply(row_to_datas, args=(pv_dict, inverter_cols, df.columns), axis=1)
			
 
				+
			
 
				+    df_datas = results.to_list()
			
 
				+    df_datas = [da for data in df_datas for da in data]
			
 
				+    df_cols = ["时间", "箱变", "逆变器", "支路"]
			
 
				+    df_cols.extend(pv_cols)
			
 
				+    df_cols.extend(inverter_cols)
			
 
				+    df = pd.DataFrame(df_datas, columns=df_cols)
			
 
				+
			
 
				+    type_conver_list = []
			
 
				+    type_conver_list.extend(pv_cols)
			
 
				+    type_conver_list.extend(inverter_cols)
			
 
				+    for type_conver in type_conver_list:
			
 
				+        df[type_conver] = pd.to_numeric(df[type_conver], errors='coerce')
			
 
				+
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+def read_and_save_csv(file_path, save_path):
			
 
				+    begin = datetime.now()
			
 
				+    base_name = os.path.basename(file_path)
			
 
				+    print('开始', base_name)
			
 
				+
			
 
				+    df = read_file_to_df(file_path)
			
 
				+
			
 
				+    for col in df.columns:
			
 
				+        for del_col in del_cols:
			
 
				+            if col.find(del_col) > -1:
			
 
				+                del df[col]
			
 
				+
			
 
				+    df['时间'] = pd.to_datetime(df['时间'])
			
 
				+    xiangbian = [col for col in df.columns if str(col).startswith('#') and str(col).find('逆变器') > -1][0].split("逆变器")[
			
 
				+        0].replace("#", "")
			
 
				+    df.columns = [xiangbian + "_" + split_index(df_col, "逆变器").replace('PV', "").replace("()", "").replace("-",
			
 
				+                                                                                                           "_") if df_col.startswith(
			
 
				+        "#") else df_col for df_col in
			
 
				+                  df.columns]
			
 
				+
			
 
				+    df.columns = [col.replace("输入", "_输入") for col in df.columns]
			
 
				+    df.columns = [replece_col_to_biaozhun(col) for col in df.columns]
			
 
				+
			
 
				+    df = df_to_biaozhun(df)
			
 
				+
			
 
				+    # df.sort_values(by=['时间', "箱变", "逆变器", "支路"], inplace=True)
			
 
				+    # save_file = os.path.join(save_path, os.path.basename(file_path))
			
 
				+    # create_file_path(save_file, True)
			
 
				+
			
 
				+    # df.to_csv(save_file, encoding='utf-8', index=False)
			
 
				+
			
 
				+    print('结束', base_name, '耗时:' + str(datetime.now() - begin))
			
 
				+    return df
			
 
				+
			
 
				+dianjian_data_str = """
			
 
				+输入电压	支路输出电压
			
 
				+输入电流	支路输出电流
			
 
				+功率因数	
			
 
				+总发电量	逆变器总发电量
			
 
				+无功功率	
			
 
				+有功功率	逆变器输出有功功率
			
 
				+机内温度	逆变器温度
			
 
				+电网AB线电压	交流输出电压
			
 
				+电网A相电流	逆变器输出电流A相
			
 
				+电网BC线电压	
			
 
				+电网B相电流	逆变器输出电流B相
			
 
				+电网CA线电压	
			
 
				+电网C相电流	逆变器输出电流C相
			
 
				+逆变器效率	逆变器转换效率
			
 
				+"""
			
 
				+
			
 
				+dianjian_dict = {}
			
 
				+del_cols = []
			
 
				+for data in dianjian_data_str.split("\n"):
			
 
				+    if data:
			
 
				+        datas = data.split("\t")
			
 
				+        if len(datas) == 2 and datas[1]:
			
 
				+            dianjian_dict[datas[0]] = datas[1]
			
 
				+        else:
			
 
				+            del_cols.append(datas[0])
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    path = r'/data/download/大唐玉湖性能分析离线分析/05整理数据/逆变器数据'
			
 
				+    save_path = r'/data/download/大唐玉湖性能分析离线分析/06整理数据/逆变器数据'
			
 
				+    # path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\yuanshi'
			
 
				+    # save_path = r'D:\trans_data\大唐玉湖性能分析离线分析\test\zhengli'
			
 
				+    all_files = read_excel_files(path)
			
 
				+
			
 
				+    all_datas = list(all_files)
			
 
				+    all_datas.sort()
			
 
				+    print(all_datas)
			
 
				+
			
 
				+    # for file in all_datas:
			
 
				+    #     read_and_save_csv(file, save_path)
			
 
				+
			
 
				+    with multiprocessing.Pool(40) as pool:
			
 
				+        dfs = pool.starmap(read_and_save_csv, [(i, save_path) for i in all_datas])
			
 
				+
			
 
				+    saved_pv_df = pd.concat(dfs)
			
 
				+    saved_pv_df.sort_values(by=['时间', "箱变", "逆变器", "支路"], inplace=True)
			
 
				+    save_file = os.path.join(save_path, "合并.csv")
			
 
				+    create_file_path(save_file, True)
			
 
				+    saved_pv_df.sort_values(by=['时间', "箱变", "逆变器", "支路"], inplace=True)
			
 
				+    saved_pv_df.to_csv(save_file, encoding='utf-8', index=False)
			
--- a/tmp_file/玉湖光伏-气象标准化.py
+++ b/tmp_file/玉湖光伏-气象标准化.py
@@ -0,0 +1,125 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Created on Tue Jul  9 16:28:48 2024
			
 
				+
			
 
				+@author: Administrator
			
 
				+"""
			
 
				+import multiprocessing
			
 
				+import os
			
 
				+from datetime import datetime, timedelta
			
 
				+
			
 
				+import numpy as np
			
 
				+import pandas as pd
			
 
				+import chardet
			
 
				+
			
 
				+pd.options.mode.copy_on_write = True
			
 
				+
			
 
				+
			
 
				+# 获取文件编码
			
 
				+def detect_file_encoding(filename):
			
 
				+    # 读取文件的前1000个字节（足够用于大多数编码检测）
			
 
				+    with open(filename, 'rb') as f:
			
 
				+        rawdata = f.read(1000)
			
 
				+    result = chardet.detect(rawdata)
			
 
				+    encoding = result['encoding']
			
 
				+
			
 
				+    if encoding is None:
			
 
				+        encoding = 'gb18030'
			
 
				+
			
 
				+    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
			
 
				+        encoding = 'gb18030'
			
 
				+    return encoding
			
 
				+
			
 
				+
			
 
				+# 读取数据到df
			
 
				+def read_file_to_df(file_path, read_cols=list(), header=0):
			
 
				+    df = pd.DataFrame()
			
 
				+    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
			
 
				+        encoding = detect_file_encoding(file_path)
			
 
				+        end_with_gz = str(file_path).lower().endswith("gz")
			
 
				+        if read_cols:
			
 
				+            if end_with_gz:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip', header=header)
			
 
				+            else:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, header=header, on_bad_lines='warn')
			
 
				+        else:
			
 
				+
			
 
				+            if end_with_gz:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, compression='gzip', header=header)
			
 
				+            else:
			
 
				+                df = pd.read_csv(file_path, encoding=encoding, header=header, on_bad_lines='warn')
			
 
				+
			
 
				+    else:
			
 
				+        xls = pd.ExcelFile(file_path)
			
 
				+        # 获取所有的sheet名称
			
 
				+        sheet_names = xls.sheet_names
			
 
				+        for sheet in sheet_names:
			
 
				+            if read_cols:
			
 
				+                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
			
 
				+            else:
			
 
				+                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
			
 
				+
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+def __build_directory_dict(directory_dict, path, filter_types=None):
			
 
				+    # 遍历目录下的所有项
			
 
				+    for item in os.listdir(path):
			
 
				+        item_path = os.path.join(path, item)
			
 
				+        if os.path.isdir(item_path):
			
 
				+            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
			
 
				+        elif os.path.isfile(item_path):
			
 
				+            if path not in directory_dict:
			
 
				+                directory_dict[path] = []
			
 
				+
			
 
				+            if filter_types is None or len(filter_types) == 0:
			
 
				+                directory_dict[path].append(item_path)
			
 
				+            elif str(item_path).split(".")[-1] in filter_types:
			
 
				+                if str(item_path).count("~$") == 0:
			
 
				+                    directory_dict[path].append(item_path)
			
 
				+
			
 
				+    # 读取所有文件
			
 
				+
			
 
				+
			
 
				+# 读取路径下所有的excel文件
			
 
				+def read_excel_files(read_path):
			
 
				+    directory_dict = {}
			
 
				+    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
			
 
				+
			
 
				+    return [path for paths in directory_dict.values() for path in paths if path]
			
 
				+
			
 
				+
			
 
				+# 创建路径
			
 
				+def create_file_path(path, is_file_path=False):
			
 
				+    if is_file_path:
			
 
				+        path = os.path.dirname(path)
			
 
				+
			
 
				+    if not os.path.exists(path):
			
 
				+        os.makedirs(path, exist_ok=True)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    # path = r'/data/download/大唐玉湖性能分析离线分析/05整理数据/气象站数据'
			
 
				+    # save_path = r'/data/download/大唐玉湖性能分析离线分析/06整理数据/气象站数据'
			
 
				+    path = r'Z:\大唐玉湖性能分析离线分析\05整理数据\气象站数据'
			
 
				+    save_path = r'Z:\大唐玉湖性能分析离线分析\06整理数据\气象站数据'
			
 
				+
			
 
				+    fengsu_df = read_file_to_df(os.path.join(path, '风速.csv'), read_cols=['当前时间', '实际风速'])
			
 
				+    fengxiang_df = read_file_to_df(os.path.join(path, '风向.csv'), read_cols=['当前时间', '实际风向'])
			
 
				+    fuzhaodu_df = read_file_to_df(os.path.join(path, '辐照度.csv'), read_cols=['时间', '水平总辐照度', '倾斜总辐照度', '散射辐照度'])
			
 
				+    shidu_df = read_file_to_df(os.path.join(path, '湿度.csv'), read_cols=['时间', '实际湿度'])
			
 
				+    wendu_df = read_file_to_df(os.path.join(path, '温度.csv'), read_cols=['时间', '实际温度'])
			
 
				+    yali_df = read_file_to_df(os.path.join(path, '压力.csv'), read_cols=['时间', '实际气压'])
			
 
				+
			
 
				+    fengsu_df.rename(columns={'当前时间': '时间'}, inplace=True)
			
 
				+    fengxiang_df.rename(columns={'当前时间': '时间'}, inplace=True)
			
 
				+
			
 
				+    dfs = [fengxiang_df, fengsu_df, fuzhaodu_df, shidu_df, wendu_df, yali_df]
			
 
				+
			
 
				+    for df in dfs:
			
 
				+        df['时间'] = pd.to_datetime(df['时间'])
			
 
				+        df.set_index(keys='时间', inplace=True)
			
 
				+
			
 
				+    df = pd.concat(dfs, axis=1)
			
 
				+    create_file_path(save_path, is_file_path=False)
			
 
				+    df.to_csv(os.path.join(save_path, '气象合并.csv'), encoding='utf-8')
			
--- a/utils/draw/__init__.py
+++ b/utils/draw/__init__.py
--- a/utils/draw/draw_file.py
+++ b/utils/draw/draw_file.py
@@ -0,0 +1,31 @@
 
				+import matplotlib
			
 
				+
			
 
				+from utils.file.trans_methods import create_file_path
			
 
				+
			
 
				+matplotlib.use('Agg')
			
 
				+matplotlib.rcParams['font.family'] = 'SimHei'
			
 
				+matplotlib.rcParams['font.sans-serif'] = ['SimHei']
			
 
				+from matplotlib import pyplot as plt
			
 
				+
			
 
				+
			
 
				+def scatter(title, x_label, y_label, x_values, y_values, color=None, col_map=dict(), size=10,
			
 
				+            save_file_path=''):
			
 
				+    if save_file_path:
			
 
				+        create_file_path(save_file_path, True)
			
 
				+    else:
			
 
				+        save_file_path = title + '.png'
			
 
				+
			
 
				+    plt.figure(figsize=(8, 6))
			
 
				+    plt.title(title, fontsize=16)
			
 
				+    plt.xlabel(x_label, fontsize=14)
			
 
				+    plt.ylabel(y_label, fontsize=14)
			
 
				+    if color is not None:
			
 
				+        plt.scatter(x_values, y_values, s=size, c=color)
			
 
				+        if col_map:
			
 
				+            patches = [plt.Rectangle((0, 0), 1, 1, fc=c) for c in col_map.values()]
			
 
				+            plt.legend(patches, list(col_map.keys()))
			
 
				+    else:
			
 
				+        plt.scatter(x_values, y_values, s=size)
			
 
				+
			
 
				+    plt.savefig(save_file_path)
			
 
				+    plt.close()
			
--- a/utils/file/trans_methods.py
+++ b/utils/file/trans_methods.py
@@ -71,9 +71,12 @@ def read_file_to_df(file_path, read_cols=list(), header=0):
 
				             sheet_names = xls.sheet_names
			
 
				             for sheet in sheet_names:
			
 
				                 if read_cols:
			
 
				-                    df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)])
			
 
				+                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header, usecols=read_cols)
			
 
				                 else:
			
 
				-                    df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, header=header)])
			
 
				+                    now_df = pd.read_excel(xls, sheet_name=sheet, header=header)
			
 
				+
			
 
				+
			
 
				+                df = pd.concat([df, now_df])
			
 
				 
			
 
				         trans_print('文件读取成功', file_path, '文件数量', df.shape)
			
 
				     except Exception as e:
			
--- a/utils/systeminfo/sysinfo.py
+++ b/utils/systeminfo/sysinfo.py
@@ -31,15 +31,17 @@ def get_max_file_size(file_paths: list[str]):
 
				     return max_size
			
 
				 
			
 
				 
			
 
				-def use_files_get_max_cpu_count(file_paths: list[str], memory_percent: float = 1 / 9, cpu_percent: float = 1 / 3):
			
 
				+def use_files_get_max_cpu_count(file_paths: list[str], memory_percent: float = 1 / 6, cpu_percent: float = 2 / 5):
			
 
				     max_file_size = get_max_file_size(file_paths)
			
 
				     free_memory = get_available_memory_with_percent(memory_percent)
			
 
				     count = int(free_memory / max_file_size)
			
 
				     max_cpu_count = get_available_cpu_count_with_percent(cpu_percent)
			
 
				     result = count if count <= max_cpu_count else max_cpu_count
			
 
				+    if result == 0:
			
 
				+        result = 1
			
 
				     trans_print("总文件数:", len(file_paths), ",获取最大文件大小:", str(round(max_file_size / 2 ** 20, 2)) + "M",
			
 
				                 "可用内存:", str(get_available_memory_with_percent(1) / 2 ** 20) + "M",
			
 
				-                "总CPU数:", get_cpu_count(), "CPU可用数量:", max_cpu_count,
			
 
				+                "总CPU数:", get_cpu_count(), "CPU使用比例:", round(cpu_percent, 2), "CPU可用数量:", max_cpu_count,
			
 
				                 ",最终确定使用进程数:", result)
			
 
				     return result
			
 
				 
			
@@ -52,9 +54,8 @@ if __name__ == '__main__':
 
				     begin = datetime.datetime.now()
			
 
				     all_files = read_files(read_path)
			
 
				     print(datetime.datetime.now() - begin)
			
 
				-    count = use_files_get_max_cpu_count(all_files)
			
 
				 
			
 
				-    print(count)
			
 
				+    print(use_files_get_max_cpu_count(all_files))
			
 
				 
			
 
				     print(get_available_memory_with_percent(1) / 2 ** 20)
			
 
				     print(get_available_memory_with_percent(2 / 3) / 2 ** 20)