Browse Source

master分支整合

chenhongyan1989 2 months ago
parent
commit
f0a225f800
30 changed files with 0 additions and 3030 deletions
  1. 0 0
      dataAnalysisBusiness/common/__init__.py
  2. 0 0
      dataAnalysisBusiness/dataContract/__init__.py
  3. 0 148
      dataAnalysisBusiness/dataContract/confBusiness.py
  4. 0 450
      dataAnalysisBusiness/demo/SCADA_10min_category_0.py
  5. 0 507
      dataAnalysisBusiness/demo/SCADA_10min_category_1.py
  6. 0 193
      dataAnalysisBusiness/demo/SCADA_10min_category_2.py
  7. 0 632
      dataAnalysisBusiness/demo/SCADA_10min_category_3.py
  8. 0 0
      dataAnalysisBusiness/demo/__init__.py
  9. 0 16
      dataAnalysisBusiness/demo/demoDataFrame.py
  10. 0 62
      dataAnalysisBusiness/demo/scatter3D_plotly.py
  11. 0 50
      dataAnalysisBusiness/demo/scatter3D_plotly_make_subplots.py
  12. 0 19
      dataAnalysisBusiness/demo/test.py
  13. 0 58
      dataAnalysisBusiness/demo/testBoxLine.py
  14. 0 113
      dataAnalysisBusiness/demo/testDataProcess.py
  15. 0 85
      dataAnalysisBusiness/demo/testDatabase.py
  16. 0 50
      dataAnalysisBusiness/demo/testLargeComponetTemp.py
  17. 0 57
      dataAnalysisBusiness/demo/testLegend.py
  18. 0 12
      dataAnalysisBusiness/demo/testPandas.py
  19. 0 0
      dataContract/algorithmContract/__init__.py
  20. 0 169
      dataContract/algorithmContract/confBusiness.py
  21. 0 14
      dataContract/algorithmContract/configAnalysis.py
  22. 0 3
      dataContract/algorithmContract/const.py
  23. 0 199
      dataContract/algorithmContract/contract.py
  24. 0 35
      dataContract/algorithmContract/customDataContract.py
  25. 0 7
      dataContract/algorithmContract/customFilter.py
  26. 0 7
      dataContract/algorithmContract/dataContractType.py
  27. 0 23
      dataContract/algorithmContract/dataFilter.py
  28. 0 6
      dataContract/algorithmContract/dataSource.py
  29. 0 8
      dataContract/algorithmContract/graphSet.py
  30. 0 107
      dataContract/algorithmContract/testDataContract.py

+ 0 - 0
dataAnalysisBusiness/common/__init__.py


+ 0 - 0
dataAnalysisBusiness/dataContract/__init__.py


+ 0 - 148
dataAnalysisBusiness/dataContract/confBusiness.py

@@ -1,148 +0,0 @@
-import pandas as pd
-from algorithm.utils.jsonUtil.jsonUtil import JsonUtil
-
-# Module-level constants: default charset and fixed field names.
-charset_unify = 'utf-8'
-
-Field_NameOfTurbine = "turbine_name"
-Field_GeneratorTorque = "generator_torque"
-Field_AngleIncluded="angle_included"
-
-
-class ConfBusiness:
-    """Container for the analysis configuration read from a JSON file.
-
-    Every attribute is initialised to None by __init__ and populated by
-    loadConfig().
-    """
-
-    def __init__(self):
-        self.farm_name = None
-        self.rated_power = None
-        self.rated_WindSpeed = None
-        self.rotor_diameter = None
-        self.density_air = None
-        self.rotational_Speed_Ratio = None
-
-        self.type_name = None
-
-        self.time_period = None            # time interval, in seconds
-
-        self.output_name = None
-        self.output_prefix = None
-
-        self.turbineInfoFilePathCSV = None  # turbine information CSV
-        self.turbineGuaranteedPowerCurveFilePathCSV = None  # contract-guaranteed power curve CSV
-
-        self.input_path = None
-        self.skip_row_number = None  # number of rows to skip
-        self.csvFileNameSplitStringForTurbine = None  # separator used to extract the turbine id from a file name
-        self.index_turbine = None  # index of the turbine id within the split file name
-        self.filter = None
-
-        self.output_path = None
-
-        self.start_time_str = None
-        self.end_time_str = None
-
-        # start/end time converted from the strings above to pd.Timestamp
-        self.start_time = None
-        self.end_time = None
-        self.excludingMonths=None # months to exclude, format %Y-%m
-
-        self.field_turbine_time = None    # field name: time
-        self.field_turbine_name = None    # field name: turbine name
-
-        self.field_wind_speed = None      # field name: wind speed
-        self.field_power = None           # field name: active power
-        self.field_pitch_angle1 = None    # field name: pitch angle 1
-        self.field_pitch_angle2 = None    # field name: pitch angle 2
-        self.field_pitch_angle3 = None    # field name: pitch angle 3
-        self.field_turbine_state = None   # field name: turbine state
-        self.field_gen_speed = None       # field name: generator speed
-        self.value_gen_speed_min = None       # value: minimum generator speed
-        self.value_gen_speed_max = None       # value: maximum generator speed
-        self.field_rotor_speed = None     # field name: rotor speed
-        self.field_torque = None          # field name: torque
-        self.field_wind_dir = None        # field name: wind direction
-        self.field_angle_included = None
-        self.field_nacelle_pos = None     # field name: nacelle position (read from config key 'nacelle_Pos')
-        self.field_env_temp = None        # field name: ambient temperature
-        self.field_nacelle_temp = None    # field name: nacelle temperature
-        self.field_temperature_large_components = None  # list of field names: large-component temperature sensors
-
-    def loadConfig(self,jsonFilePath, charset=charset_unify):
-        """Populate this object from the JSON configuration file.
-
-        Args:
-            jsonFilePath: path handed to JsonUtil.read_json.
-            charset: accepted for interface compatibility; it is not used
-                by this method.
-
-        Returns:
-            self, so the call can be chained.
-
-        Raises:
-            KeyError: if a required key is missing from the configuration.
-        """
-        # Keep the parsed configuration in a local variable.
-        configData = JsonUtil.read_json(jsonFilePath)
-
-        self.farm_name = configData['name_PowerFarm']
-        self.rated_power = configData['rated_Power_Turbine_Unit_kW']
-        self.rated_WindSpeed = configData["rated_WindSpeed"]
-        self.rotor_diameter = configData['rotor_diameter']
-        self.rotational_Speed_Ratio = configData['rotational_Speed_Ratio']
-        self.density_air = configData['density_air']
-
-        self.type_name = configData['name_Type_For_Analysis']
-        # time interval, in seconds
-        self.time_period = configData['time_Period_Unit_Second']
-
-        self.output_name = configData['name_Output']
-        self.output_prefix = configData['outputFileDirectory']
-
-        self.turbineInfoFilePathCSV = configData["turbineInfoFilePathCSV"]
-        self.turbineGuaranteedPowerCurveFilePathCSV = configData[
-            "turbineGuaranteedPowerCurveFilePathCSV"]
-        self.input_path = configData['inputFileDirectoryByCSV']
-        self.csvFileNameSplitStringForTurbine = configData["csvFileNameSplitStringForTurbine"]
-        self.index_turbine = configData["index_turbine"]
-        self.skip_row_number = configData['skip_row_number']
-        self.filter = configData['filter']
-
-        # Output directory is "<outputFileDirectory>/<farm name>".
-        self.output_path = self.output_prefix + \
-            r"/{}".format(self.farm_name)
-
-        self.start_time_str = configData['date_Begin']
-        self.end_time_str = configData['date_End']
-
-        # Convert the strings to pd.Timestamp; both must match
-        # '%Y-%m-%d %H:%M:%S'.
-        self.start_time = pd.to_datetime(
-            self.start_time_str, format='%Y-%m-%d %H:%M:%S')
-        self.end_time = pd.to_datetime(
-            self.end_time_str, format='%Y-%m-%d %H:%M:%S')
-        self.excludingMonths= configData['excludingMonths']
-
-        self.field_turbine_time = configData['turbine_Time']
-        self.field_turbine_name = configData['turbine_Name']
-
-        self.field_wind_speed = configData['speed_Wind']
-        self.field_power = configData['power_Active']
-        self.field_pitch_angle1 = configData['pitch_Angle1']
-        self.field_pitch_angle2 = configData['pitch_Angle2']
-        self.field_pitch_angle3 = configData['pitch_Angle3']
-        self.field_turbine_state = configData['state_Turbine']
-        self.field_gen_speed = configData['speed_Generator']
-        # The 'speed_Generato_*' spelling is the key this code reads; keep it
-        # in sync with the configuration files.
-        self.value_gen_speed_min = configData['speed_Generato_min']
-        self.value_gen_speed_max = configData['speed_Generato_max']
-        self.field_rotor_speed = configData['speed_Rotor']
-        self.field_torque = configData['torque']
-        self.field_wind_dir = configData['direction_Wind']
-        self.field_angle_included = configData['angle_included']
-        self.field_nacelle_pos = configData['nacelle_Pos']
-        self.field_env_temp = configData['temperature_Env']
-        self.field_nacelle_temp = configData['temperature_Nacelle']
-        self.field_temperature_large_components = configData['temperature_large_components']
-
-        return self
-
-    def add_W_if_starts_with_digit(self,s):
-        """Return 'W' + s when s is a string whose first character is a digit.
-
-        Any other value (non-string, empty string, string not starting with
-        a digit) is returned unchanged.
-        """
-        # s[:1] is '' for an empty string, so isdigit() is False instead of
-        # s[0] raising IndexError.
-        if isinstance(s, str) and s[:1].isdigit():
-            return 'W' + s
-        return s

+ 0 - 450
dataAnalysisBusiness/demo/SCADA_10min_category_0.py

@@ -1,450 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Apr  8 15:01:43 2024
-
-@author: LDDN
-"""
-import math
-import pandas as pd  
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.pyplot import MultipleLocator#设定固定刻度
-
-
-# Load the SCADA data and the turbine-info table from hard-coded local CSV paths.
-scada_10min = pd.read_csv(r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72\82.csv',encoding="utf-8")
-turbine_info = pd.read_csv(r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72\info.csv')
-# Each turbine parameter below is read as a one-column DataFrame and then
-# converted with .values, so it is a 2-D numpy array rather than a scalar.
-PRated = turbine_info.loc[:,["额定功率"]] # rated power; original note: 2000
-PRated = PRated.values
-VCutOut = turbine_info.loc[:,["切出风速"]]  # cut-out wind speed; original note: 25
-VCutOut = VCutOut.values
-VCutIn = turbine_info.loc[:,["切入风速"]]  # cut-in wind speed; original note: 3
-VCutIn = VCutIn.values
-VRated = turbine_info.loc[:,["额定风速"]] # rated wind speed; original note: 10
-VRated = VRated.values
-
-# Keep only the time, active-power and wind-speed columns.
-time_stamp = scada_10min.loc[:,['时间']] # DataFrame
-active_power = scada_10min.loc[:,['变频器电网侧有功功率']]
-wind_speed = scada_10min.loc[:,['风速']]
-LM = pd.concat([time_stamp,active_power,wind_speed],axis=1)  # DataFrame
-
-
-Labeled_March809 = LM
-APower = Labeled_March809["变频器电网侧有功功率"]  # Series: active power
-WSpeed = Labeled_March809["风速"]  # Series: wind speed
-maxP=np.max(APower)
-intervalP=25  # power bin width; original note: ceil(PRated*0.01), i.e. 1% of rated power
-intervalwindspeed=0.25  # wind-speed bin width, 0.25 m/s
-
-# Initialise the grid dimensions.
-PNum = 0  
-TopP = 0   
-# Compute PNum (number of power bins) and TopP (bins above rated power).
-# NOTE(review): PRated and VCutOut are 2-D arrays produced by .values; the
-# comparison and the math.floor/math.ceil calls presumably rely on info.csv
-# holding exactly one row - confirm.
-if maxP >= PRated:  
-    PNum = math.floor(maxP / intervalP) + 1  
-    TopP = math.floor((maxP - PRated) / intervalP) + 1  
-else:  
-    PNum = math.floor(PRated / intervalP)  
-    TopP = 0   
-VNum = math.ceil(VCutOut / intervalwindspeed)  
-  
-SM1 = Labeled_March809.shape
-AA1 = SM1[0]  
-lab = [[0] for _ in range(AA1)]
-lab = pd.DataFrame(lab,columns=['lab'])
-Labeled_March809 = pd.concat([Labeled_March809,lab],axis=1)  # append a label column after time/power/wind speed
-Labeled_March809 = Labeled_March809.values
-SM = Labeled_March809.shape # original note: (52561,4)
-AA = SM[0]  
-# Collect the operating samples whose power is above zero and
-# label the zero-power samples with -1.
-DzMarch809_0 = np.zeros((AA, 3)) # buffer for samples with power above zero
-nCounter1 = 0 
-Point_line = np.zeros(AA, dtype=int)  
-# Many samples have a power below 10, so power <= 10 is treated as zero.
-for i in range(AA):
-    if (APower[i] > 10) & (WSpeed[i] > 0):
-        nCounter1 += 1   # nCounter1 counts the samples kept as valid
-        DzMarch809_0[nCounter1-1, 0] = WSpeed[i]  
-        DzMarch809_0[nCounter1-1, 1] = APower[i]  
-        Point_line[nCounter1-1] = i+1  # 1-based row position of this sample in the original data
-    if APower[i] <= 10: 
-        Labeled_March809[i,SM[1]-1] = -1  # zero-power sample: label -1 (Labeled_March809 is an array here)
-# Keep only the rows of DzMarch809_0 that were actually filled; the rest are zeros.
-DzMarch809 = DzMarch809_0[:nCounter1, :]  
-# Count how many points fall into each grid cell.
-# The array starts at one; the surplus is subtracted again after counting.
-XBoxNumber = np.ones((PNum, VNum),dtype=int)  # original note: (86 100)
-nWhichP = 0
-nWhichV = 0
-
-# Loop over the valid samples in DzMarch809.
-# NOTE(review): nWhichP / nWhichV are not reset per sample, so a sample that
-# matches no bin reuses the previous sample's bin - confirm this is intended.
-for i in range(nCounter1):  
-    
-    # Find the power bin, 1-based: ((m-1)*intervalP, m*intervalP].
-    for m in range(1, PNum + 1):  # range is half-open, hence PNum + 1
-        if (DzMarch809[i,1] > (m - 1) * intervalP) and (DzMarch809[i,1] <= m * intervalP):  
-            nWhichP = m  
-            break  
-      
-    # Find the wind-speed bin, 1-based.
-    for n in range(1, VNum + 1):  # likewise needs + 1
-        if (DzMarch809[i, 0] > (n - 1)*intervalwindspeed) and (DzMarch809[i, 0] <= n*intervalwindspeed):  
-            nWhichV = n  
-            break  
-      
-    # When both bins are valid, increment the count of that grid cell.
-    if (nWhichP > 0) and (nWhichV > 0):  
-        XBoxNumber[nWhichP - 1, nWhichV - 1] += 1  # convert the 1-based bins to 0-based indices
-# XBoxNumber now holds the per-cell counts [PNum rows, VNum columns].
-
-# Remove the initial one every cell was created with.
-for m in range(1,PNum+1):
-    for n in range(1,VNum+1):
-        XBoxNumber[m-1,n-1] = XBoxNumber[m-1,n-1] - 1
-
-# Convert the counts to percentages along each power row (kept for later use).
-PBoxPercent = np.zeros((PNum, VNum),dtype = float)  # original note: (86 100); float because the percentages are fractional
-PBinSum = np.zeros((PNum,1),dtype=int)
-for i in range(1,PNum+1):
-    for m in range(1,VNum+1):
-        PBinSum[i-1] = PBinSum[i-1] + XBoxNumber[i-1,m-1] 
-    for m in range(1,VNum+1):
-        if PBinSum[i-1]>0:
-            PBoxPercent[i-1,m-1] = (XBoxNumber[i-1,m-1] / PBinSum[i-1])*100
-# Convert the counts to percentages along each wind-speed column (kept for later use).
-VBoxPercent = np.zeros((PNum, VNum))  # original note: (86 100); float because the percentages are fractional
-VBinSum = np.zeros((VNum,1),dtype=int)
-for i in range(1,VNum+1):
-    for m in range(1,PNum+1):
-        VBinSum[i-1] = VBinSum[i-1] + XBoxNumber[m-1,i-1] 
-    for m in range(1,PNum+1):
-        if VBinSum[i-1]>0:
-            VBoxPercent[m-1,i-1] = (XBoxNumber[m-1,i-1] / VBinSum[i-1])*100
-# VBoxPercent / PBoxPercent run top-left to bottom-right.
-# Build row-reversed copies (first row becomes the last row) to ease visualisation.
-InvXBoxNumber = np.zeros((PNum,VNum),dtype = int)
-InvPBoxPercent = np.zeros((PNum,VNum),dtype = float)
-InvVBoxPercent = np.zeros((PNum,VNum),dtype = float)
-for m in range(1,PNum+1):
-    for n in range(1,VNum+1):
-        InvXBoxNumber[m-1,n-1] = XBoxNumber[PNum-(m-1)-1,n-1]
-        InvPBoxPercent[m-1,n-1] = PBoxPercent[PNum-(m-1)-1,n-1]
-        InvVBoxPercent[m-1,n-1] = VBoxPercent[PNum-(m-1)-1,n-1]
-
-# Per horizontal power band: locate the centre of the main band, i.e. the cell with the largest percentage.
-PBoxMaxIndex = np.zeros((PNum,1),dtype = int)  # column index of the largest cell in each power band
-PBoxMaxP = np.zeros((PNum,1),dtype = float)       # percentage of that largest cell
-for m in range(1,PNum+1):
-    PBoxMaxIndex[m-1] = np.argmax(PBoxPercent[m-1, :])   # argmax returns the index of the maximum
-    PBoxMaxP[m-1] = np.max(PBoxPercent[m-1, :])
-# Per vertical wind-speed band: locate the cell with the largest percentage.
-VBoxMaxIndex = np.zeros((VNum,1),dtype = int)  
-VBoxMaxV = np.zeros((VNum,1),dtype = float)       
-for m in range(1,VNum+1):
-    VBoxMaxIndex[m-1] = np.argmax(VBoxPercent[:, m-1])   
-    VBoxMaxV[m-1] = np.max(VBoxPercent[:, m-1])
-
-# Special handling near cut-in: if the lowest band's maximum lies too far right, pull it back left.
-if PBoxMaxIndex[0]>14:                     # the first entry is the lowest power band (original note: the cut-in wind speed)
-    PBoxMaxIndex[0] = 9 
-# Analyse along the horizontal power bands.
-DotDense = np.zeros(PNum)   # number of cells in the main band of each power band
-DotDenseLeftRight = np.zeros((PNum,2))  # cells spread to the left [:,0] and right [:,1] of the largest cell
-DotValve = 90  # threshold on the summed percentage while spreading left/right from the centre
-PDotDenseSum = 0
-for i in range(PNum - TopP):  # from the lowest power band upwards to band PNum - TopP
-    PDotDenseSum = PBoxMaxP[i]  # start from the largest cell, then accumulate the percentages of neighbouring cells
-    iSpreadRight = 1  
-    iSpreadLeft = 1  
-      
-    while PDotDenseSum < DotValve:  
-        if (PBoxMaxIndex[i] + iSpreadRight) < VNum-1-1:  
-            PDotDenseSum += PBoxPercent[i, PBoxMaxIndex[i] + iSpreadRight]  # spread to the right
-            iSpreadRight += 1  
-        else:
-            break  
-          
-        if (PBoxMaxIndex[i] - iSpreadLeft) > 0:  
-            PDotDenseSum += PBoxPercent[i, PBoxMaxIndex[i] - iSpreadLeft]  # spread to the left
-            iSpreadLeft += 1  
-        else:  
-            break  
-    # Both counters started at 1, so subtract one to get the number of cells added.
-    iSpreadRight = iSpreadRight-1
-    iSpreadLeft = iSpreadLeft-1
-    # Spreading finished for this band.
-    DotDenseLeftRight[i, 0] = iSpreadLeft  # left
-    DotDenseLeftRight[i, 1] = iSpreadRight  # right
-    DotDense[i] = iSpreadLeft + iSpreadRight + 1  # total cells: left + right + the centre cell
-# DotDense and DotDenseLeftRight are now filled in.
-# Original note: the median of the right-hand width is the most representative (right is spread before left).
-# NOTE(review): the array is named ...Left but stores the right-hand spread (column 1).
-DotDenseWidthLeft = np.zeros((PNum-TopP))
-for i in range(PNum-TopP):
-    DotDenseWidthLeft[i] = DotDenseLeftRight[i,1]  # DotDenseLeftRight[i,1]: cells spread to the right
-MainBandRight = np.median(DotDenseWidthLeft) # median
-
-# Initialise variables.
-PowerLimit = np.zeros(PNum, dtype=int)  # per power band: 1 = power-limited, 0 = not
-WidthAverage = 0  # average right-hand width of the main band
-WidthAverage_L = 0  # average left-hand width of the main band
-WidthVar = 0  # spread of the main-band width (recomputed below)
-PowerLimitValve = 6  # threshold used to detect a power-limited band
-N_Pcount = 20  # threshold on the number of points in a band
-  
-nCounterLimit = 0  # number of power-limited bands
-nCounter = 0  # number of normal power bands
-  
-# Loop over the power bands below PNum - TopP.
-for i in range(PNum - TopP):  
-    # Right-hand spread above the threshold and more than N_Pcount points: mark as power-limited.
-    if (DotDenseLeftRight[i, 1] > PowerLimitValve) and (PBinSum[i] > N_Pcount):  
-        PowerLimit[i] = 1  
-        nCounterLimit += 1  # count of power-limited bands
-      
-    # Right-hand spread at or below the threshold: accumulate the widths.
-    # NOTE(review): WidthAverage_L also adds column 1 (right), not column 0 (left);
-    # the original comment already suspected the left width is wrong - confirm.
-    if DotDenseLeftRight[i, 1] <= PowerLimitValve:  
-        WidthAverage += DotDenseLeftRight[i, 1]  # right-hand width of a normal band
-        WidthAverage_L += DotDenseLeftRight[i,1]   # intended as the left-hand width of a normal band
-        nCounter += 1  
-# Average widths; divide by 1 when no band qualified to avoid dividing by zero.
-WidthAverage /= nCounter if nCounter > 0 else 1  
-WidthAverage_L /= nCounter if nCounter > 0 else 1   
-
-# Measure how consistent the main-band width is across the normal (non-limited) power bands.
-WidthVar = 0  
-for i in range(PNum - TopP):  
-    # Right-hand spread at or below the threshold: add the squared deviation from the mean.
-    if DotDenseLeftRight[i, 1] <= PowerLimitValve:  
-        WidthVar += (DotDenseLeftRight[i, 1] - WidthAverage) ** 2  
-# Dividing by nCounter-1 gives the sample variance; np.sqrt then turns it into a
-# standard deviation despite the variable name. 0 is used when nCounter <= 1.
-WidthVar = np.sqrt(WidthVar / (nCounter - 1) if nCounter > 1 else 0)  
-
-# Wind-speed extent of the main band: (right width + left width) * bin width.
-PowerBandWidth = WidthAverage*intervalwindspeed+WidthAverage_L*intervalwindspeed
-
-# Correct the largest-cell index of power-limited bands.
-# NOTE(review): when TopP is 0 the last i equals PNum, which is past the end of
-# PowerLimit (length PNum) - confirm the intended upper bound.
-for i in range(1, PNum - TopP+1):  
-    if (PowerLimit[i] == 1) and (abs(PBoxMaxIndex[i] - PBoxMaxIndex[i - 1]) > 5):  
-        PBoxMaxIndex[i] = PBoxMaxIndex[i - 1] + 1  
-  
-# Left/right spread per band in reversed row order.
-DotDenseInverse = np.flipud(DotDenseLeftRight)  # flip the array upside down
-  
-# Left/right boundaries of the main band.
-CurveWidthR = np.ceil(WidthAverage) + 2  # right boundary: rounded-up average width + 2
-CurveWidthL = np.ceil(WidthAverage_L) + 2  # left boundary: rounded-up average width + 2
-  
-# Per-cell flag: is this a power-limited cell?
-BBoxLimit = np.zeros((PNum, VNum), dtype=int)  
-# Mark the power-limited cells to the right of the main band.
-for i in range(2, PNum - TopP):  
-    if PowerLimit[i] == 1:
-        BBoxLimit[i, int(PBoxMaxIndex[i] + CurveWidthR + 1):VNum] = 1
-
-# Per-cell flag for abnormal data to be removed (0 = keep; 1, 2, 3 are set below).
-BBoxRemove = np.zeros((PNum, VNum), dtype=int)  
-# Mark the cells to remove.
-for m in range(PNum - TopP): 
-    for n in range(int(PBoxMaxIndex[m]) + int(CurveWidthR), VNum):  # cells right of the main band
-        BBoxRemove[m, n] = 1 
-    # Cells left of the main band (over-production): from the left boundary down to the first cell.
-    
-    for n in range(int(PBoxMaxIndex[m]) - int(CurveWidthL)+1, 0, -1):  # negative step iterates right to left
-        BBoxRemove[m, n-1] = 2  # n is 1-based here, hence n-1
-
-# Initialise variables.
-CurveTop = np.zeros((2, 1), dtype=int)  
-CurveTopValve = 1  # percentage threshold for a cell
-BTopFind = 0  
-mm = 0  
-# Locate the upper-left knee of the main band, i.e. the cell index at rated wind speed.
-# (The four initialisations below repeat the ones just above.)
-CurveTop = np.zeros((2, 1), dtype=int)  
-CurveTopValve = 1  # percentage threshold for a cell
-BTopFind = 0  
-mm = 0   
-# NOTE(review): m starts at PNum - TopP and is used directly as a row index of
-# VBoxPercent / XBoxNumber (PNum rows); with TopP == 0 the first m is out of
-# range - confirm.
-for m in range(PNum - TopP, 0, -1):  # counts down from PNum-TopP to 1 (0 excluded)
-    for n in range(int(np.floor(int(VCutIn) / intervalwindspeed)), VNum - 1):  # floor rounds down
-        if (VBoxPercent[m, n - 1] < VBoxPercent[m, n]) and (VBoxPercent[m, n] <= VBoxPercent[m, n + 1]) and (XBoxNumber[m, n] >= 3):   
-            CurveTop[0] = m  
-            CurveTop[1] = n  # original note: [80th, 40th]
-            BTopFind = 1  
-            mm = m  # mm is the index of the power bin holding the knee
-            break  # found: leave the inner loop
-    if BTopFind == 1:  
-        break  # found: leave the outer loop
-        
-IsolateValve = 3  # threshold (3%) for isolated cells right of the main band
-# Walk the power bins and the cells right of the main band.
-for m in range(PNum - TopP):    
-    for n in range(int(PBoxMaxIndex[m]) + int(CurveWidthR), VNum):  
-        # Cells whose PBoxPercent is below the threshold are flagged 1.
-        if PBoxPercent[m, n] < IsolateValve:   
-            BBoxRemove[m, n] = 1
-# Width of the top of the main band.
-CurveWidthT = np.floor((maxP - PRated) / intervalP) + 1  
-# Flag the cells above rated power (rows PNum-TopP .. PNum-1) with 3.
-for m in range(PNum - TopP, PNum):   
-    for n in range(VNum):  
-        BBoxRemove[m, n] = 3
-  
-# Flag the under-production cells left of the knee with 2.
-for m in range(mm-1, PNum - TopP): 
-    for n in range(int(CurveTop[1]) - 2):
-        BBoxRemove[m, n] = 2    # BBoxRemove now carries the over-production and under-production flags
-
-# Derive each sample's flag from the flag of the cell it falls into.
-# DzMarch809Sel holds one flag per sample.
-DzMarch809Sel = np.zeros(nCounter1, dtype=int)  # flag array, initialised to 0
-nWhichP = 0  
-nWhichV = 0  
-nBadA = 0   
-# NOTE(review): the wind-speed bins here are centred on (n+1)*intervalwindspeed,
-# unlike the ((n-1)*w, n*w] bins used when XBoxNumber was counted; nWhichP and
-# nWhichV also keep their previous value when no bin matches - confirm both.
-for i in range(nCounter1):  
-    for m in range( PNum ):   
-        if (DzMarch809[i, 1] > m * intervalP) and (DzMarch809[i, 1] <= (m+1) * intervalP):  
-            nWhichP = m  # m is a 0-based index
-            break  
-    for n in range( VNum ):  # n is a 0-based index
-        if DzMarch809[i, 0] > ((n+1) * intervalwindspeed - intervalwindspeed/2) and DzMarch809[i, 0] <= ((n+1) * intervalwindspeed + intervalwindspeed / 2):  
-            nWhichV = n  # index
-            break  
-    if nWhichP >= 0 and nWhichV >= 0:  
-        if BBoxRemove[nWhichP, nWhichV] == 1:   
-            DzMarch809Sel[i] = 1  
-            nBadA += 1  
-        elif BBoxRemove[nWhichP, nWhichV] == 2:  
-            DzMarch809Sel[i] = 2  
-        elif BBoxRemove[nWhichP , nWhichV] == 3:  
-            DzMarch809Sel[i] = 0  # over-production points above rated are treated as normal and not flagged
-# DzMarch809Sel now holds the flag of every sample.
-
-############################## Sliding-window method
-# Storage for the load-limited samples.
-PVLimit = np.zeros((nCounter1, 3))  # columns 0-1: wind speed, power; column 2: row position of the limited point
-nLimitTotal = 0  
-nWindowLength = 6   # sliding-window length
-LimitWindow = np.zeros(nWindowLength)  # window buffer
-UpLimit = 0    # upper limit
-LowLimit = 0   # lower limit
-PowerStd = 30  # allowed power fluctuation around the first sample of the window
-nWindowNum = np.floor(nCounter1/nWindowLength) # original note: 6587
-PowerLimitUp = PRated - 100  
-PowerLimitLow = 100  
-
-# Loop over the (non-overlapping) windows.
-for i in range(int(nWindowNum)):  
-    start_idx = i * nWindowLength  
-    end_idx = start_idx + nWindowLength  
-    LimitWindow = DzMarch809[start_idx:end_idx, 1]  # power values of the current window
-      
-    # Skip the window unless every value lies within [PowerLimitLow, PowerLimitUp].
-    bAllInAreas = np.all(LimitWindow >= PowerLimitLow) and np.all(LimitWindow <= PowerLimitUp)  
-    if not bAllInAreas:  
-        continue  
-      
-    # Band of +/- PowerStd around the first value of the window.
-    UpLimit = LimitWindow[0] + PowerStd  
-    LowLimit = LimitWindow[0] - PowerStd  
-      
-    # Check whether every value of the window stays inside that band.
-    bAllInUpLow = np.all(LimitWindow >= LowLimit) and np.all(LimitWindow <= UpLimit)  
-    if bAllInUpLow:  
-        # Flag the samples of this window as load-limited (4).
-        DzMarch809Sel[start_idx:end_idx] = 4  
-          
-        # Store the load-limited samples.
-        for j in range(nWindowLength):  
-            PVLimit[nLimitTotal, :2] = DzMarch809[start_idx + j, :2]  
-            PVLimit[nLimitTotal, 2] = Point_line[start_idx + j]  # original row position of the sample
-            nLimitTotal += 1  
-# PVLimit now holds the load-limited samples; nLimitTotal is their count.
-
-
-# Smooth the sawtooth edge of the main band.
-# Counter of points re-selected by the smoothing.
-nSmooth = 0  
-# Walk the PBoxMaxIndex entries except the last TopP+1.
-for i in range(PNum - TopP - 1):  
-    PVLeftDown = np.zeros(2)  
-    PVRightUp = np.zeros(2)  
-    # Only when the next band's maximum lies at least one cell further right.
-    if PBoxMaxIndex[i + 1] - PBoxMaxIndex[i] >= 1:  
-        # Lower-left and upper-right corners of the step. The literals 0.25 and 25
-        # equal intervalwindspeed and intervalP as set earlier in this script.
-        PVLeftDown[0] = (PBoxMaxIndex[i]+1 + CurveWidthR) * 0.25 - 0.125  
-        PVLeftDown[1] = (i) * 25  
-        PVRightUp[0] = (PBoxMaxIndex[i+1]+1 + CurveWidthR) * 0.25 - 0.125  
-        PVRightUp[1] = (i+1) * 25  
-          
-        # Walk all valid samples.
-        for m in range(nCounter1):  
-            # Is the sample strictly inside the step rectangle?
-            if (DzMarch809[m, 0] > PVLeftDown[0]) and (DzMarch809[m, 0] < PVRightUp[0]) and (DzMarch809[m, 1] > PVLeftDown[1]) and (DzMarch809[m, 1] < PVRightUp[1]):
-                # Is its slope from the lower-left corner steeper than the diagonal?
-                if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (PVRightUp[1] - PVLeftDown[1]) / (PVRightUp[0] - PVLeftDown[0]):
-                    # Sample is in the upper-left triangle: reset its flag to 0 and count it.
-                    DzMarch809Sel[m] = 0  
-                    nSmooth += 1  
-# DzMarch809Sel now includes the smoothing result; nSmooth is the number of points reset.
-
-
-################################### Store the results
-# Collect the good points (flag 0).
-nCounterPV = 0  # counter
-PVDot = np.zeros((nCounter1, 3))  # buffer for good points; nCounter1 is the number of valid samples
-for i in range(nCounter1):  
-    if DzMarch809Sel[i] == 0:  
-        nCounterPV += 1  
-        PVDot[nCounterPV-1, :2] = DzMarch809[i, :2]  
-        PVDot[nCounterPV-1, 2] = Point_line[i]  # Point_line gives the sample's 1-based position in the original data
-nCounterVP = nCounterPV  
- 
-# Label the good points in the full data set with 1.
-for i in range(nCounterVP):  
-    Labeled_March809[int(PVDot[i, 2] - 1), (SM[1]-1)] = 1  # stored position is 1-based; convert to an integer 0-based index
- 
-# Collect the bad points (flags 1, 2, 3).
-nCounterBad = 0  # counter
-PVBad = np.zeros((nCounter1, 3))  # buffer for bad points
-for i in range(nCounter1):  
-    if DzMarch809Sel[i] in [1, 2, 3]:  
-        nCounterBad += 1  
-        PVBad[nCounterBad-1, :2] = DzMarch809[i, :2]  
-        PVBad[nCounterBad-1, 2] = Point_line[i]  
-    
-# Label the bad points in the full data set with 5.
-for i in range(nCounterBad):  
-    Labeled_March809[int(PVBad[i, 2] - 1),(SM[1]-1)] = 5  # bad-point label
-
-# Label the power-limited points in the full data set with 4.
-for i in range(nLimitTotal):  
-    Labeled_March809[int(PVLimit[i, 2] - 1),(SM[1]-1)] = 4  # power-limited label
-# All samples are labelled now.
-# Labeled_March809 is an array; take its fourth column (the labels) as a DataFrame.
-A = Labeled_March809[:,3]
-A=pd.DataFrame(A,columns=['lab'])
-
-
-mergedTable = pd.concat([scada_10min,A],axis=1)# merge the labels into the SCADA DataFrame
-D = mergedTable[mergedTable['lab'] == 1]# keep the rows labelled 1
-
-ws = D["风速"].values  # array
-ap = D["变频器电网侧有功功率"]
-
-# fig=plt.figure(figsize=(10,6),dpi=500)  # figsize: figure size, dpi: resolution
-fig=plt.figure()  
-plt.scatter(ws,ap,s=1,c='black',marker='.') # '.' is smaller than 'o'
-
-# plt.scatter(x2,y2,s=10,c='b',marker='.',label='5.8-6.5建模噪声点')
-
-# Fixed tick spacing: 5 on the x axis, 500 on the y axis.
-x_major_locator=MultipleLocator(5)
-y_major_locator=MultipleLocator(500)
-ax=plt.gca()
-ax.xaxis.set_major_locator(x_major_locator)
-ax.yaxis.set_major_locator(y_major_locator)
-plt.xlim((0,30))
-plt.ylim((0,2200))
-plt.tick_params(labelsize=8)
-
-# plt.grid(c='dimgray',alpha=0.2)
-
-plt.xlabel("V/(m$·$s$^{-1}$)",fontsize=8)
-plt.ylabel("P/kW",fontsize=8)
-
-# plt.savefig(r'D:\赵雅丽\研究生学习资料\学习资料\劣化度健康度\spyder\大论文\图\风速-功率.jpg',bbox_inches='tight')
-plt.show()

+ 0 - 507
dataAnalysisBusiness/demo/SCADA_10min_category_1.py

@@ -1,507 +0,0 @@
-import os
-import re
-import math
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.pyplot import MultipleLocator#设定固定刻度
-
-def scada_10min_category():
-    turbine_number=24
-    
-    fpath = 'D:/赵雅丽/实习/算法/min_scada_LuoTuoGou/72/'
-    # 定义一个正则表达式来匹配纯数字文件名且扩展名为.csv  
-    pattern = re.compile(r'^\d+\.csv$')
-    
-    # 列出指定路径下的所有文件和文件夹  
-    files_in_dir = os.listdir(fpath)
-    for file in files_in_dir:  
-        # 使用正则表达式匹配文件名  
-        if pattern.match(file):  
-            # 拼接文件的完整路径  
-            fname = os.path.join(fpath, file)
-            # 读取csv文件,保持原始变量名而不忽略任何行
-            scada_10min = pd.read_csv(fname)
-        
-            # 显示数据        
-            time_stamp = scada_10min.loc[:,['时间']] #dataframe
-            active_power = scada_10min.loc[:,['变频器电网侧有功功率']]
-            wind_speed = scada_10min.loc[:,['风速']]
-            LM = pd.concat([time_stamp,active_power,wind_speed],axis=1)  #dataframe
-            # lm=LM.values #array
-    
-            xx = data_label(LM,fpath)#dataframe
-            mergedTable = pd.concat([scada_10min,xx],axis=1)#合并dataframe
-            D = mergedTable[mergedTable['lab'] == 1]#选择为1的行
-            ws = D["风速"]#series
-            ap = D["变频器电网侧有功功率"]
-            ##绘图
-            # fig = plt.figure(figsize=(10,6),dpi=500)  #figsize是图形大小,dpi像素
-            plt.scatter(ws,ap,s=8,c='black',marker='.',label='好点')
-            # x_major_locator=MultipleLocator(5)
-            # y_major_locator=MultipleLocator(500)
-            # ax=plt.gca()
-            # ax.xaxis.set_major_locator(x_major_locator)
-            # ax.yaxis.set_major_locator(y_major_locator)
-            # plt.xlim((0,30))
-            # plt.ylim((0,2200))
-            # plt.tick_params(labelsize=20)
-            # # plt.grid(c='dimgray',alpha=0.2)
-            # plt.xlabel("V/(m$·$s$^{-1}$)",fontsize=20)
-            # plt.ylabel("P/kW",fontsize=20)
-
-            # # plt.savefig(r'D:\赵雅丽\研究生学习资料\学习资料\劣化度健康度\spyder\大论文\图\风速-功率.jpg',bbox_inches='tight')
-            # plt.show()
-        
-        
-        
-def data_label(x1,x2):   # LM:T P V  path:文件获取路径
-    fpath2 = x2
-    fname2 = os.path.join(fpath2, "info.csv") #读取数据文件2(额定风速额定功率等)
-    # 参数na_filter=False仅阻止了pandas自动检测这些缺失值,并不能忽略  
-    # 但请注意,pandas没有直接的'omitrow'选项,如果需要忽略包含缺失值的行,需要在后续处理中处理
-    turbine_info = pd.read_csv(fname2, na_filter=False)
-    # 删除包含任何缺失值的行  
-    turbine_info = turbine_info.dropna() 
-    
-    PRated = turbine_info.loc[:,["额定功率"]] #dataframe
-    VCutOut = turbine_info.loc[:,["切出风速"]]  
-    VCutIn = turbine_info.loc[:,["切入风速"]]  
-    VRated = turbine_info.loc[:,["额定风速"]]
-    
-    #网格法确定风速风向分区数量,功率方向分区数量
-    Labeled_March809 = x1
-    APower = Labeled_March809["active_power"]  #series读入有功功率
-    WSpeed = Labeled_March809["wind_speed"]  #读入风速
-    maxP=np.max(APower)
-    intervalP=25  #ceil(PRated*0.01)#功率分区间隔为额定功率的1%
-    intervalwindspeed=0.25  #风速分区间隔0.25m/s
-    #初始化
-    PNum = 0  
-    TopP = 0   
-    # 根据条件计算PNum和TopP  
-    if maxP >= PRated:  
-        PNum = math.floor(maxP / intervalP) + 1  
-        TopP = math.floor((maxP - PRated) / intervalP) + 1  
-    else:  
-        PNum = math.floor(PRated / intervalP)  
-        TopP = 0   
-    VNum = math.ceil(VCutOut / intervalwindspeed)  
-    SM1 = Labeled_March809.shape  
-    AA1 = SM1[0]  #运行数据的条数
-    lab = [[0] for _ in range(AA1)]  #创建全0空列表
-    lab = pd.DataFrame(lab,columns=['lab'])
-    Labeled_March809 = pd.concat([Labeled_March809,lab],axis=1)  #在tpv后加一列标签列
-    SM = Labeled_March809.shape #(52561,4)
-    AA = SM[0]  
-    #存储功率大于0的运行数据
-    #标识功率为0的点,标识-1
-    DzMarch809_0 = np.zeros(AA, 3)  #array(52561,3)
-    nCounter1 = 1
-    Point_line=np.zeros(AA,1)
-    #考虑到很多功率小于10的数据存在,将<10的功率视为0
-    for i in range(AA):
-        if (APower[i] > 10) & (WSpeed[i] > 0):
-            nCounter1 += 1   #共有nCounter1个功率大于0的正常数据
-            DzMarch809_0[nCounter1-1, 0] = WSpeed[i]  
-            DzMarch809_0[nCounter1-1, 1] = APower[i]  
-            Point_line[nCounter1-1] = i+1  # 记录nCounter1记下的数据在原始数据中的位置  
-        if APower[i] <= 10: 
-            Labeled_March809[i,SM[1]-1] = -1  # 功率为0标识为-1  array类型
-    # 截取DzMarch809_0中实际存储的数据  其他全为0
-    DzMarch809 = DzMarch809_0[:nCounter1, :]  
-    #统计各网格落入的散点个数
-    XBoxNumber = np.ones((PNum, VNum),dtype=int)  #(86 100)
-    nWhichP = 0
-    nWhichV = 0
-
-    # 循环遍历DzMarch809中的有效数据  
-    for i in range(nCounter1):  
-        
-        # 查找功率所在的区间  
-        for m in range(1, PNum + 1):  # 注意Python的range是左闭右开的,所以需要+1  
-            if (DzMarch809[i,1] > (m - 1) * intervalP) and (DzMarch809[i,1] <= m * intervalP):  
-                nWhichP = m  
-                break  
-          
-        # 查找风速所在的区间  
-        for n in range(1, VNum + 1):  # 同样需要+1  
-            if (DzMarch809[i, 0] > (n - 1)*intervalwindspeed) and (DzMarch809[i, 0] <= n*intervalwindspeed):  
-                nWhichV = n  
-                break  
-          
-        # 如果功率和风速都在有效区间内,增加对应网格的计数  
-        if (nWhichP > 0) and (nWhichV > 0):  
-            XBoxNumber[nWhichP - 1, nWhichV - 1] += 1  # 注意Python的索引是从0开始的,所以需要减1  
-    # XBoxNumber现在包含了每个网格的计数[PNum行, VNum列]
-
-    for m in range(1,PNum+1):
-        for n in range(1,VNum+1):
-            XBoxNumber[m-1,n-1] = XBoxNumber[m-1,n-1] - 1
-
-    #在功率方向将网格内散点绝对个数转换为相对百分比,备用
-    PBoxPercent = np.zeros((PNum, VNum))  #(86 100) #计算后会出现浮点型,所以不能定义int类型
-    PBinSum = np.zeros((PNum,1),dtype=int)
-    for i in range(1,PNum+1):
-        for m in range(1,VNum+1):
-            PBinSum[i-1] = PBinSum[i-1] + XBoxNumber[i-1,m-1] 
-        for m in range(1,VNum+1):
-            if PBinSum[i-1]>0:
-                PBoxPercent[i-1,m-1] = (XBoxNumber[i-1,m-1] / PBinSum[i-1])*100
-    #在风速方向将网格内散点绝对个数转换为相对百分比,备用          
-    VBoxPercent = np.zeros((PNum, VNum))  #(86 100) #计算后会出现浮点型,所以不能定义int类型
-    VBinSum = np.zeros((VNum,1),dtype=int)
-    for i in range(1,VNum+1):
-        for m in range(1,PNum+1):
-            VBinSum[i-1] = VBinSum[i-1] + XBoxNumber[m-1,i-1] 
-        for m in range(1,PNum+1):
-            if VBinSum[i-1]>0:
-                VBoxPercent[m-1,i-1] = (XBoxNumber[m-1,i-1] / VBinSum[i-1])*100
-    # VBoxPercent PBoxPercent 左上-右下
-    # 将数据颠倒一下  左下-右上         第一行换为倒数第一行 方便可视化
-    InvXBoxNumber = np.zeros((PNum,VNum),dtype = int)
-    InvPBoxPercent = np.zeros((PNum,VNum),dtype = float)
-    InvVBoxPercent = np.zeros((PNum,VNum),dtype = float)
-    for m in range(1,PNum+1):
-        for n in range(1,VNum+1):
-            InvXBoxNumber[m-1,n-1] = XBoxNumber[PNum-(m-1)-1,n-1]
-            InvPBoxPercent[m-1,n-1] = PBoxPercent[PNum-(m-1)-1,n-1]
-            InvVBoxPercent[m-1,n-1] = VBoxPercent[PNum-(m-1)-1,n-1]
-    
-    #以水平功率带方向为准,分析每个水平功率带中,功率主带中心,即找百分比最大的网格位置。
-    PBoxMaxIndex = np.zeros((PNum,1),dtype = int)  #水平功率带最大网格位置索引
-    PBoxMaxP = np.zeros((PNum,1),dtype = float)       #水平功率带最大网格百分比
-    for m in range(1,PNum+1):
-        PBoxMaxIndex[m-1] = np.argmax(PBoxPercent[m-1, :])   #argmax返回最大值的索引
-        PBoxMaxP[m-1] = np.max(PBoxPercent[m-1, :])
-    #以垂直风速方向为准,分析每个垂直风速带中,功率主带中心,即找百分比最大的网格位置。
-    VBoxMaxIndex = np.zeros((VNum,1),dtype = int)  
-    VBoxMaxV = np.zeros((VNum,1),dtype = float)       
-    for m in range(1,VNum+1):
-        VBoxMaxIndex[m-1] = np.argmax(VBoxPercent[:, m-1])   
-        VBoxMaxV[m-1] = np.max(VBoxPercent[:, m-1])
-    
-    #切入风速特殊处理,如果切入风速过于偏右,向左拉回
-    if PBoxMaxIndex[0]>14:                     #第一个值对应的是风速最小处 即切入风速
-        PBoxMaxIndex[0] = 9 
-    #以水平功率带方向为基准,进行分析
-    DotDense = np.zeros(PNum)   #每一水平功率带的功率主带包含的网格数
-    DotDenseLeftRight = np.zeros((PNum,2))  #存储每一水平功率带的功率主带以最大网格为中心,向左,向右扩展的网格数
-    DotValve = 90  #从中心向左右对称扩展网格的散点百分比和的阈值。
-    PDotDenseSum = 0
-    for i in range(PNum - TopP):  # 从最下层水平功率带开始,向上分析到特定的功率带  
-        PDotDenseSum = PBoxMaxP[i]  # 以中心最大水平功率带为基准,向左向右对称扩展网格,累加各网格散点百分比  
-        iSpreadRight = 1  
-        iSpreadLeft = 1  
-          
-        while PDotDenseSum < DotValve:  
-            if (PBoxMaxIndex[i] + iSpreadRight) < VNum-1-1:  
-                PDotDenseSum += PBoxPercent[i, PBoxMaxIndex[i] + iSpreadRight]  # 向右侧扩展  
-                iSpreadRight += 1  
-            else:
-                break  
-              
-            if (PBoxMaxIndex[i] - iSpreadLeft) > 0:  
-                PDotDenseSum += PBoxPercent[i, PBoxMaxIndex[i] - iSpreadLeft]  # 向左侧扩展  
-                iSpreadLeft += 1  
-            else:  
-                break  
-        iSpreadRight = iSpreadRight-1
-        iSpreadLeft = iSpreadLeft-1
-        #向左右扩展完毕
-        DotDenseLeftRight[i, 0] = iSpreadLeft  # 左  
-        DotDenseLeftRight[i, 1] = iSpreadRight  # 右  
-        DotDense[i] = iSpreadLeft + iSpreadRight + 1  # 记录向左向右扩展的个数及每个功率仓内网格的个数  
-    # 此时DotDense和DotDenseLeftRight数组已经包含了所需信息    
-    #各行功率主带右侧宽度的中位数最具有代表性(因为先右后左)
-    DotDenseWidthLeft = np.zeros((PNum-TopP))
-    for i in range(PNum-TopP):
-        DotDenseWidthLeft[i] = DotDenseLeftRight[i,1]  #DotDenseLeftRight[i,1]:向右延伸个数
-    MainBandRight = np.median(DotDenseWidthLeft) #计算中位数
-    
-    # 初始化变量  
-    PowerLimit = np.zeros(PNum, dtype=int)  # 各水平功率带是否为限功率标识,1:是;0:不是  
-    WidthAverage = 0  # 功率主带右侧平均宽度  
-    WidthAverage_L = 0  # 功率主带左侧平均宽度  
-    WidthVar = 0  # 功率主带方差(此变量在提供的代码中并未使用)  
-    PowerLimitValve = 6  # 限功率主带判别阈值  
-    N_Pcount = 20  # 阈值  
-      
-    nCounterLimit = 0  # 限功率的个数  
-    nCounter = 0  # 正常水平功率带的个数  
-      
-    # 循环遍历水平功率带,从第1个到第PNum-TopP个  
-    for i in range(PNum - TopP):  
-        # 如果向右扩展网格数大于阈值,且该水平功率带点总数大于20,则标记为限功率带  
-        if (DotDenseLeftRight[i, 1] > PowerLimitValve) and (PBinSum[i] > N_Pcount):  
-            PowerLimit[i] = 1  
-            nCounterLimit += 1  #限功率的个数
-          
-        # 如果向右扩展网格数小于等于阈值,则累加右侧宽度(左侧宽度在代码中似乎有误)  
-        if DotDenseLeftRight[i, 1] <= PowerLimitValve:  
-            WidthAverage += DotDenseLeftRight[i, 1]  # 统计正常水平功率带右侧宽度
-            WidthAverage_L += DotDenseLeftRight[i,1]   #统计正常水平功率带左侧宽度
-            nCounter += 1  
-    # 计算平均宽度  
-    WidthAverage /= nCounter if nCounter > 0 else 1  # 避免除以0的情况  
-    WidthAverage_L /= nCounter if nCounter > 0 else 1   
-
-    #计算正常(即非限功率)水平功率带的功率主带宽度的方差,以此来反映从下到上宽度是否一致
-    WidthVar = 0  # 功率主带宽度的方差   
-    for i in range(PNum - TopP):  
-        # 如果向右扩展网格数小于等于阈值,则计算当前宽度与平均宽度的差值平方  
-        if DotDenseLeftRight[i, 1] <= PowerLimitValve:  
-            WidthVar += (DotDenseLeftRight[i, 1] - WidthAverage) ** 2  
-    # 计算方差(注意:除以nCounter-1是为了得到样本方差)  
-    WidthVar = np.sqrt(WidthVar / (nCounter - 1) if nCounter > 1 else 0)  # 避免除以0的情况
-
-    #各水平功率带,功率主带的风速范围,右侧扩展网格数*2*0.25
-    PowerBandWidth = WidthAverage*intervalwindspeed+WidthAverage_L*intervalwindspeed
-
-    # 对限负荷水平功率带的最大网格进行修正  
-    for i in range(1, PNum - TopP+1):  
-        if (PowerLimit[i] == 1) and (abs(PBoxMaxIndex[i] - PBoxMaxIndex[i - 1]) > 5):  
-            PBoxMaxIndex[i] = PBoxMaxIndex[i - 1] + 1  
-      
-    # 输出各层功率主带的左右边界网格索引  
-    DotDenseInverse = np.flipud(DotDenseLeftRight)  # 上下翻转数组以得到反向顺序  
-      
-    # 计算功率主带的左右边界  
-    CurveWidthR = np.ceil(WidthAverage) + 2  # 功率主带的右边界 + 2  
-    CurveWidthL = np.ceil(WidthAverage_L) + 2  # 功率主带的左边界 + 2  
-      
-    # 网格是否为限功率网格的标识数组  
-    BBoxLimit = np.zeros((PNum, VNum), dtype=int)  
-    # 标记限功率网格  
-    for i in range(2, PNum - TopP):  
-        if PowerLimit[i] == 1:
-            BBoxLimit[i, int(PBoxMaxIndex[i] + CurveWidthR + 1):VNum] = 1
-
-    # 初始化数据异常需要剔除的网格标识数组  
-    BBoxRemove = np.zeros((PNum, VNum), dtype=int)  
-    # 标记需要剔除的网格  
-    for m in range(PNum - TopP): 
-        for n in range(int(PBoxMaxIndex[m]) + int(CurveWidthR), VNum):  # 注意Python中的索引从0开始,因此需要减去1  
-            BBoxRemove[m, n] = 1 
-        # 功率主带左侧的超发网格,从最大索引向左直到第一个网格  
-        
-        for n in range(int(PBoxMaxIndex[m]) - int(CurveWidthL)+1, 0, -1):  # 使用range的步长参数来实现从右向左的迭代  
-            BBoxRemove[m, n-1] = 2  # 注意Python中的索引从0开始,因此需要减去1
-
-    # 初始化变量  
-    CurveTop = np.zeros((2, 1), dtype=int)  
-    CurveTopValve = 1  # 网格的百分比阈值  
-    BTopFind = 0  
-    mm = 0  
-    #确定功率主带的左上拐点,即额定风速位置的网格索引
-    CurveTop = np.zeros((2, 1), dtype=int)  
-    CurveTopValve = 1  # 网格的百分比阈值  
-    BTopFind = 0  
-    mm = 0   
-    for m in range(PNum - TopP, 0, -1):  # 注意Python的range是左闭右开区间,所以这里从PNum-TopP开始到1(不包括0)  
-        for n in range(int(np.floor(int(VCutIn) / intervalwindspeed)), VNum - 1):  # 使用floor函数来向下取整  
-            if (VBoxPercent[m, n - 1] < VBoxPercent[m, n]) and (VBoxPercent[m, n] <= VBoxPercent[m, n + 1]) and (XBoxNumber[m, n] >= 3):   
-                CurveTop[0] = m  
-                CurveTop[1] = n  #[第80个,第40个]
-                BTopFind = 1  
-                mm = m  # mm是拐点所在功率仓,对应其index
-                break  # 找到后退出内层循环  
-        if BTopFind == 1:  
-            break  # 找到后退出外层循环
-            
-    IsolateValve = 3  #功率主带右侧孤立点占比功率仓阈值 3%
-    # 遍历功率仓和网格  
-    for m in range(PNum - TopP):    
-        for n in range(int(PBoxMaxIndex[m]) + int(CurveWidthR), VNum):  
-            # 检查PBoxPercent是否小于阈值,如果是,则标记BBoxRemove为1  
-            if PBoxPercent[m, n] < IsolateValve:   
-                BBoxRemove[m, n] = 1
-    #功率主带顶部宽度
-    CurveWidthT = np.floor((maxP - PRated) / intervalP) + 1  
-    # 标记额定功率以上的超发点(PNum-PTop之间)  
-    for m in range(PNum - TopP, PNum):   
-        for n in range(VNum):  
-            BBoxRemove[m, n] = 3
-      
-    # 标记功率主带拐点左侧的欠发网格  
-    for m in range(mm-1, PNum - TopP): 
-        for n in range(int(CurveTop[1]) - 2):
-            BBoxRemove[m, n] = 2    # BBoxRemove数组现在包含了根据条件标记的超发点和欠发网格的信息
-
-    #以网格的标识,决定该网格内数据的标识。
-    # DzMarch809Sel数组现在包含了每个数据点的标识
-    DzMarch809Sel = np.zeros(nCounter1, dtype=int)  # 初始化标识数组   
-    nWhichP = 0  
-    nWhichV = 0  
-    nBadA = 0   
-    for i in range(nCounter1):  
-        for m in range( PNum ):   
-            if (DzMarch809[i, 1] > m * intervalP) and (DzMarch809[i, 1] <= (m+1) * intervalP):  
-                nWhichP = m  #m记录的是index
-                break  
-        for n in range( VNum ):  # 注意Python的range是左闭右开区间,所以这里到VNum+1  
-            if DzMarch809[i, 0] > ((n+1) * intervalwindspeed - intervalwindspeed/2) and DzMarch809[i, 0] <= ((n+1) * intervalwindspeed + intervalwindspeed / 2):  
-                nWhichV = n  #index
-                break  
-        if nWhichP >= 0 and nWhichV >= 0:  
-            if BBoxRemove[nWhichP, nWhichV] == 1:   
-                DzMarch809Sel[i] = 1  
-                nBadA += 1  
-            elif BBoxRemove[nWhichP, nWhichV] == 2:  
-                DzMarch809Sel[i] = 2  
-            elif BBoxRemove[nWhichP , nWhichV] == 3:  
-                DzMarch809Sel[i] = 0  # 额定风速以上的超发功率点认为是正常点,不再标识  
-    # DzMarch809Sel数组现在包含了每个数据点的标识
-
-    ##############################滑动窗口方法
-    # 存储限负荷数据  
-    PVLimit = np.zeros((nCounter1, 3))  #存储限负荷数据  %第3列用于存储限电的点所在的行数
-    nLimitTotal = 0  
-    nWindowLength = 6   #滑动窗口长度设置为6
-    LimitWindow = np.zeros(nWindowLength)  #滑动窗口空列表
-    UpLimit = 0    #上限
-    LowLimit = 0   #下限
-    PowerStd = 30  # 功率波动方差  
-    nWindowNum = np.floor(nCounter1/nWindowLength) #6587
-    PowerLimitUp = PRated - 100  
-    PowerLimitLow = 100  
-
-    # 循环遍历每个窗口  
-    for i in range(int(nWindowNum)):  
-        start_idx = i * nWindowLength  
-        end_idx = start_idx + nWindowLength  
-        LimitWindow = DzMarch809[start_idx:end_idx, 1]  # 提取当前窗口的数据  
-          
-        # 检查窗口内所有数据是否在功率范围内  
-        bAllInAreas = np.all(LimitWindow >= PowerLimitLow) and np.all(LimitWindow <= PowerLimitUp)  
-        if not bAllInAreas:  
-            continue  
-          
-        # 计算方差上下限  
-        UpLimit = LimitWindow[0] + PowerStd  
-        LowLimit = LimitWindow[0] - PowerStd  
-          
-        # 检查窗口内数据是否在方差范围内  
-        bAllInUpLow = np.all(LimitWindow >= LowLimit) and np.all(LimitWindow <= UpLimit)  
-        if bAllInUpLow:  
-            # 标识窗口内的数据为限负荷数据  
-            DzMarch809Sel[start_idx:end_idx] = 4  
-              
-            # 存储限负荷数据  
-            for j in range(nWindowLength):  
-                PVLimit[nLimitTotal, :2] = DzMarch809[start_idx + j, :2]  
-                PVLimit[nLimitTotal, 2] = Point_line[start_idx + j]  # 对数据进行标识  
-                nLimitTotal += 1  
-    # PVLimit现在包含了限负荷数据,nLimitTotal是限负荷数据的总数
-    
-    
-    #将功率滑动窗口主带平滑化
-    # 初始化锯齿平滑的计数器  
-    nSmooth = 0  
-    # 遍历除了最后 TopP+1 个元素之外的所有 PBoxMaxIndex 元素  
-    for i in range(PNum - TopP - 1):  
-        PVLeftDown = np.zeros(2)  
-        PVRightUp = np.zeros(2)  
-        # 检查当前与下一个 PBoxMaxIndex 之间的距离是否大于等于1  
-        if PBoxMaxIndex[i + 1] - PBoxMaxIndex[i] >= 1:  
-            # 计算左下和右上顶点的坐标  
-            PVLeftDown[0] = (PBoxMaxIndex[i]+1 + CurveWidthR) * 0.25 - 0.125  
-            PVLeftDown[1] = (i) * 25  
-            PVRightUp[0] = (PBoxMaxIndex[i+1]+1 + CurveWidthR) * 0.25 - 0.125  
-            PVRightUp[1] = (i+1) * 25  
-              
-            # 遍历 DzMarch809 数组  
-            for m in range(nCounter1):  
-                # 检查当前点是否在锯齿区域内  
-                if (DzMarch809[m, 0] > PVLeftDown[0]) and (DzMarch809[m, 0] < PVRightUp[0]) and (DzMarch809[m, 1] > PVLeftDown[1]) and (DzMarch809[m, 1] < PVRightUp[1]):
-                    # 检查斜率是否大于对角连线  
-                    if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (PVRightUp[1] - PVLeftDown[1]) / (PVRightUp[0] - PVLeftDown[0]):
-                        # 如果在锯齿左上三角形中,则选中并增加锯齿平滑计数器  
-                        DzMarch809Sel[m] = 0  
-                        nSmooth += 1  
-    # DzMarch809Sel 数组现在+包含了锯齿平滑的选择结果,nSmooth 是选中的点数
-    ###################################存储数据
-    # 存储好点  
-    nCounterPV = 0  # 初始化计数器  
-    PVDot = np.zeros((nCounter1, 3))  # 初始化存储好点的数组  nCounter1是p>0的数
-    for i in range(nCounter1):  
-        if DzMarch809Sel[i] == 0:  
-            nCounterPV += 1  
-            PVDot[nCounterPV-1, :2] = DzMarch809[i, :2]  
-            PVDot[nCounterPV-1, 2] = Point_line[i]  # 好点 Point_line记录nCounter1在原始数据中的位置 
-    nCounterVP = nCounterPV  
-     
-    # 对所有数据中的好点进行标注    
-    for i in range(nCounterVP):  
-        Labeled_March809[int(PVDot[i, 2] - 1), (SM[1]-1)] = 1  # 注意Python的索引从0开始,并且需要转换为整数索引  
-     
-    # 存储坏点  
-    nCounterBad = 0  # 初始化计数器  
-    PVBad = np.zeros((nCounter1, 3))  # 初始化存储坏点的数组  
-    for i in range(nCounter1):  
-        if DzMarch809Sel[i] in [1, 2, 3]:  
-            nCounterBad += 1  
-            PVBad[nCounterBad-1, :2] = DzMarch809[i, :2]  
-            PVBad[nCounterBad-1, 2] = Point_line[i]  
-        
-    # 对所有数据中的坏点进行标注  
-    for i in range(nCounterBad):  
-        Labeled_March809[int(PVBad[i, 2] - 1),(SM[1]-1)] = 5  # 坏点标识  
-
-    # 对所有数据中的限电点进行标注   
-    for i in range(nLimitTotal):  
-        Labeled_March809[int(PVLimit[i, 2] - 1),(SM[1]-1)] = 4  # 限电点标识  
-
-    # 对所有的数据点进行标注  
-    # Labeled_March809是array,提取所第四列的值保存为dataframe
-    A = Labeled_March809[:,3]
-    A=pd.DataFrame(A,columns=['lab'])
-    return A
-
-
-# scada_10min_category()
- 
-
-
-
-    
-    
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    
-
-
-

+ 0 - 193
dataAnalysisBusiness/demo/SCADA_10min_category_2.py

@@ -1,193 +0,0 @@
-import os
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.pyplot import MultipleLocator
-import math
-
-
-intervalPower = 25  # For example
-intervalWindspeed = 0.25  # For example
-
-fieldRatedPower="额定功率"
-fieldRatedWindSpeed="额定风速"
-fieldWindSpeedCutIn="切入风速"
-fieldWindSpeedCutOut="切出风速"
-
-fieldTime="时间"
-fieldWindSpeed="风速"
-fieldActivePower="变频器电网侧有功功率"
-fieldLabel="lab"
-
-# 1. 数据加载和预处理函数
-def loadData(filePathSCADA:str, filePathTurbineInfo:str):
-    dataFrameSCADA = pd.read_csv(filePathSCADA, encoding="utf-8")
-    dataFrameTurbineInfo = pd.read_csv(filePathTurbineInfo)
-    return dataFrameSCADA, dataFrameTurbineInfo
-
-def extractTurbineParameters(turbineInfo:pd.DataFrame):
-    """
-    解析风电机组参数 
-
-    参数:
-        turbineInfo 风电机组信息DataFrame
-
-    返回:
-        PRated 额定功率(kw)
-        VCutOut 切出风速(m/s)
-        VCutIn 切入风速(m/s)
-        VRated 额定风速(m/s)
-    """
-    ratedPower = turbineInfo.loc[:, [fieldRatedPower]].values
-    windSpeedCutIn = turbineInfo.loc[:, [fieldWindSpeedCutIn]].values
-    windSpeedCutOut = turbineInfo.loc[:, [fieldWindSpeedCutOut]].values
-    ratedWindSpeed = turbineInfo.loc[:, [fieldRatedWindSpeed]].values
-
-    return ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed
-
-def preprocessData(dataFrameOfSCADA:pd.DataFrame):
-    """
-    获取机组SCADA数据的 时间、有功功率、风速,构建新的DataFrame变量
-
-    参数:
-        dataFrameOfSCADA 机组SCADA数据
-
-    返回:
-        由机组SCADA数据的 时间、有功功率、风速,构建新的DataFrame变量
-
-    """
-    timeStamp = dataFrameOfSCADA.loc[:, ['时间']]
-    activePower = dataFrameOfSCADA.loc[:, ['变频器电网侧有功功率']]
-    windSpeed = dataFrameOfSCADA.loc[:, ['风速']]
-    dataFramePartOfSCADA = pd.concat([timeStamp, activePower, windSpeed], axis=1)
-
-    dataFramePartOfSCADA[fieldLabel]=0
-    dataFramePartOfSCADA[fieldLabel]=dataFramePartOfSCADA[fieldLabel].astype(int)
-
-    return dataFramePartOfSCADA
-
-# 2. 数据标签分配和分箱计算
-def calculateIntervals(activePowerMax, ratedPower, windSpeedCutOut):
-    """
-    按有功功率(以25kw为间隔)、风速(以0.25m/s为间隔)分仓
-
-    参数:
-        max_power 当前机组的有功功率最大值
-        PRated  机组额定功率
-        wind_speed_cutout  切出风速
-
-    返回:
-        interval_power 有功功率分仓间隔
-        interval_windspeed 风速分仓间隔
-        PNum  有功功率分仓数量
-        VNum 风速分仓数量
-    """
-    binNumOfPower = math.floor(activePowerMax / intervalPower) + 1 if activePowerMax >= ratedPower else math.floor(ratedPower / intervalPower)
-    binNumOfWindSpeed = math.ceil(windSpeedCutOut / intervalWindspeed)
-
-    return binNumOfPower, binNumOfWindSpeed
-
-def labelData(dataFramePartOfSCADA:pd.DataFrame, conditions):
-    """
-    根据特定条件对数据进行标签分配,例如功率和风速阈值。
-    
-    参数:
-        LM (DataFrame): 包含功率和风速数据的DataFrame。
-        conditions (dict): 字典,键为条件名称,值为相应的阈值。
-    
-    返回:
-        DataFrame: 带有新的'label'列的原始DataFrame。
-    """
-    # 初始化标签列
-    dataFramePartOfSCADA['label'] = 0
-    
-    # 根据条件进行数据标签分配
-    for condition, threshold in conditions.items():
-        if condition == 'power_below':
-            dataFramePartOfSCADA.loc[dataFramePartOfSCADA[fieldActivePower] <= threshold, 'label'] = -1
-        elif condition == 'power_above':
-            dataFramePartOfSCADA.loc[dataFramePartOfSCADA[fieldActivePower] >= threshold, 'label'] = 1
-    
-    return dataFramePartOfSCADA
-
-def computeBins(data, intervals):
-    """为给定数据计算统计箱。
-    
-    参数:
-        data (DataFrame): 需要进行分箱的数据。
-        intervals (dict): 字典,为每个列指定间隔大小。
-    
-    返回:
-        DataFrame: 分箱数据作为区间内的计数或百分比。
-    """
-    binsResults = {}
-    for column, interval in intervals.items():
-        minValue = data[column].min()
-        maxValue = data[column].max()
-        bins = np.arange(minValue, maxValue + interval, interval)
-        binnedData = pd.cut(data[column], bins, include_lowest=True)
-        binCounts = pd.value_counts(binnedData, sort=False)
-        binsResults[column] = binCounts
-    
-    return pd.DataFrame(binsResults)
-
-# 3. 应用标签函数
-def applyLabels(data, labels):
-    """根据外部或计算出的标签对数据应用标签。
-    
-    参数:
-        data (DataFrame): 需要应用标签的数据。
-        labels (Series或array): 应用的标签;必须与数据的索引或长度相匹配。
-    
-    返回:
-        DataFrame: 应用标签后的数据。
-    """
-    data['label'] = labels
-    return data
-
-# 4. 数据可视化
-def plot_data(ws:list, ap:list):
-    fig = plt.figure()
-    plt.scatter(ws, ap, s=1, c='black', marker='.')
-    ax = plt.gca()
-    ax.xaxis.set_major_locator(MultipleLocator(5))
-    ax.yaxis.set_major_locator(MultipleLocator(500))
-    plt.xlim((0, 30))
-    plt.ylim((0, 2200))
-    plt.tick_params(labelsize=8)
-    plt.xlabel("V/(m$·$s$^{-1}$)", fontsize=8)
-    plt.ylabel("P/kW", fontsize=8)
-    plt.show()
-
-# 5. Main Execution
-def main():
-    turbine=82
-    filePathSCADA = r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72\{}.csv'.format(turbine)
-    filePathTurbineInfo = r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72\info.csv'
-    outputFilePathOfSCADA=r"E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72\labeled\labeled_{}.csv".format(turbine)
-
-    dataFrameOfSCADA, turbineInfo = loadData(filePathSCADA, filePathTurbineInfo)
-    ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed = extractTurbineParameters(turbineInfo)
-    dataFramePartOfSCADA = preprocessData(dataFrameOfSCADA)
-
-    powerMax=dataFramePartOfSCADA[fieldActivePower].max()
-    binNumOfPower, binNumOfWindSpeed=calculateIntervals(powerMax,ratedPower,windSpeedCutOut)
-    
-    # 根据功率阈值对数据进行标签分配
-    conditions = {'power_below': 10, 'power_above': ratedPower[0][0]}
-    labeledData = labelData(dataFramePartOfSCADA, conditions)
-    
-    # 为功率和风速计算分箱
-    intervals = {fieldActivePower: 100, fieldWindSpeed: 1}
-    binnedData = computeBins(labeledData, intervals)
-    
-    # 应用标签(假设某些外部标签被提供或在其他地方计算)
-    externalLabels = np.random.choice([0, 1], size=len(labeledData))  # 随机示例
-    labeledData = applyLabels(labeledData, externalLabels)
-
-    labeledData.to_csv(outputFilePathOfSCADA)
-    
-    plot_data(labeledData[fieldWindSpeed], labeledData[fieldActivePower])
-
-if __name__ == '__main__':
-    main()

+ 0 - 632
dataAnalysisBusiness/demo/SCADA_10min_category_3.py

@@ -1,632 +0,0 @@
-import os
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.pyplot import MultipleLocator
-import math
-import pdb
-# pdb.set_trace()  # 设置断点
-
-intervalPower = 25  # For example
-intervalWindspeed = 0.25  # For example
-
-fieldRatedPower="额定功率"
-fieldRatedWindSpeed="额定风速"
-fieldWindSpeedCutIn="切入风速"
-fieldWindSpeedCutOut="切出风速"
-
-fieldTime="时间"
-fieldWindSpeed="风速"
-fieldActivePower="变频器电网侧有功功率"
-fieldLabel="lab"
-
-# 1. 数据加载和预处理函数
-def loadData(filePathSCADA:str, filePathTurbineInfo:str):
-    dataFrameSCADA = pd.read_csv(filePathSCADA, encoding="utf-8")
-    dataFrameTurbineInfo = pd.read_csv(filePathTurbineInfo)
-    return dataFrameSCADA, dataFrameTurbineInfo
-
-def extractTurbineParameters(turbineInfo:pd.DataFrame):
-    """
-    解析风电机组参数 
-
-    参数:
-        turbineInfo 风电机组信息DataFrame
-
-    返回:
-        PRated 额定功率(kw)
-        VCutOut 切出风速(m/s)
-        VCutIn 切入风速(m/s)
-        VRated 额定风速(m/s)
-    """
-    ratedPower = turbineInfo.loc[:, [fieldRatedPower]].values
-    windSpeedCutIn = turbineInfo.loc[:, [fieldWindSpeedCutIn]].values
-    windSpeedCutOut = turbineInfo.loc[:, [fieldWindSpeedCutOut]].values
-    ratedWindSpeed = turbineInfo.loc[:, [fieldRatedWindSpeed]].values
-
-    return ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed
-
-def preprocessData(dataFrameOfSCADA:pd.DataFrame):
-    """
-    获取机组SCADA数据的 时间、有功功率、风速,构建新的DataFrame变量
-
-    参数:
-        dataFrameOfSCADA 机组SCADA数据
-
-    返回:
-        由机组SCADA数据的 时间、有功功率、风速,构建新的DataFrame变量
-
-    """
-    timeStamp = dataFrameOfSCADA.loc[:, ['时间']]
-    activePower = dataFrameOfSCADA.loc[:, ['变频器电网侧有功功率']]
-    windSpeed = dataFrameOfSCADA.loc[:, ['风速']]
-    dataFramePartOfSCADA = pd.concat([timeStamp,activePower,windSpeed], axis=1)
-
-    # dataFramePartOfSCADA[fieldLabel]=0
-    # dataFramePartOfSCADA[fieldLabel]=dataFramePartOfSCADA[fieldLabel].astype(int)
-
-    return dataFramePartOfSCADA
-
-    
-# 2. 数据标签分配和分箱计算
-def calculateIntervals(activePowerMax, ratedPower, windSpeedCutOut):
-    """
-    按有功功率(以25kw为间隔)、风速(以0.25m/s为间隔)分仓
-
-    参数:
-        max_power 当前机组的有功功率最大值
-        PRated  机组额定功率
-        wind_speed_cutout  切出风速
-
-    返回:
-        interval_power 有功功率分仓间隔
-        interval_windspeed 风速分仓间隔
-        PNum  有功功率分仓数量
-        VNum 风速分仓数量
-    """
-    binNumOfPower = math.floor(activePowerMax / intervalPower) + 1 if activePowerMax >= ratedPower else math.floor(ratedPower / intervalPower)
-    binNumOfWindSpeed = math.ceil(windSpeedCutOut / intervalWindspeed)
-
-    return binNumOfPower, binNumOfWindSpeed
-
-def calculateTopP(activePowerMax,ratedPower):
-    """
-    计算额定功率以上功率仓的个数
-
-    参数:
-        max_power 当前机组的有功功率最大值
-        PRated  机组额定功率
-        
-    返回:
-        TopP 额定功率以上功率仓的个数
-    """
-    TopP = 0   
-    if activePowerMax >= ratedPower: 
-        TopP = math.floor((activePowerMax - ratedPower) / intervalPower) + 1  
-    else:  
-        TopP = 0   
-    return TopP
-
-def chooseData(dataFramePartOfSCADA:pd.DataFrame, dataFrameOfSCADA):
-    """
-    根据特定条件对数据进行标签分配,例如功率和风速阈值。
-    
-    参数:
-        dataFramePartOfSCADA (DataFrame): 包含时间和功率和风速数据的DataFrame。
-        dataFrameOfSCADA: 原始数据
-    
-    返回:
-        DzMarch809: array:V P lab: 38181。
-        nCounter1: 个数
-        dataFramePartOfSCADA: 
-    """
-    # 初始化标签列
-    SM1 = dataFramePartOfSCADA.shape #(52561,3)
-    AA1 = SM1[0]  
-    lab = [[0] for _ in range(AA1)]
-    lab = pd.DataFrame(lab,columns=['lab'])
-    dataFramePartOfSCADA = pd.concat([dataFramePartOfSCADA,lab],axis=1)  #在tpv后加一列标签列
-    dataFramePartOfSCADA = dataFramePartOfSCADA.values
-    SM = dataFramePartOfSCADA.shape #(52561,4)
-    AA = SM[0] 
-    nCounter1 = 0 
-    DzMarch809_0 = np.zeros((AA, 3)) 
-    Point_line = np.zeros(AA, dtype=int)  
-    APower = dataFrameOfSCADA[fieldActivePower]
-    WSpeed = dataFrameOfSCADA[fieldWindSpeed]
-
-    for i in range(AA):
-        if (APower[i] > 10) & (WSpeed[i] > 0):
-            nCounter1 += 1  
-            DzMarch809_0[nCounter1-1, 0] = WSpeed[i]  
-            DzMarch809_0[nCounter1-1, 1] = APower[i] 
-            Point_line[nCounter1-1] = i+1  
-        if APower[i] <= 10: 
-            dataFramePartOfSCADA[i,SM[1]-1] = -1 
-    DzMarch809 = DzMarch809_0[:nCounter1, :] 
-    return DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM
-
-def gridCount(binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809):
-    """
-    统计各网格中落入label!=-1的数据点个数
-    
-    参数:
-        binNumOfWindSpeed: 风速分仓个数。
-        binNumOfPower: 功率分仓个数。
-        DataFrame: 带有新的'label'列的原始DataFrame。
-        nCounter1: 数据个数
-        DzMarch809
-    返回:
-        XBoxNumber: 各网格中落入label!=-1的数据点个数的array。
-    """
-    # 遍历有效数据
-    XBoxNumber = np.ones((binNumOfPower, binNumOfWindSpeed),dtype=int) 
-    for i in range(nCounter1):             
-        for m in range(1, binNumOfPower + 1):  
-            if (DzMarch809[i,1] > (m - 1) * intervalPower) and (DzMarch809[i,1] <= m * intervalPower):  
-                nWhichP = m  
-                break  
-        for n in range(1, binNumOfWindSpeed + 1):  
-            if (DzMarch809[i, 0] > (n - 1) * intervalWindspeed) and (DzMarch809[i, 0] <= n * intervalWindspeed):  
-                nWhichV = n  
-                break  
-        if (nWhichP > 0) and (nWhichV > 0):  
-            XBoxNumber[nWhichP - 1][nWhichV - 1] += 1
-    for m in range(1,binNumOfPower+1):
-        for n in range(1,binNumOfWindSpeed+1):
-            XBoxNumber[m-1,n-1] = XBoxNumber[m-1,n-1] - 1
-    
-    return XBoxNumber
-
-def percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed,axis):
-    """
-    计算分仓(水平/竖直)后每个网格占百分比
-    
-    参数:
-        XBoxNumber: 各网格中落入label!=-1的数据点个数的array。
-        binNumOfPower: 功率分仓个数。
-        binNumOfWindSpeed: 风速分仓个数。
-        axis: "power"or"speed"分仓
-    返回:
-        BoxPercent: 占比情况array。
-    """
-    BoxPercent = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=float)     
-    BinSum = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed, 1), dtype=int)
-    for i in range(1,1+(binNumOfPower if axis == 'power' else binNumOfWindSpeed)):
-        for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1):  
-            BinSum[i-1] = BinSum[i-1] + (XBoxNumber[i-1,m-1] if axis == 'power' else XBoxNumber[m-1,i-1])
-        for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1):  
-            if BinSum[i-1]>0:
-                if axis == 'power':
-                    BoxPercent[i-1,m-1] = (XBoxNumber[i-1,m-1] / BinSum[i-1])*100
-                else:
-                    BoxPercent[m-1,i-1] = (XBoxNumber[m-1,i-1] / BinSum[i-1])*100
-                    
-    return BoxPercent,BinSum
-
-def maxBoxPercentage(BoxPercent, binNumOfPower, binNumOfWindSpeed, axis):
-    """
-    计算分仓(水平/竖直)后占百分比最大的网格索引及值
-    
-    参数:
-        BoxPercent: 占比情况array。
-        binNumOfPower: 功率分仓个数。
-        binNumOfWindSpeed: 风速分仓个数。
-        axis: "power"or"speed"分仓
-    返回:
-        BoxMaxIndex: 占百分比最大的网格索引。
-        BoxMax: 占百分比最大的网格值
-    """
-    BoxMaxIndex = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = int) 
-    BoxMax = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = float)  
-    for m in range(1,(binNumOfPower if axis == 'power' else binNumOfWindSpeed)+1):
-        BoxMaxIndex[m-1] = (np.argmax(BoxPercent[m-1, :])) if axis == 'power' else (np.argmax(BoxPercent[:, m-1]))
-        BoxMax[m-1] = (np.max(BoxPercent[m-1, :]))if axis == 'power' else (np.max(BoxPercent[:, m-1]))
-
-    return BoxMaxIndex, BoxMax
-
-def extendBoxPercent(m, BoxMax,TopP,BoxMaxIndex,BoxPercent,binNumOfPower,binNumOfWindSpeed):
-    """
-    以中心最大水平功率带为基准,向两侧对称扩展网格,使网格散点百分比总值达到阈值m
-    
-    参数:
-        m: 设定总和百分比阈值。
-        BoxMax: 占百分比最大的网格值。
-        TopP: 额定功率以上功率仓个数。
-        BoxMaxIndex: 占百分比最大的网格索引。
-        BoxPercent: 占比情况array。
-        binNumOfPower: 功率分仓个数。
-        binNumOfWindSpeed: 风速分仓个数。
-    返回:
-        DotDense: 每个功率仓内网格的个数。
-        DotDenseLeftRight: 向左向右拓展的网格个数
-    """
-    DotDense = np.zeros(binNumOfPower)  
-    DotDenseLeftRight = np.zeros((binNumOfPower,2))
-    DotValve = m 
-    PDotDenseSum = 0
-    for i in range(binNumOfPower - TopP):
-        PDotDenseSum = BoxMax[i] 
-        iSpreadRight = 1  
-        iSpreadLeft = 1         
-        while PDotDenseSum < DotValve:  
-            if (BoxMaxIndex[i] + iSpreadRight) < binNumOfWindSpeed-1-1:  
-                PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] + iSpreadRight] 
-                iSpreadRight += 1  
-            else:
-                break             
-            if (BoxMaxIndex[i] - iSpreadLeft) > 0:  
-                PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] - iSpreadLeft] 
-                iSpreadLeft += 1  
-            else:  
-                break  
-        iSpreadRight = iSpreadRight-1
-        iSpreadLeft = iSpreadLeft-1
-       
-        DotDenseLeftRight[i, 0] = iSpreadLeft 
-        DotDenseLeftRight[i, 1] = iSpreadRight 
-        DotDense[i] = iSpreadLeft + iSpreadRight + 1    
-
-    return DotDenseLeftRight
-
-def calculatePWidth(binNumOfPower,TopP,DotDenseLeftRight,PBinSum):
-    """
-    计算功率主带的平均宽度
-    
-    参数:
-        binNumOfPower: 功率分仓个数。
-        TopP: 额定功率以上功率仓个数。
-        DotDenseLeftRight: 向左向右拓展的网格个数    
-        PBinSum: 功率仓内数据点总和
-    返回:
-        DotDense: 每个功率仓内网格的个数。
-        DotDenseLeftRight: 向左向右拓展的网格个数
-        PowerLimit: 各水平功率带是否为限功率标识,1:是;0:不是
-    """
-
-    PowerLimit = np.zeros(binNumOfPower, dtype=int)  
-    WidthAverage = 0    
-    WidthAverage_L = 0 
-    nCounter = 0  
-    PowerLimitValve = 6    
-    N_Pcount = 20  
-    for i in range(binNumOfPower - TopP):   
-        if (DotDenseLeftRight[i, 1] > PowerLimitValve) and (PBinSum[i] > N_Pcount):  
-            PowerLimit[i] = 1  
-           
-        if DotDenseLeftRight[i, 1] <= PowerLimitValve:  
-            WidthAverage += DotDenseLeftRight[i, 1]
-            WidthAverage_L += DotDenseLeftRight[i,1] 
-            nCounter += 1  
-    WidthAverage /= nCounter if nCounter > 0 else 1  
-    WidthAverage_L /= nCounter if nCounter > 0 else 1   
-
-    return WidthAverage, WidthAverage_L,PowerLimit
-
-def amendMaxBox(binNumOfPower,TopP,PowerLimit,BoxMaxIndex):
-    """
-    对限负荷水平功率带的最大网格进行修正
-    
-    参数:
-        binNumOfPower: 功率分仓个数。
-        TopP: 额定功率以上功率仓个数。
-        PowerLimit:标识限功率水平功率带,1:是;0:不是
-        BoxMaxIndex: 占百分比最大的网格索引
-    返回:
-        BoxMaxIndex: 修正后的最大占比网格索引
-    """
-
-    for i in range(1, binNumOfPower - TopP+1):  
-        if (PowerLimit[i] == 1) and (abs(BoxMaxIndex[i] - BoxMaxIndex[i - 1]) > 5):  
-            BoxMaxIndex[i] = BoxMaxIndex[i - 1] + 1  
-
-    return BoxMaxIndex
-
-def markBoxLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,BoxMaxIndex):
-    '''
-    标记需剔除的网格
-    
-    参数:
-        binNumOfPower: 功率分仓个数。
-        binNumOfWindSpeed:风速分仓个数
-        TopP: 额定功率以上功率仓个数。
-        CurveWidthR:功率主带轮廓
-        CurveWidthL
-        BoxMaxIndex: 修正后的最大占比网格索引
-    返回:
-        BBoxRemove: 标识需剔除的网格
-    '''
-    BBoxRemove = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)  
-    for m in range(binNumOfPower - TopP): 
-        for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed):
-            BBoxRemove[m, n] = 1  
-        for n in range(int(BoxMaxIndex[m]) - int(CurveWidthL)+1, 0, -1):   
-            BBoxRemove[m, n-1] = 2 
-    return BBoxRemove
-
-def markBoxPLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,BoxPercent,BoxMaxIndex,mm,BBoxRemove,nn):
-    '''
-    标记限功率网格 
-    1:右侧欠发 2:左侧超发 3:额定功率以上超发
-    
-    参数:
-        binNumOfPower: 功率分仓个数。
-        binNumOfWindSpeed:风速分仓个数
-        TopP: 额定功率以上功率仓个数。
-        CurveWidthR:功率主带轮廓
-        PowerLimit: 标识限功率水平功率带,1:是;0:不是
-        BoxMaxIndex: 修正后的最大占比网格索引
-        mm: 拐点所在功率仓
-        BBoxRemove:需剔除的网格
-        CurveTop1:拐点对应列
-    返回:
-        BBoxLimit:标识限功率网格
-    '''
-    BBoxLimit = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)  
-    for i in range(2, binNumOfPower - TopP):  
-        if PowerLimit[i] == 1:
-            BBoxLimit[i, int(BoxMaxIndex[i] + CurveWidthR + 1):binNumOfWindSpeed] = 1
-    IsolateValve = 3
-    for m in range(binNumOfPower - TopP):    
-        for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed):    
-            if BoxPercent[m, n] < IsolateValve:   
-                BBoxRemove[m, n] = 1
-
-    for m in range(binNumOfPower - TopP, binNumOfPower):   
-        for n in range(binNumOfWindSpeed):  
-            BBoxRemove[m, n] = 3
-      
-    # 标记功率主带拐点左侧的欠发网格  
-    for m in range(mm-1, binNumOfPower - TopP): 
-        for n in range(int(nn) - 2):
-            BBoxRemove[m, n] = 2
-    
-    return BBoxLimit
-    
-def markData(binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1):
-    '''
-    根据网格标识来标记数据点
-    
-    参数:
-        nCounter1
-        binNumOfPower: 功率分仓个数。
-        binNumOfWindSpeed:风速分仓个数
-        DzMarch809: array V P lab: 38181。
-        BBoxRemove:需剔除的网格
-        
-    返回:
-        DzMarch809Sel:数组现在包含了每个数据点的标识
-    '''
-    DzMarch809Sel = np.zeros(nCounter1, dtype=int)
-    nWhichP = 0  
-    nWhichV = 0  
-    for i in range(nCounter1):   
-        for m in range( binNumOfPower ):   
-            if ((DzMarch809[i,1])> m * intervalPower) and ((DzMarch809[i,1]) <= (m+1) * intervalPower):  
-                nWhichP = m  #m记录的是index
-                break  
-        for n in range( binNumOfWindSpeed ):    
-            if DzMarch809[i,0] > ((n+1) * intervalWindspeed - intervalWindspeed/2) and DzMarch809[i,0] <= ((n+1) * intervalWindspeed + intervalWindspeed / 2):  
-                nWhichV = n 
-                break  
-        if nWhichP >= 0 and nWhichV >= 0:  
-            if BBoxRemove[nWhichP, nWhichV] == 1:   
-                DzMarch809Sel[i] = 1  
-            elif BBoxRemove[nWhichP, nWhichV] == 2:  
-                DzMarch809Sel[i] = 2  
-            elif BBoxRemove[nWhichP , nWhichV] == 3:  
-                DzMarch809Sel[i] = 0  
-    
-    return DzMarch809Sel
-    
-
-def windowFilter(nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line):
-    '''
-    滑动窗口方法,进一步标记数据坏点
-    
-    参数:
-        nCounter1:
-        ratedPower:
-        Point_line:
-        
-    返回:
-        PVLimit: 限负荷数据
-        nLimitTotal: 是限负荷数据的总数
-    '''
-
-    PVLimit = np.zeros((nCounter1, 3)) 
-    nLimitTotal = 0  
-    nWindowLength = 6  
-    LimitWindow = np.zeros(nWindowLength)
-    UpLimit = 0   
-    LowLimit = 0  
-    PowerStd = 30  
-    nWindowNum = np.floor(nCounter1/nWindowLength)
-    PowerLimitUp = ratedPower - 100  
-    PowerLimitLow = 100  
-
-    # 循环遍历每个窗口  
-    for i in range(int(nWindowNum)):  
-        start_idx = i * nWindowLength  
-        end_idx = start_idx + nWindowLength  
-        LimitWindow = DzMarch809[start_idx:end_idx, 1]  
-         
-        bAllInAreas = np.all(LimitWindow >= PowerLimitLow) and np.all(LimitWindow <= PowerLimitUp)  
-        if not bAllInAreas:  
-            continue  
-        
-        UpLimit = LimitWindow[0] + PowerStd  
-        LowLimit = LimitWindow[0] - PowerStd  
-        
-        bAllInUpLow = np.all(LimitWindow >= LowLimit) and np.all(LimitWindow <= UpLimit)  
-        if bAllInUpLow: 
-            DzMarch809Sel[start_idx:end_idx] = 4  
- 
-            for j in range(nWindowLength):  
-                PVLimit[nLimitTotal, :2] = DzMarch809[start_idx + j, :2]  
-                PVLimit[nLimitTotal, 2] = Point_line[start_idx + j]  # 对数据进行标识  
-                nLimitTotal += 1  
-    return PVLimit,nLimitTotal
-
-def store_points(DzMarch809, DzMarch809Sel,Point_line, nCounter1):  
-    """  
-    存储好点,并返回存储好的点的数组和计数。
-    
-    参数:
-        DzMarch809: array:V P lab: 38181。
-        DzMarch809Sel: 数组现在包含了每个数据点的标识
-        Point_line:
-        nCounter1:
-        axis: 'good' or 'bad'
-        
-    返回:
-        PVDot: 数据
-        nCounterPV: 数据个数
-
-    """  
-    PVDot = np.zeros((nCounter1, 3))
-    PVBad = np.zeros((nCounter1, 3))  
-
-    nCounterPV = 0  
-    nCounterBad = 0 
-    for i in range(nCounter1):
-        if DzMarch809Sel[i] == 0:   
-            nCounterPV += 1 
-            PVDot[nCounterPV-1, :2] = DzMarch809[i, :2]
-            PVDot[nCounterPV-1, 2] = Point_line[i]  
-        elif DzMarch809Sel[i] in [1, 2, 3]:  
-            nCounterBad += 1  
-            PVBad[nCounterBad-1, :2] = DzMarch809[i, :2]  
-            PVBad[nCounterBad-1, 2] = Point_line[i]
-                  
-    return PVDot, nCounterPV,PVBad,nCounterBad  
-
-def markAllData(nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit):
-    """  
-    标记好点、坏点、限电点。
-    
-    参数:
-        nCounterPV
-        nCounterBad
-        dataFramePartOfSCADA
-        PVDot
-        PVBad
-        SM
-        nLimitTotal
-        PVLimit
-        
-    返回:
-        dataFramePartOfSCADA
-
-    """  
-
-    for i in range(nCounterPV):
-        dataFramePartOfSCADA[int(PVDot[i, 2] - 1), (SM[1]-1)] = 1   
-    #坏点  
-    for i in range(nCounterBad):  
-        dataFramePartOfSCADA[int(PVBad[i, 2] - 1),(SM[1]-1)] = 5  # 坏点标识  
-
-    # 对所有数据中的限电点进行标注   
-    for i in range(nLimitTotal):  
-        dataFramePartOfSCADA[int(PVLimit[i, 2] - 1),(SM[1]-1)] = 4  # 限电点标识  
-
-    return dataFramePartOfSCADA
-# 4. 数据可视化
-def plot_data(ws:list, ap:list):
-    fig = plt.figure()
-    plt.scatter(ws, ap, s=1, c='black', marker='.')
-    ax = plt.gca()
-    ax.xaxis.set_major_locator(MultipleLocator(5))
-    ax.yaxis.set_major_locator(MultipleLocator(500))
-    plt.xlim((0, 30))
-    plt.ylim((0, 2200))
-    plt.tick_params(labelsize=8)
-    plt.xlabel("V/(m$·$s$^{-1}$)", fontsize=8)
-    plt.ylabel("P/kW", fontsize=8)
-    plt.show()
-
-# 5. Main Execution
-def main():
-    turbine=85
-    basePath=r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72'
-    filePathSCADA = r'{}\{}.csv'.format(basePath,turbine)
-    filePathTurbineInfo = r'{}\info.csv'.format(basePath)
-    outputFilePathOfSCADA=r"{}\labeled\labeled_{}.csv".format(basePath,turbine)
-
-    dataFrameOfSCADA, turbineInfo = loadData(filePathSCADA, filePathTurbineInfo)
-    ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed = extractTurbineParameters(turbineInfo)
-    dataFramePartOfSCADA = preprocessData(dataFrameOfSCADA)
-    powerMax=dataFramePartOfSCADA[fieldActivePower].max()
-
-    binNumOfPower, binNumOfWindSpeed = calculateIntervals(powerMax,ratedPower,windSpeedCutOut)
-    TopP = calculateTopP(powerMax,ratedPower)
-    # 根据功率阈值对数据进行标签分配
-    DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM = chooseData(dataFramePartOfSCADA, dataFrameOfSCADA)  
-    XBoxNumber = gridCount(binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809)
-    PBoxPercent,PBinSum = percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'power')
-    VBoxPercent,VBinSum = percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'speed')
-
-    PBoxMaxIndex, PBoxMaxP = maxBoxPercentage(PBoxPercent, binNumOfPower, binNumOfWindSpeed, 'power')
-    VBoxMaxIndex, VBoxMaxV = maxBoxPercentage(VBoxPercent, binNumOfPower, binNumOfWindSpeed, 'speed')
-    if PBoxMaxIndex[0] > 14: PBoxMaxIndex[0] = 9
-    DotDenseLeftRight = extendBoxPercent(90, PBoxMaxP,TopP,PBoxMaxIndex,PBoxPercent,binNumOfPower,binNumOfWindSpeed)
-    # pdb.set_trace()  # 设置断点
-    WidthAverage, WidthAverage_L,PowerLimit = calculatePWidth(binNumOfPower,TopP,DotDenseLeftRight,PBinSum)
-    PBoxMaxIndex = amendMaxBox(binNumOfPower,TopP,PowerLimit,PBoxMaxIndex)
-    # 计算功率主带的左右边界  
-    CurveWidthR = np.ceil(WidthAverage) + 2  
-    CurveWidthL = np.ceil(WidthAverage_L) + 2 
-    #确定功率主带的左上拐点,即额定风速位置的网格索引
-    CurveTop = np.zeros((2, 1), dtype=int)  
-    BTopFind = 0  
-    for m in range(binNumOfPower - TopP, 0, -1):
-        for n in range(int(np.floor(int(windSpeedCutIn) / intervalWindspeed)), binNumOfWindSpeed - 1):   
-            if (VBoxPercent[m, n - 1] < VBoxPercent[m, n]) and (VBoxPercent[m, n] <= VBoxPercent[m, n + 1]) and (XBoxNumber[m, n] >= 3):   
-                CurveTop[0] = m  
-                CurveTop[1] = n  #[第80个,第40个]
-                BTopFind = 1
-                mm = m
-                nn = n
-                break 
-        if BTopFind == 1:  
-            break 
-    #标记网格
-    BBoxRemove = markBoxLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,PBoxMaxIndex)
-    BBoxLimit = markBoxPLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,PBoxPercent,PBoxMaxIndex,mm,BBoxRemove,nn)
-    DzMarch809Sel = markData(binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1)
-    PVLimit,nLimitTotal = windowFilter(nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line)
-    #将功率滑动窗口主带平滑化
-    nSmooth = 0   
-    for i in range(binNumOfPower - TopP - 1):  
-        PVLeftDown = np.zeros(2)  
-        PVRightUp = np.zeros(2)   
-        if PBoxMaxIndex[i + 1] - PBoxMaxIndex[i] >= 1:  
-            # 计算左下和右上顶点的坐标  
-            PVLeftDown[0] = (PBoxMaxIndex[i]+1 + CurveWidthR) * 0.25 - 0.125  
-            PVLeftDown[1] = (i) * 25  
-            PVRightUp[0] = (PBoxMaxIndex[i+1]+1 + CurveWidthR) * 0.25 - 0.125  
-            PVRightUp[1] = (i+1) * 25  
-                
-            for m in range(nCounter1):  
-                # 检查当前点是否在锯齿区域内  
-                if (DzMarch809[m, 0] > PVLeftDown[0]) and (DzMarch809[m, 0] < PVRightUp[0]) and (DzMarch809[m, 1] > PVLeftDown[1]) and (DzMarch809[m, 1] < PVRightUp[1]):
-                    # 检查斜率是否大于对角连线  
-                    if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (PVRightUp[1] - PVLeftDown[1]) / (PVRightUp[0] - PVLeftDown[0]):
-                        # 如果在锯齿左上三角形中,则选中并增加锯齿平滑计数器  
-                        DzMarch809Sel[m] = 0  
-                        nSmooth += 1  
-    # DzMarch809Sel 数组现在包含了锯齿平滑的选择结果,nSmooth 是选中的点数
-    PVDot, nCounterPV,PVBad,nCounterBad = store_points(DzMarch809, DzMarch809Sel,Point_line, nCounter1)
-    #标注   
-    dataFramePartOfSCADA = markAllData(nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit)
-    A = dataFramePartOfSCADA[:,3]
-    A=pd.DataFrame(A,columns=['lab'])
-
-    labeledData = pd.concat([dataFrameOfSCADA,A],axis=1)
-    D = labeledData[labeledData['lab'].isin([-1,0,1,2,3,4,5])]#选择为1的行
-    labeledData.to_csv(outputFilePathOfSCADA,encoding='utf-8')
-    plot_data(D[fieldWindSpeed], D[fieldActivePower])
-
-
-if __name__ == '__main__':
-    main()

+ 0 - 0
dataAnalysisBusiness/demo/__init__.py


+ 0 - 16
dataAnalysisBusiness/demo/demoDataFrame.py

@@ -1,16 +0,0 @@
-import pandas as pd
-
-df1 = pd.DataFrame({'A': [1, 2, 3]})
-df2 = df1  # df2 引用 df1 的同一块内存
-
-def modify1(df2):
-	df2['A'][0] = 99  # 修改 df2
-	
-def modify2(df2):
-	df2= df2[df2['A'][0]==99]  # 修改 df2
-	
-modify1(df2)
-modify2(df2)
-
-print(df1)  
-

+ 0 - 62
dataAnalysisBusiness/demo/scatter3D_plotly.py

@@ -1,62 +0,0 @@
-import pandas as pd  
-import plotly.graph_objects as go  
-  
-# 示例数据  
-data = {  
-    '机组名': ['机组A', '机组B', '机组C', '机组D'],  
-    '时间': ['2024-01-09 09:13:29', '2024-01-10 10:14:30', '2024-02-09 08:13:29', '2024-02-10 09:14:30'],  
-    '年月': ['2024-01', '2024-01', '2024-02', '2024-02'],  
-    '风速': [5.0, 6.0, 4.5, 5.5],  
-    '有功功率': [1000, 1200, 900, 1100]  
-}  
-  
-df = pd.DataFrame(data)  
-  
-# 按风速升序排列数据  
-df_sorted = df.sort_values(by='风速')  
-  
-# 获取唯一年月  
-unique_months = df_sorted['年月'].unique()  
-  
-# 自定义颜色列表(确保颜色数量与唯一月份的数量相匹配)  
-colors = ['red', 'blue', 'green', 'purple']  # 根据实际唯一月份数量调整颜色数量  
-  
-# 创建颜色映射  
-color_map = dict(zip(unique_months, colors))  
-  
-# 使用go.Scatter3d创建3D散点图  
-trace = go.Scatter3d(  
-    x=df_sorted['风速'],  
-    y=df_sorted['有功功率'],  
-    z=[color_map[month] for month in df_sorted['年月']],  
-    mode='markers',  
-    marker=dict(  
-        color=[color_map[month] for month in df_sorted['年月']],  
-        size=10,  
-        line=dict(color='rgba(255, 255, 255, 0.8)', width=0.5),  
-        opacity=0.8  
-    )  
-)  
-  
-# 创建图形  
-fig = go.Figure(data=[trace])  
-  
-# 更新图形的布局  
-fig.update_layout(  
-    title='按风速升序排列的3D散点图:风速、有功功率与年月',  
-    margin=dict(l=0, r=0, b=0, t=0),  
-    scene=dict(  
-        xaxis=dict(title='风速'),  
-        yaxis=dict(title='有功功率'),  
-        zaxis=dict(  
-            title='年月',  
-            tickmode='array',  
-            tickvals=unique_months,  
-            ticktext=unique_months,  
-            categoryorder='category ascending'  
-        )  
-    )  
-)  
-  
-# 显示图形  
-fig.show()

+ 0 - 50
dataAnalysisBusiness/demo/scatter3D_plotly_make_subplots.py

@@ -1,50 +0,0 @@
-import pandas as pd  
-import plotly.graph_objects as go  
-from plotly.subplots import make_subplots  
-  
-# 假设你的DataFrame叫做df,并且已经包含了所需字段  
-# 如果你的数据是CSV文件,可以使用pd.read_csv('your_file.csv')来加载数据  
-# df = pd.read_csv('your_file.csv')  
-  
-# 示例数据  
-data = {  
-    '机组名': ['机组A', '机组B', '机组C', '机组D'],  
-    '时间': ['2024-01-09 09:13:29', '2024-01-10 10:14:30', '2024-02-09 08:13:29', '2024-02-10 09:14:30'],  
-    '年月': ['2024-01', '2024-01', '2024-02', '2024-02'],  
-    '风速': [5.0, 6.0, 4.5, 5.5],  
-    '有功功率': [1000, 1200, 900, 1100]  
-}  
-  
-df = pd.DataFrame(data)  
-  
-# 创建颜色映射,将每个年月映射到一个唯一的颜色  
-unique_months = df['年月'].unique()  
-colors = [f'rgb({i}, {150 - i}, 50)' for i in range(len(unique_months))]  
-color_map = dict(zip(unique_months, colors))  
-  
-# 使用make_subplots创建3D散点图  
-fig = make_subplots(rows=1, cols=1, specs=[[{"type": "scatter3d"}]])  
-  
-# 遍历DataFrame的每一行,为每个点添加数据  
-for index, row in df.iterrows():  
-    x = row['风速']  
-    y = row['年月']  
-    z = row['有功功率']  
-    color = color_map[y]  
-      
-    # 添加散点到子图  
-    fig.add_trace(go.Scatter3d(x=[x], y=[y], z=[z], mode='markers', marker=dict(color=color)), row=1, col=1)  
-  
-# 更新子图的布局,设置y轴为category类型,并设置其类别顺序  
-fig.update_layout(  
-    title='3D散点图:风速、年月与有功功率',  
-    margin=dict(l=0, r=0, b=0, t=0),  
-    scene=dict(  
-        xaxis=dict(title='风速'),  
-        yaxis=dict(title='年月', tickmode='array', tickvals=unique_months, ticktext=unique_months, categoryorder='category ascending'),  
-        zaxis=dict(title='有功功率')  
-    )  
-)  
-  
-# 显示图形  
-fig.show()

+ 0 - 19
dataAnalysisBusiness/demo/test.py

@@ -1,19 +0,0 @@
-import plotly.express as px
-import pandas as pd
-
-# 创建一个示例数据框架,包含单月数据
-data_single_month = {
-    '时间': ['2023-03', '2023-03', '2023-03', '2023-03', '2023-03'],
-    '发电机转速': [1000, 1500, 2000, 2500, 3000],
-    '功率': [120, 180, 160, 210, 230]
-}
-
-df_single = pd.DataFrame(data_single_month)
-
-# 绘制3D散点图
-fig_single = px.scatter_3d(df_single, x='发电机转速', y='时间', z='功率', 
-                           title='3D 散点图 - 单月数据',
-                           labels={'发电机转速': '转速', '时间': '月份', '功率': '输出功率'})
-
-# 显示图形
-fig_single.show()

+ 0 - 58
dataAnalysisBusiness/demo/testBoxLine.py

@@ -1,58 +0,0 @@
-import plotly.graph_objects as go
-import numpy as np
-
-# 生成一些随机数据
-np.random.seed(10)
-data = np.random.randn(100)
-
-# 计算均值
-mean_value = np.mean(data)
-
-# 创建箱线图
-fig = go.Figure()
-
-# 添加箱线图
-fig.add_trace(go.Box(
-    y=data,
-    boxmean=False,  # 不显示均值线
-    marker_color='blue',  # 箱体颜色
-    line_color='blue',  # 边框颜色
-    width=0.4  # 调整箱体宽度
-))
-
-# 添加均值线
-fig.add_shape(
-    type='line',
-    x0=0,
-    y0=mean_value,
-    x1=1,
-    y1=mean_value,
-    xref='paper',
-    yref='y',
-    line=dict(color='green', width=2)
-)
-
-# 更新中位数线颜色及其宽度
-fig.update_traces(
-    marker=dict(color='blue'),
-    line=dict(color='blue'),
-    width=0.4,
-    # 这里使用线条属性更新中位数线
-    quartilemethod="exclusive",  # 使用独占四分位数法
-)
-
-# 手动添加中位数线
-median_value = np.median(data)
-fig.add_shape(
-    type="line",
-    x0=0,
-    y0=median_value,
-    x1=1,
-    y1=median_value,
-    xref="paper",
-    yref="y",
-    line=dict(color="red", width=1)
-)
-
-# 显示图表
-fig.show()

+ 0 - 113
dataAnalysisBusiness/demo/testDataProcess.py

@@ -1,113 +0,0 @@
-import os  
-import pandas as pd  
-import numpy as np
-import matplotlib.pyplot as plt  
-  
-def process_scada_data(fpath, turbine_number, fn_start, fn_end, status_normal):  
-    """  
-    处理SCADA数据的函数。  
-      
-    参数:  
-        fpath (str): 文件存放位置的路径。  
-        turbine_number (int): 风机数量(尽管此参数在此函数中未使用,但可以保留以匹配MATLAB代码)。  
-        fn_start (int): 开始处理的文件编号。  
-        fn_end (int): 结束处理的文件编号(不包含)。  
-        status_normal (int): 风机正常并网状态的状态字(尽管此参数在此函数中未使用,但可以保留以匹配MATLAB代码)。  
-    """  
-    # 循环处理每个文件  
-    for fn in range(fn_start, fn_end):  
-        fname = os.path.join(fpath, f"{fn}.csv")  
-          
-        # 读取CSV文件  
-        scada_10min = pd.read_csv(fname)  
-          
-        # 提取所需列  
-        time_stamp = scada_10min["时间"]  
-        active_power = scada_10min["变频器电网侧有功功率"]  
-        wind_speed = scada_10min["风速"]  
-          
-        # 创建包含所需列的DataFrame  
-        LM = pd.DataFrame({  
-            "时间戳": time_stamp,  
-            "有功功率": active_power,  
-            "风速": wind_speed  
-        })  
-          
-        # 调用数据标签处理函数(需要您根据MATLAB实现来编写此函数)  
-        xx = data_label(LM,fpath)  
-          
-        # 合并标签数据到原始DataFrame  
-        merged_df = pd.concat([scada_10min, xx], axis=1)  
-          
-        # 筛选出标签为1的行  
-        D = merged_df[merged_df["lab"] == 1]  
-          
-        # 绘制散点图  
-        plt.scatter(D["风速"], D["变频器电网侧有功功率"], s=50, fillstyle='full')  
-        plt.title(f"风机 {fn} 散点图")  
-        plt.xlabel("风速")  
-        plt.ylabel("变频器电网侧有功功率")  
-        plt.show()  
-          
-        # 创建保存结果的目录(如果不存在)  
-        labeled_dir = os.path.join(fpath, "labeled")  
-        os.makedirs(labeled_dir, exist_ok=True)  
-          
-        # 将处理后的数据保存到CSV文件  
-        labeled_fname = os.path.join(labeled_dir, f"{fn}_10s_n.csv")  
-        merged_df.to_csv(labeled_fname, index=False)  
-  
-# 假设data_label函数已经实现,这里只是一个示例的占位符  
-def data_label(df:pd.DataFrame,fpath):  
-    # 在这里实现您的数据标签处理逻辑  
-    # 返回带有新标签的Series或DataFrame  
-     # 读取风机参数数据
-    fname2 = fpath + "info.csv"
-    turbine_info = pd.read_csv(fname2, keep_default_na=False)
-    PRated = turbine_info["额定功率"].values[0]
-    VCutOut = turbine_info["切出风速"].values[0]
-    VCutIn = turbine_info["切入风速"].values[0]
-    VRated = turbine_info["额定风速"].values[0]
-    
-    # 读入有功功率和风速数据
-    Labeled_March809 = df
-    APower = Labeled_March809["active_power"]
-    WSpeed = Labeled_March809["wind_speed"]
-
-    # 初始化计算用的变量
-    maxP = APower.max()
-    intervalP = 25  # 功率分区间隔为25
-    intervalwindspeed = 0.25  # 风速分区间隔为0.25m/s
-    
-    # 根据最大功率和额定功率,计算功率和风速的区间数
-    PNum = (maxP // intervalP) + 1 if maxP >= PRated else (PRated // intervalP)
-    TopP = ((maxP - PRated) // intervalP) + 1 if maxP >= PRated else 0
-    VNum = np.ceil(VCutOut / intervalwindspeed).astype(int)
-
-    # 初始化标签列
-    Labeled_March809['label'] = 0
-
-    # 数据预处理:标记功率小于等于10的点
-    Labeled_March809.loc[APower <= 10, 'label'] = -1
-
-    # 下面是逻辑处理的示例,涉及到循环、条件判断和数据标记
-    # 示例:标记风速和功率在特定范围内的点
-    for i, row in Labeled_March809.iterrows():
-        if row['active_power'] > 10 and row['wind_speed'] > 0:
-            # 这里可以根据需要添加更多的处理逻辑
-            pass
-
-    # 以下是更高级的数据处理示例,这部分代码需要您根据实际逻辑继续开发
-    # 示例:根据风速和功率的分布对数据进行进一步的标记
-    # 请注意,这里需要你根据上面 MATLAB 代码的具体逻辑来实现相应的Python代码
-
-    return Labeled_March809
-  
-# 设置文件路径和其他参数  
-fpath = "E:\\BaiduNetdiskDownload\\test\\min_scada_LuoTuoGou\\72\\"  
-# 注意:turbine_number 在此函数中未使用,但保持以匹配MATLAB代码  
-turbine_number = 24  
-status_normal = 8  
-  
-# 调用函数处理文件,假设从编号82的文件开始,只处理这一个文件  
-process_scada_data(fpath, turbine_number, 82, 83, status_normal)

+ 0 - 85
dataAnalysisBusiness/demo/testDatabase.py

@@ -1,85 +0,0 @@
-
-import random
-from datetime import datetime, timedelta
-from sqlalchemy import create_engine, Column, Integer, Float, DateTime, PrimaryKeyConstraint,String
-from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import sessionmaker
-from concurrent.futures import ThreadPoolExecutor, as_completed
-
-# 数据库连接字符串,根据实际情况进行修改
-DATABASE_URI = 'mysql+pymysql://admin:admin123456@192.168.50.233:3306/test'
-
-# 创建数据库引擎
-engine = create_engine(DATABASE_URI, pool_size=10, max_overflow=20)
-
-# 创建基类
-Base = declarative_base()
-
-# 定义表结构
-class TurbineData(Base):
-    __tablename__ = 'turbine_data'
-    batchNO = Column(Integer, nullable=False)
-    turbineNO = Column(String, nullable=False)
-    time = Column(DateTime, nullable=False)
-    active_power = Column(Float, nullable=False)
-    wind_speed = Column(Float, nullable=False)
-    
-    __table_args__ = (
-        PrimaryKeyConstraint('batchNO', 'turbineNO', 'time'),
-    )
-
-# 创建表(如果不存在)
-Base.metadata.create_all(engine)
-
-# 创建Session
-Session = sessionmaker(bind=engine)
-
-# 生成并插入数据
-num_records = 8000000
-batch_size = 200000
-
-batchNOs = ['B001','B002','B003']
-turbineNOs = ['WT001','WT002','WT003','WT004','WT005','WT006','WT007','WT008','WT009','WT010','WT011']
-
-# batchNOs = ['B001','B002']
-# turbineNOs = ['WT001','WT002']
-
-# 辅助函数生成随机数据
-def generate_data(num_records,batchNO,turbineNO,time):
-    data = []
-    dateTime=time
-    for _ in range(num_records):
-        active_power = round(random.uniform(0, 5000), 2)
-        wind_speed = round(random.uniform(0, 25), 2)
-        data.append(TurbineData(batchNO=batchNO, turbineNO=turbineNO, time=dateTime, active_power=active_power, wind_speed=wind_speed))
-        dateTime += timedelta(seconds=1)  # 保证时间字段不重复
-    return data,dateTime
-
-# 插入数据的任务
-def insert_data(batchNO, turbineNO, start_index, end_index, dateTime):
-    
-    session = Session()
-    data_batch,dateTime = generate_data(end_index - start_index, batchNO, turbineNO, dateTime)
-    session.bulk_save_objects(data_batch)
-    session.commit()
-    session.close()
-    return dateTime
-
-# 使用线程池批量插入数据
-with ThreadPoolExecutor(max_workers=5) as executor:
-    futures = []
-    for batchNO in batchNOs:
-        dateTime = datetime.now()  # 随机生成时间
-        for turbineNO in turbineNOs:
-            for i in range(0, num_records, batch_size):
-                dateTime += timedelta(seconds=i) 
-                print(f'batchNO: {batchNO} turbineNO: {turbineNO} Scheduling records {i} to {i + batch_size}')
-                futures.append(executor.submit(insert_data, batchNO, turbineNO, i, i + batch_size, dateTime))
-
-    for future in as_completed(futures):
-        try:
-            dateTime = future.result()
-        except Exception as exc:
-            print(f'Generated an exception: {exc}')
-
-print('Data insertion complete.')

+ 0 - 50
dataAnalysisBusiness/demo/testLargeComponetTemp.py

@@ -1,50 +0,0 @@
-import pandas as pd
-import numpy as np
-import plotly.graph_objects as go
-
-# 示例数据
-data = {
-    'power': np.random.uniform(0, 500, 1000),  # 假设功率范围是 0 到 500 kW
-    'time': pd.date_range(start='2024-01-01', periods=240, freq='H'),  # 假设时间范围是 1000 小时
-    'temperature': np.random.uniform(0, 100, 1000)  # 假设温度范围是 0 到 100 °C
-}
-df = pd.DataFrame(data)
-
-# 功率分仓
-df['power_bin'] = (df['power'] // 25) * 25
-
-# 时间分仓(按天)
-df['date'] = df['time'].dt.date
-
-# 温度分仓
-df['temperature_bin'] = (df['temperature'] // 5) * 5
-
-# 计算每个分仓的出现次数
-heatmap_data = df.groupby(['power_bin', 'date', 'temperature_bin']).size().reset_index(name='count')
-
-# 创建3D热图
-fig = go.Figure(data=[
-    go.Scatter3d(
-        x=heatmap_data['power_bin'],
-        y=heatmap_data['date'],
-        z=heatmap_data['temperature_bin'],
-        mode='markers',
-        marker=dict(
-            size=5,
-            color=heatmap_data['count'],
-            colorscale='Viridis',
-            colorbar=dict(title='Count'),
-        )
-    )
-])
-
-fig.update_layout(
-    title='功率,时间和温度的3D热图',
-    scene=dict(
-        xaxis_title='功率 (kW)',
-        yaxis_title='日期',
-        zaxis_title='温度 (°C)'
-    )
-)
-
-fig.show()

+ 0 - 57
dataAnalysisBusiness/demo/testLegend.py

@@ -1,57 +0,0 @@
-import plotly.graph_objects as go
-import plotly.io as pio
-
-# 创建示例数据
-x = [1, 2, 3, 4, 5]
-y1 = [10, 15, 13, 17, 16]
-y2 = [16, 5, 11, 9, 10]
-y3 = [6, 8, 9, 5, 4]
-y4 = [10, 15, 13, 17, 16]
-y5 = [16, 5, 11, 9, 10]
-y6 = [6, 8, 9, 5, 4]
-y7 = [10, 15, 13, 17, 16]
-y8 = [16, 5, 11, 9, 10]
-y9 = [6, 8, 9, 5, 4]
-y10 = [10, 15, 13, 17, 16]
-y11 = [16, 5, 11, 9, 10]
-y12 = [6, 8, 9, 5, 4]
-y13 = [10, 15, 13, 17, 16]
-y14 = [16, 5, 11, 9, 10]
-y15 = [6, 8, 9, 5, 4]
-
-# 创建图表
-fig = go.Figure()
-
-# 添加多个系列
-fig.add_trace(go.Scatter(x=x, y=y1, mode='lines+markers', name='Series 1'))
-fig.add_trace(go.Scatter(x=x, y=y2, mode='lines+markers', name='Series 2'))
-fig.add_trace(go.Scatter(x=x, y=y3, mode='lines+markers', name='Series 3'))
-fig.add_trace(go.Scatter(x=x, y=y4, mode='lines+markers', name='Series 4'))
-fig.add_trace(go.Scatter(x=x, y=y5, mode='lines+markers', name='Series 5'))
-fig.add_trace(go.Scatter(x=x, y=y6, mode='lines+markers', name='Series 6'))
-fig.add_trace(go.Scatter(x=x, y=y7, mode='lines+markers', name='Series 7'))
-fig.add_trace(go.Scatter(x=x, y=y8, mode='lines+markers', name='Series 8'))
-fig.add_trace(go.Scatter(x=x, y=y9, mode='lines+markers', name='Series 9'))
-fig.add_trace(go.Scatter(x=x, y=y10, mode='lines+markers', name='Series 10'))
-fig.add_trace(go.Scatter(x=x, y=y11, mode='lines+markers', name='Series 11'))
-fig.add_trace(go.Scatter(x=x, y=y12, mode='lines+markers', name='Series 12'))
-fig.add_trace(go.Scatter(x=x, y=y13, mode='lines+markers', name='Series 13'))
-fig.add_trace(go.Scatter(x=x, y=y14, mode='lines+markers', name='Series 14'))
-fig.add_trace(go.Scatter(x=x, y=y15, mode='lines+markers', name='Series 15'))
-
-# 更新布局以解决图例展示不全问题
-fig.update_layout(
-    legend=dict(
-        orientation="h",
-        yanchor="bottom",
-        y=1.02,
-        xanchor="right",
-        x=1
-    ),
-    margin=dict(b=40, t=40)
-)
-
-# 使用 write_image 方法输出图形
-pio.write_image(fig, 'line_chart.png')
-
-fig.show()

+ 0 - 12
dataAnalysisBusiness/demo/testPandas.py

@@ -1,12 +0,0 @@
-import pandas as pd
-import numpy as np
-
-df=pd.read_csv(r"E:/BaiduNetdiskDownload/DTSXJK_WJWFC_Q1_W001_2023-10-01_last_1seconds.csv",header=1)
-
-print(df.head())
-
-
-df["WNAC_WdDir"]=df["WNAC_WdDir"].astype("Float32")
-df["弧度"]=df["WNAC_WdDir"]/360*2*np.pi
-
-print(df.head())

+ 0 - 0
dataContract/algorithmContract/__init__.py


+ 0 - 169
dataContract/algorithmContract/confBusiness.py

@@ -1,169 +0,0 @@
-from enum import Enum
-
-# Global constants
-charset_unify = 'utf-8'
-CSVSuffix = '.csv'
-Const_Output_Total='total'
-Const_TimeGranularity_Second="second"
-Const_TimeGranularity_Minute="minute"
-
-### SCADA ###
-Field_DeviceCode="wind_turbine_number" # wind turbine number
-Field_ActiverPower="active_power"       # active power
-Field_GeneratorTorque = "actual_torque" # actual torque
-Field_GeneratorSpeed = "generator_speed" # generator speed
-Field_RotorSpeed = "rotor_speed"       # rotor speed
-Field_WindSpeed = "wind_velocity"     # wind speed
-Field_WindDirection="true_wind_direction"  # absolute wind direction
-Field_AngleIncluded = "yaw_error1"  # angle to the wind (yaw error)
-Field_PitchAngel1="pitch_angle_blade_1"  # pitch angle, blade 1
-Field_PitchAngel2="pitch_angle_blade_2"  # pitch angle, blade 2
-Field_PitchAngel3="pitch_angle_blade_3"  # pitch angle, blade 3
-Field_Time="time_stamp"  # timestamp (original note: .strftime('%Y-%m-%d %H:%M:%S'))
-Field_EnvTemp="outside_cabin_temperature"  # ambient temperature
-Field_NacTemp="cabin_temperature"  # temperature inside the nacelle
-Field_NacPos="cabin_position"  # nacelle position
-Field_GeneratorDE="generatordrive_end_bearing_temperature"  # generator drive-end bearing temperature / generator low-speed shaft temperature
-Field_GeneratorNDE="generatornon_drive_end_bearing_temperature"  # generator non-drive-end bearing temperature / generator high-speed shaft temperature
-Field_MainBearTemp="main_bearing_temperature"  # main bearing temperature
-Field_GbHsBearTemp="gearbox_high_speed_shaft_bearing_temperature"  # gearbox high-speed shaft bearing temperature
-Field_GbMsBearTemp="gearboxmedium_speed_shaftbearing_temperature"  # gearbox medium-speed shaft bearing temperature
-Field_GbLsBearTemp="gearbox_low_speed_shaft_bearing_temperature"  # gearbox low-speed shaft bearing temperature
-Field_GenWiTemp1="generator_winding1_temperature"  # generator winding 1 temperature
-Field_GenWiTemp2="generator_winding2_temperature"  # generator winding 2 temperature
-Field_GenWiTemp3="generator_winding3_temperature"  # generator winding 3 temperature
-Field_GbOilTemp="gearbox_oil_temperature"  # gearbox oil temperature
-Field_PCA="power_curve_available"  # power curve available
-Field_APSet="set_value_of_active_power"  # active power setpoint
-Field_NacFbVib="front_back_vibration_of_the_cabin"  # nacelle fore-aft vibration
-Field_NacLrVib="side_to_side_vibration_of_the_cabin"  # nacelle side-to-side vibration
-Field_StatusOfTurbine="wind_turbine_status"  # wind turbine status 1
-
-# Fault / alarm data
-Field_DeviceCode="wind_turbine_number" # wind turbine number (re-assigns the same value as in the SCADA section)
-Field_DeviceName="wind_turbine_name" # wind turbine name
-Field_FaultTime="time_diff" # fault duration
-Field_FaultDetail="fault_detail" # fault type
-Field_BeginTime="begin_time" # fault start time
-Field_EndTime="end_time" # fault end time
-
-
-### Derived (secondary) calculations ###
-Field_Cp = "cp"
-Field_CpMedian = "cp_median"
-Field_CpMax = "cp_max"
-Field_CpMin = "cp_min"
-Field_TSR = "tsr"
-Field_TSRModified = "tsr_modified"
-Field_TSRMax = "tsr_max"
-Field_TSRMin = "tsr_min"
-Field_TSRMedian = "tsr_median"
-Field_Year="year"
-Field_Month="month"
-Field_UnixYearMonth="monthIntTime"
-Field_YearMonth = "year-month"
-Field_YearMonthDay = "year-month-day"
-Field_PowerFloor= "power_floor"
-Field_Power="power"
-Field_WindSpeedFloor= "wind_speed_floor"
-Field_YawError="yaw_error1"
-Field_LableFlag="lab"
-
-### Wind farm information table: wind_field ###
-Field_PowerFarmCode="field_code"
-Field_PowerFarmName="field_name"
-Field_ProvinceID="province_id"
-Field_ProvinceName="province_name"
-Field_CityID="city_id"
-Field_CityName="city_name"
-Field_CompanyCode="company_code"  # company code
-Field_CreateBy="create_by"  # created by
-Field_CreateTime="create_time"  # creation time
-Field_AirDensity="density"  # air density - contract power curve
-Field_NumberOfTurbine="engine_number"  # number of turbines
-Field_PowerContractURL="power_contract_url"  # contract power curve URL
-Field_RatedPowerSUM="rated_capacity_number"  # total rated capacity (sum over turbines)
-
-### Wind turbine unit information table: wind_engine_group ###
-Field_NameOfTurbine="engine_name"
-Field_CodeOfTurbine="engine_code"
-Field_RatedPower="rated_capacity"  # rated power
-Field_RatedWindSpeed="rated_wind_speed" # rated wind speed
-Field_Elevation="elevation_height"
-Field_HubHeight="hub_height"  # hub height
-Field_Latitude="latitude"  # also present in wind_engine_mill
-Field_Longitude="longitude"  # also present in wind_engine_mill
-Field_Sightcing="sightcing"  # whether this is a benchmark turbine
-
-### Wind turbine model information table: wind_engine_mill ###
-Field_RotorDiameter="rotor_diameter" # rotor diameter
-Field_Brand="brand"  # brand name (turbine)
-Field_Combination="combination"  # combined field
-Field_MotionType="curved_motion_type"  # drive type
-Field_DelState="del_state"  # deletion mode
-Field_MachineTypeCode="machine_type_code"  # machine model
-Field_MillTypeCode="mill_type_code"  # code
-Field_ManufacturerCode="manufacturer_code"  # manufacturer code
-Field_ManufacturerName="manufacturer_name"  # manufacturer name
-Field_PowerCriterionURL="power_criterion_url"  # standard power curve URL
-Field_TowerHeight="tower_height"
-Field_VaneLong="vane_long"  # blade length
-Field_RSR="rotational_speed_ratio"  # transmission ratio / rotational speed ratio
-Field_CutInWS="rated_cut_in_windspeed"  # cut-in wind speed
-Field_CutOutWS="rated_cut_out_windspeed" # cut-out wind speed
-
-### Anemometer tower information: anemometer_tower -> anemometer_tower_relation ###
-Field_AnemometerCode="anemometer_code"
-Field_AnemometerName="anemometer_name"
-
-### Data conversion
-Field_TransferType="transfer_type"  # conversion type (enum values: second, minute)
-Field_TimeGranularity="time_granularity"  # time granularity (seconds)
-
-Field_State="state"
-Field_UpdateTime="update_time"
-Field_UpdateBy="update_by"
-
-Field_Return_TypeAnalyst="typeAnalyst"
-Field_Return_BatchCode="batch_code"
-Field_Return_FilePath="localFilePath"
-Field_Return_IsSaveDatabase:bool=True
-
-class ErrorState(Enum):
-    """Two-valued error flag: 0 for no error, 1 for an error."""
-    NotErr=0
-    Err=1     # abnormal / error
-
-class AnalysisState(Enum):
-    """Progress states of an analysis, encoded as integers."""
-    NotAnalyzed=-1 # not analyzed
-    RequstQueue=10 # waiting in the request queue
-    Analyzing=20    # analysis in progress
-    Analyzed=30     # analysis finished
-
-class CustomError(Exception):
-    """Business exception identified by a numeric error code.
-
-    The code is looked up in ``ERROR_CODES``; a code that is not in the table
-    falls back to the generic ``-1`` text. An optional ``msg`` is prepended
-    to the table text, separated by a single space.
-
-    Attributes:
-        code: the error code passed to the constructor.
-        message: the composed message (also passed to ``Exception``).
-    """
-
-    # Runtime message table. The texts are emitted as-is to callers.
-    ERROR_CODES = {
-        -1:"未知异常,请联系技术人员排查问题",
-        100: "未获得业务基础数据,或基础数据缺失",
-        101: "未获得业务数据,或业务数据与基础数据不匹配",
-        102: "缺少风电机组运行数据",
-        103: "算法模型未输出结果",
-        104: "缺失场站基础信息",
-        105: "缺失机组基础信息",
-        106: "缺失场站的数据批次信息",
-        107: "场站存在未配置机型信息(额定风速、切入风速、切出风速)的机组",
-        108: "缺失机组的合同功率曲线信息",
-        109: "场站存在未配置额定功率的机组",
-        110: "场站未配置空气密度",
-        111: "场站所属风电机组存在未配置叶轮直径",
-        112: "SCADA数据不包含风速或其全无值",
-        113: "SCADA数据不包含有功功率或其全无值",
-        114: "机组未配置机型信息",
-        115: "机组未配置基础信息",
-    }
-
-    def __init__(self, code,msg=""):
-        """Build the exception.
-
-        Args:
-            code: key into ``ERROR_CODES``; unknown codes use the ``-1`` text.
-            msg: optional prefix placed before the table text.
-        """
-        self.code = code
-        description = self.ERROR_CODES.get(code, CustomError.ERROR_CODES.get(-1))
-        # Only insert the separating space when a prefix is given; previously
-        # an omitted msg produced a message starting with a stray space.
-        self.message = f"{msg} {description}" if msg else f"{description}"
-        super().__init__(self.message)
-
-    def __str__(self):
-        """Return the message prefixed with the bracketed error code."""
-        return f"[Error Code: {self.code}] {self.message}"

+ 0 - 14
dataContract/algorithmContract/configAnalysis.py

@@ -1,14 +0,0 @@
-class ConfigAnalysis:
-    """One analysis entry of a contract: package, class and method names plus a ``scada`` value."""
-
-    def __init__(self, package: str, className: str, methodName: str,scada:str):
-        self.package, self.className = package, className
-        self.methodName, self.scada = methodName, scada
-
-    def to_dict(self):
-        """Return the four attributes as a dict keyed by attribute name."""
-        field_names = ("package", "className", "methodName", "scada")
-        return {field: getattr(self, field) for field in field_names}

+ 0 - 3
dataContract/algorithmContract/const.py

@@ -1,3 +0,0 @@
-
-# Identifiers of the two databases — presumably configuration keys; verify against callers.
-DATABASE_BusinessFoundationDb="businessFoundationDb"
-DATABASE_businessDb="businessDb"

+ 0 - 199
dataContract/algorithmContract/contract.py

@@ -1,199 +0,0 @@
-import traceback
-import json
-from algorithmContract.dataContractType import DataContractType
-from algorithmContract.customDataContract import DataContract
-from algorithmContract.dataSource import DataSource
-from algorithmContract.dataFilter import DataFilter
-from algorithmContract.customFilter import CustomFilter
-from algorithmContract.configAnalysis import ConfigAnalysis
-from algorithmContract.graphSet import GraphSet
-
-
-class Contract:
-    """Top-level envelope pairing a contract type descriptor with the contract payload."""
-
-    def __init__(self, contractType: DataContractType, dataContract: DataContract):
-        self.contractType, self.dataContract = contractType, dataContract
-
-    def to_dict(self):
-        """Serialize both parts through their own ``to_dict`` methods."""
-        type_part = self.contractType.to_dict()
-        payload_part = self.dataContract.to_dict()
-        return dict(dataContractType=type_part, dataContract=payload_part)
-
-
-def _parseGraphSets(rawGraphSets: dict):
-    """Turn the ``graphSets`` JSON object into GraphSet instances, keeping any grouping levels."""
-    parsed = {}
-    for key, value in rawGraphSets.items():
-        # A non-empty object whose members are all objects is a group
-        # (e.g. "directDrive": {"generatorSpeed": {...}}) and is parsed
-        # recursively; anything else is treated as a step/min/max leaf.
-        if isinstance(value, dict) and value and all(isinstance(child, dict) for child in value.values()):
-            parsed[key] = _parseGraphSets(value)
-        else:
-            parsed[key] = GraphSet(value["step"], value["min"], value["max"])
-    return parsed
-
-
-def LoadAnalysisInput(jsonString: str):
-    """Parse a data-contract JSON string into a Contract.
-
-    Args:
-        jsonString: JSON text with "dataContractType" and "dataContract"
-            members. The "dataSource" member, if present, is not read; the
-            scada value is taken from each "configAnalysis" entry.
-
-    Returns:
-        The Contract built from the input. "graphSets" may be flat or grouped
-        one or more levels deep.
-
-    Raises:
-        Exception: when "turbines" is neither null nor a list. Any other
-            exception raised while parsing (e.g. KeyError for a missing
-            member) is printed with its traceback and re-raised.
-    """
-    try:
-        jsonData = json.loads(jsonString)
-
-        jsonDataType = jsonData["dataContractType"]
-        contractType = DataContractType(
-            jsonDataType["type"], jsonDataType["version"])
-
-        jsonDataContract = jsonData["dataContract"]
-
-        autoOrManual = jsonDataContract["autoOrManual"]
-        jsonDataFilter = jsonDataContract["dataFilter"]
-
-        # "min"/"max" may be absent or null; .get() passes None through.
-        custom_filters = {
-            key: CustomFilter(value.get("min"), value.get("max"))
-            for key, value in jsonDataFilter["customFilter"].items()
-        }
-
-        turbines = jsonDataFilter["turbines"]
-        if turbines is not None and not isinstance(turbines, list):
-            raise Exception("输入参数中机组数据类型不合法.")
-
-        data_filter = DataFilter(
-            jsonDataFilter["powerFarmID"],
-            turbines,
-            jsonDataFilter["dataBatchNum"],
-            jsonDataFilter["beginTime"],
-            jsonDataFilter["endTime"],
-            jsonDataFilter["excludingMonths"],
-            custom_filters
-        )
-
-        config_analysis = [
-            ConfigAnalysis(
-                item["package"], item["className"], item["methodName"], item["scada"])
-            for item in jsonDataContract["configAnalysis"]
-        ]
-
-        graph_sets = _parseGraphSets(jsonDataContract["graphSets"])
-
-        data_contract = DataContract(autoOrManual,
-                                     data_filter, config_analysis, graph_sets)
-
-        contract = Contract(contractType, data_contract)
-    except Exception as e:
-        print("exception: {}".format(e))
-        traceback.print_exc()  # print the stack trace of the exception
-        raise  # re-raise unchanged so callers see the original exception
-
-    return contract
-
-
-# Sample data contract (JSON) consumed by load(). Every "configAnalysis" entry
-# carries the "scada" member that LoadAnalysisInput reads from each entry.
-jsonString = '''
-{
-	"dataContractType": {
-		"type": "analysisExecuteOrder",
-		"version": "1.2.0"
-	},
-	"dataContract": {
-		"autoOrManual":"automatic",
-		"dataSource": {
-			"scada": "second"
-		},
-		"dataFilter": {
-			"powerFarmID": "010-00001",
-			"turbines": [
-				"010-00001-0001",
-				"010-00001-0002"
-			],
-			"dataBatchNum": "B2024042211-0",
-			"beginTime": "2023-01-01 00:00:00",
-			"endTime": "2023-12-31 23:59:59",
-			"excludingMonths": [
-				"2023-12",
-				"2023-09"
-			],
-			"customFilter": {
-				"valueWindSpeed": {
-					"min": 3.0,
-					"max": 25.0
-				},
-				"valuePitchAngle": {
-					"min": 2,
-					"max": null
-				},
-				"valueActivePower": {
-					"min": 10,
-					"max": 2500
-				},
-				"valueGeneratorSpeed": {
-					"min": 10,
-					"max": 2500
-				}
-			}
-		},
-		"configAnalysis": [
-			{
-				"package": "algorithm.powerCurveAnalyst",
-				"className": "PowerCurveAnalyst",
-				"methodName": "executeAnalysis",
-				"scada": "second"
-			},
-			{
-				"package": "algorithm.powerScatter2DAnalyst",
-				"className": "PowerScatter2DAnayst",
-				"methodName": "executeAnalysis",
-				"scada": "second"
-			},
-			{
-				"package": "algorithm.powerScatterAnalyst",
-				"className": "PowerScatterAnalyst",
-				"methodName": "executeAnalysis",
-				"scada": "second"
-			},
-			{
-				"package": "algorithm.windSpeedFrequencyAnalyst",
-				"className": "WindSpeedFrequencyAnalyst",
-				"methodName": "executeAnalysis",
-				"scada": "second"
-			},
-			{
-				"package": "algorithm.generatorSpeedPowerAnalyst",
-				"className": "GeneratorSpeedPowerAnalyst",
-				"methodName": "executeAnalysis",
-				"scada": "second"
-			}
-		],
-		"graphSets": {
-			"generatorSpeed": {
-				"step": 200,
-				"min": 1000,
-				"max": 2000
-			},
-			"generatorTorque": {
-				"step": 2000,
-				"min": 0,
-				"max": 12000
-			},
-			"cp": {
-				"step": 0.5,
-				"min": 0,
-				"max": 2
-			},
-			"tsr": {
-				"step": 5,
-				"min": 0,
-				"max": 30
-			},
-			"pitchAngle": {
-				"step": 1,
-				"min": -1,
-				"max": 20
-			},
-			"activePower": {
-				"step": 250,
-				"min": 0,
-				"max": 2000
-			}
-		}
-	}
-}
-'''
-
-
-def load():
-    """Parse the module-level sample ``jsonString`` with LoadAnalysisInput and return the result."""
-    return LoadAnalysisInput(jsonString)
-
-
-def Analysis(contract: Contract):
-    """Serialize a contract to a JSON string, best-effort.
-
-    Args:
-        contract: object whose ``to_dict()`` result is JSON-serializable.
-
-    Returns:
-        The JSON text, or an empty string when conversion or serialization
-        fails (the traceback is printed in that case).
-    """
-    try:
-        contractDict = contract.to_dict()  # convert to a plain dict first
-        return json.dumps(contractDict)
-    except Exception:
-        # Deliberate best-effort: report the failure and return "". Catching
-        # Exception instead of a bare except lets KeyboardInterrupt and
-        # SystemExit propagate.
-        traceback.print_exc()
-        return ""

+ 0 - 35
dataContract/algorithmContract/customDataContract.py

@@ -1,35 +0,0 @@
-import types
-from algorithmContract.dataSource import DataSource
-from algorithmContract.dataFilter import DataFilter
-from algorithmContract.customFilter import CustomFilter
-from algorithmContract.configAnalysis import ConfigAnalysis
-from algorithmContract.graphSet import GraphSet
-
-
-class DataContract:
-    """Payload of a contract: run mode, data filter, analyses to run and graph settings.
-
-    ``graphSets`` maps a name either to a graph-set object (anything with a
-    ``to_dict`` method) or to a nested dict of such entries.
-    """
-
-    def __init__(self,autoOrManual:str , dataFilter: DataFilter, configAnalysis: list[ConfigAnalysis], graphSets: dict[str,GraphSet]):
-        self.autoOrManual=autoOrManual
-        self.dataFilter = dataFilter
-        self.configAnalysis = configAnalysis
-        self.graphSets = graphSets
-
-    def _graph_sets_to_dict(self, graph_sets):
-        """Serialize a (possibly nested) mapping of graph sets.
-
-        Raises:
-            TypeError: when a value is neither a dict nor an object with ``to_dict``.
-        """
-        result = {}
-        for key, value in graph_sets.items():
-            if isinstance(value, dict):
-                # Grouping level: serialize its members recursively.
-                result[key] = self._graph_sets_to_dict(value)
-            elif hasattr(value, "to_dict"):
-                result[key] = value.to_dict()
-            else:
-                raise TypeError(f"Unsupported type for graph set value: {type(value)}")
-        return result
-
-    def to_dict(self):
-        """Return the contract payload as plain dicts and lists.
-
-        Graph sets go through ``_graph_sets_to_dict`` so grouped entries
-        serialize too; a flat mapping gives the same output as before.
-        """
-        return {
-            "autoOrManual":self.autoOrManual,
-            "dataFilter": self.dataFilter.to_dict(),
-            "configAnalysis": [analysis.to_dict() for analysis in self.configAnalysis],
-            "graphSets": self._graph_sets_to_dict(self.graphSets),
-        }

+ 0 - 7
dataContract/algorithmContract/customFilter.py

@@ -1,7 +0,0 @@
-class CustomFilter:
-    """A min/max pair of bound values for one custom filter entry."""
-
-    def __init__(self, min_val:float, max_val:float):
-        self.min, self.max = min_val, max_val
-
-    def to_dict(self):
-        """Return the two bounds under the keys "min" and "max"."""
-        return dict(min=self.min, max=self.max)

+ 0 - 7
dataContract/algorithmContract/dataContractType.py

@@ -1,7 +0,0 @@
-class DataContractType:
-    """Type name and version string of a data contract."""
-
-    def __init__(self, type:str, version:str):
-        self.type, self.version = type, version
-
-    def to_dict(self):
-        """Return the descriptor under the keys "type" and "version"."""
-        return dict(type=self.type, version=self.version)

+ 0 - 23
dataContract/algorithmContract/dataFilter.py

@@ -1,23 +0,0 @@
-from algorithmContract.customFilter import CustomFilter
-
-
-class DataFilter:
-    """Selection part of a contract: farm, turbines, batch, time window, excluded months and custom filters."""
-
-    def __init__(self, powerFarmID: str, turbines: list, dataBatchNum: str, beginTime: str, endTime: str, excludingMonths: list, customFilter: dict[str,CustomFilter]):
-        self.powerFarmID, self.turbines, self.dataBatchNum = powerFarmID, turbines, dataBatchNum
-        self.beginTime, self.endTime = beginTime, endTime
-        self.excludingMonths = excludingMonths
-        self.customFilter = customFilter
-
-    def to_dict(self):
-        """Return all attributes as a dict; each custom filter is serialized via its ``to_dict``."""
-        plain_fields = ("powerFarmID", "turbines", "dataBatchNum",
-                        "beginTime", "endTime", "excludingMonths")
-        serialized = {field: getattr(self, field) for field in plain_fields}
-        serialized["customFilter"] = {
-            name: bounds.to_dict() for name, bounds in self.customFilter.items()
-        }
-        return serialized

+ 0 - 6
dataContract/algorithmContract/dataSource.py

@@ -1,6 +0,0 @@
-class DataSource:
-    """Holds the ``scada`` value of a contract's data source."""
-
-    def __init__(self, scada:str):
-        self.scada = scada
-
-    def to_dict(self):
-        """Return the value under the key "scada"."""
-        return dict(scada=self.scada)

+ 0 - 8
dataContract/algorithmContract/graphSet.py

@@ -1,8 +0,0 @@
-class GraphSet:
-    """Step, minimum and maximum values for one graph setting."""
-
-    def __init__(self, step:float, min_val:float, max_val:float):
-        self.step, self.min, self.max = step, min_val, max_val
-
-    def to_dict(self):
-        """Return the three values under the keys "step", "min" and "max"."""
-        return dict(step=self.step, min=self.min, max=self.max)

+ 0 - 107
dataContract/algorithmContract/testDataContract.py

@@ -1,107 +0,0 @@
-import json
-from algorithmContract.contract import LoadAnalysisInput,Analysis
-
-
-
-# Manual check script: parse the sample contract below, print two nested
-# graph-set steps, then serialize the parsed contract back to JSON.
-# In this sample "turbines" is null and "graphSets" groups some entries under
-# "directDrive" / "indirectDrive".
-jsonString = '''
-{
-	"dataContractType": {
-		"type": "analysisExecuteOrder",
-		"version": "1.2.0"
-	},
-	"dataContract": {
-		"autoOrManual": "automatic",
-		"dataFilter": {
-			"powerFarmID": "WOF01000002",
-			"turbines": null,
-			"dataBatchNum": "zhaoyuan_20240528",
-			"beginTime": "2023-01-01 00:00:00",
-			"endTime": "2024-12-31 23:59:59",
-			"excludingMonths": [
-				"2023-12",
-				"2023-09"
-			],
-			"customFilter": {
-				"valueWindSpeed": {
-					"min": 3.0,
-					"max": 25.0
-				},
-				"valuePitchAngle": {
-					"min": 2,
-					"max": null
-				},
-				"valueActivePower": {
-					"min": 10,
-					"max": 2500
-				},
-				"valueGeneratorSpeed": {
-					"min": 10,
-					"max": 2500
-				}
-			}
-		},
-		"configAnalysis": [
-			{
-				"package": "algorithm.temperatureLargeComponentsAnalyst",
-				"className": "TemperatureLargeComponentsAnalyst",
-				"methodName": "executeAnalysis",
-				"scada": "minute"
-			}
-		],
-		"graphSets": {
-			"directDrive": {
-				"generatorSpeed": {
-					"step": 5,
-					"min": 0,
-					"max": 30
-				},
-				"generatorTorque": {
-					"step": 10000,
-					"min": 0,
-					"max": 100000
-				}
-			},
-			"indirectDrive": {
-				"generatorSpeed": {
-					"step": 200,
-					"min": 1000,
-					"max": 2000
-				},
-				"generatorTorque": {
-					"step": 2000,
-					"min": 0,
-					"max": 12000
-				}
-			},
-			"tsr": {
-				"step": 5,
-				"min": 0,
-				"max": 30
-			},
-			"pitchAngle": {
-				"step": 1,
-				"min": -1,
-				"max": 20
-			},
-			"activePower": {
-				"step": 250,
-				"min": 0,
-				"max": 2000
-			}
-		}
-	}
-}
-'''
-
-
-# Parse the sample into a contract object.
-data=LoadAnalysisInput(jsonString)
-
-# Read the step of two graph sets through the "directDrive" group.
-print(data.dataContract.graphSets["directDrive"]["generatorSpeed"].step)
-print(data.dataContract.graphSets["directDrive"]["generatorTorque"].step)
-
-# Serialize the parsed contract back to a JSON string and show it.
-string=Analysis(data)
-print(string)
-
-
-