chenhongyan1989 10 mesiacov pred
rodič
commit
241972368e
50 zmenil súbory, kde vykonal 6841 pridanie a 2 odobranie
  1. 0 0
      dataAnalysisBehavior/behavior/__init__.py
  2. 11 0
      dataAnalysisBehavior/behavior/analyst.py
  3. 9 0
      dataAnalysisBehavior/behavior/analystNotFilter.py
  4. 9 0
      dataAnalysisBehavior/behavior/analystWithGoodBadLimitPoint.py
  5. 10 0
      dataAnalysisBehavior/behavior/analystWithGoodBadPoint.py
  6. 9 0
      dataAnalysisBehavior/behavior/analystWithGoodPoint.py
  7. 153 0
      dataAnalysisBehavior/behavior/dalAnalyst.py
  8. 435 0
      dataAnalysisBehavior/behavior/dataMarker.py
  9. 540 0
      dataAnalysisBehavior/behavior/dataMarkerOfScada3.py
  10. 177 0
      dataAnalysisBehavior/behavior/outputProcessor.py
  11. 0 0
      dataAnalysisBehavior/common/__init__.py
  12. 106 0
      dataAnalysisBehavior/common/appConfig.py
  13. 20 0
      dataAnalysisBehavior/common/turbineInfo.py
  14. 4 0
      dataAnalysisBusiness/__init__.py
  15. 1 0
      dataAnalysisBusiness/algorithm/__init__.py
  16. 269 0
      dataAnalysisBusiness/algorithm/cpWindSpeedAnalyst.py
  17. 21 0
      dataAnalysisBusiness/algorithm/dataIntegrityOfMinuteAnalyst.py
  18. 271 0
      dataAnalysisBusiness/algorithm/dataIntegrityOfSecondAnalyst.py
  19. 43 0
      dataAnalysisBusiness/algorithm/formula_cp.py
  20. 168 0
      dataAnalysisBusiness/algorithm/pitchGeneratorSpeedAnalyst.py
  21. 382 0
      dataAnalysisBusiness/algorithm/powerCurveAnalyst.py
  22. 107 0
      dataAnalysisBusiness/algorithm/powerOscillationAnalyst.py
  23. 278 0
      dataAnalysisBusiness/algorithm/ratedPowerWindSpeedAnalyst.py
  24. 138 0
      dataAnalysisBusiness/algorithm/ratedWindSpeedAnalyst.py
  25. 319 0
      dataAnalysisBusiness/algorithm/temperatureEnvironmentAnalyst.py
  26. 365 0
      dataAnalysisBusiness/algorithm/tsrAnalyst.py
  27. 379 0
      dataAnalysisBusiness/algorithm/tsrWindSpeedAnalyst.py
  28. 450 0
      dataAnalysisBusiness/demo/SCADA_10min_category_0.py
  29. 507 0
      dataAnalysisBusiness/demo/SCADA_10min_category_1.py
  30. 193 0
      dataAnalysisBusiness/demo/SCADA_10min_category_2.py
  31. 632 0
      dataAnalysisBusiness/demo/SCADA_10min_category_3.py
  32. 62 0
      dataAnalysisBusiness/demo/scatter3D_plotly.py
  33. 50 0
      dataAnalysisBusiness/demo/scatter3D_plotly_make_subplots.py
  34. 19 0
      dataAnalysisBusiness/demo/test.py
  35. 113 0
      dataAnalysisBusiness/demo/testDataProcess.py
  36. 12 0
      dataAnalysisBusiness/demo/testPandas.py
  37. 0 0
      dataContract/algorithmContract/__init__.py
  38. 169 0
      dataContract/algorithmContract/confBusiness.py
  39. 14 0
      dataContract/algorithmContract/configAnalysis.py
  40. 3 0
      dataContract/algorithmContract/const.py
  41. 199 0
      dataContract/algorithmContract/contract.py
  42. 35 0
      dataContract/algorithmContract/customDataContract.py
  43. 7 0
      dataContract/algorithmContract/customFilter.py
  44. 7 0
      dataContract/algorithmContract/dataContractType.py
  45. 23 0
      dataContract/algorithmContract/dataFilter.py
  46. 6 0
      dataContract/algorithmContract/dataSource.py
  47. 8 0
      dataContract/algorithmContract/graphSet.py
  48. 107 0
      dataContract/algorithmContract/testDataContract.py
  49. 1 1
      repositoryZN/utils/minioUtil/data/upload/path-to-your-local-file2.txt
  50. 0 1
      wtoaamapi/apps/viewDemo/viewUser.py

+ 0 - 0
dataAnalysisBehavior/behavior/__init__.py


+ 11 - 0
dataAnalysisBehavior/behavior/analyst.py

@@ -0,0 +1,11 @@
+from .baseAnalyst import BaseAnalyst
+import os
+import pandas as pd
+import numpy as np
+from utils.directoryUtil import DirectoryUtil as dir
+from algorithmContract.confBusiness import *
+
+
class Analyst(BaseAnalyst):
    """Concrete analyst base with no type-specific analysis step."""

    def typeAnalyst(self):
        # Deliberate no-op: subclasses override this hook when they need
        # type-specific behaviour.
        pass

+ 9 - 0
dataAnalysisBehavior/behavior/analystNotFilter.py

@@ -0,0 +1,9 @@
+import pandas as pd
+from algorithmContract.contract import Contract
+
+from .analyst import Analyst
+
+
class AnalystNotFilter(Analyst):
    """Analyst variant that applies no common filtering at all."""

    def filterCommon(self, dataFrame: pd.DataFrame, conf: Contract):
        # Pass-through: hand the frame back untouched.
        return dataFrame

+ 9 - 0
dataAnalysisBehavior/behavior/analystWithGoodBadLimitPoint.py

@@ -0,0 +1,9 @@
+from algorithmContract.confBusiness import *
+
+from .analyst import Analyst
+
+
class AnalystWithGoodBadLimitPoint(Analyst):
    """Analyst that keeps good, bad and power-limited points (labels 0-4)."""

    def selectLabCondition(self, conditions: list[str]):
        # Restrict the query to rows whose label flag is any of 0..4.
        conditions.append(f"{Field_LableFlag} in (0,1,2,3,4)")

+ 10 - 0
dataAnalysisBehavior/behavior/analystWithGoodBadPoint.py

@@ -0,0 +1,10 @@
+from algorithmContract.confBusiness import *
+
+from .analyst import Analyst
+
+
class AnalystWithGoodBadPoint(Analyst):
    """Analyst that keeps good and bad points (labels 0-3), excluding curtailment."""

    def selectLabCondition(self, conditions: list[str]):
        # Restrict the query to rows whose label flag is any of 0..3.
        conditions.append(f"{Field_LableFlag} in (0,1,2,3)")
+

+ 9 - 0
dataAnalysisBehavior/behavior/analystWithGoodPoint.py

@@ -0,0 +1,9 @@
+from algorithmContract.confBusiness import *
+
+from .analyst import Analyst
+
+
class AnalystWithGoodPoint(Analyst):
    """Analyst that keeps only points labelled good (label flag 0)."""

    def selectLabCondition(self, conditions: list[str]):
        # Restrict the query to rows whose label flag equals 0.
        conditions.append(f"{Field_LableFlag} = 0")

+ 153 - 0
dataAnalysisBehavior/behavior/dalAnalyst.py

@@ -0,0 +1,153 @@
+from logging import Logger
+import pandas as pd
+from common.commonBusiness import CommonBusiness
+from algorithmContract.const import *
+from algorithmContract.confBusiness import *
+from utils.rdbmsUtil.databaseUtil import DatabaseUtil
+from sqlalchemy.orm import Session
+from sqlalchemy.sql import text
+
+
class DALAnalyst:
    """Data-access layer for power-farm master data.

    All queries run as SQLAlchemy ``text()`` statements with *bound
    parameters* (``:name`` placeholders) rather than f-string
    interpolation, closing the SQL-injection hole the original
    implementation had, and each returns a plain ``pandas.DataFrame``.
    """

    def __init__(self, logger: Logger, dbUtil: dict[str, DatabaseUtil]) -> None:
        self.logger = logger
        self.dbUtil = dbUtil

    def _queryToFrame(self, sql: str, columns: list, params: dict = None) -> pd.DataFrame:
        """Execute *sql* with *params* bound on the foundation DB and wrap the rows.

        :param sql: SELECT statement with ``:name`` placeholders.
        :param columns: column names for the resulting DataFrame (must match
            the SELECT list order).
        :param params: bind values; ``None`` means no parameters.
        """
        dbUtil: DatabaseUtil = self.dbUtil[DATABASE_BusinessFoundationDb]
        with dbUtil.session_scope() as session:
            result = session.execute(text(sql), params or {}).fetchall()
            return pd.DataFrame(result, columns=columns)

    @staticmethod
    def _inClauseParams(prefix: str, values) -> tuple:
        """Build a ``:p0, :p1, ...`` placeholder list and its bind dict for an IN clause."""
        names = [f"{prefix}{i}" for i in range(len(values))]
        placeholders = ", ".join(f":{n}" for n in names)
        return placeholders, dict(zip(names, values))

    def loadPowerFarmInfos(self, powerFarmID: str):
        """Load basic wind-farm information (获取场站基础信息) for one farm."""
        columns = ['field_code', 'company_code', 'field_name', 'density', 'state', 'engine_number', 'rated_capacity_number',
                   'province_id', 'province_name', 'city_id', 'city_name', 'longitude', 'latitude', 'elevation_height', 'power_contract_url']
        sql = ("SELECT field_code,company_code,field_name,density,state,engine_number,rated_capacity_number,"
               "province_id,province_name,city_id,city_name,longitude,latitude,elevation_height,power_contract_url "
               "FROM wind_field WHERE del_state=0 AND field_code=:farm_id")
        return self._queryToFrame(sql, columns, {"farm_id": powerFarmID})

    def loadTurbineInfos(self, powerFarmID: str):
        """Load basic wind-turbine information (获取风电机组基础信息) for one farm."""
        columns = ['field_code', 'engine_code', 'engine_name', Field_MillTypeCode, 'rated_capacity', 'elevation_height', 'hub_height',
                   'state', 'longitude', 'latitude', 'sightcing']
        sql = ("SELECT field_code,engine_code,engine_name,mill_type_code,rated_capacity,elevation_height,hub_height,"
               "state,longitude,latitude,sightcing "
               "FROM wind_engine_group WHERE del_state=0 AND field_code=:farm_id")
        return self._queryToFrame(sql, columns, {"farm_id": powerFarmID})

    def loadDataTransfer(self, powerFarmID: str, dataBatchNum: str):
        """Load the data-transfer record (获取数据操作信息) for one farm/batch."""
        columns = ['field_code', 'batch_code', 'engine_count',
                   'transfer_type', 'transfer_addr', 'time_granularity']
        sql = ("SELECT field_code, batch_code, engine_count, transfer_type, transfer_addr, time_granularity "
               "FROM data_transfer WHERE field_code=:farm_id AND batch_code=:batch_code")
        return self._queryToFrame(sql, columns, {"farm_id": powerFarmID,
                                                 "batch_code": dataBatchNum})

    def loadTurbineModelInfos(self, turbineModels: list):
        """Load turbine-model master data (获取型号基础信息) for the given model codes."""
        if len(turbineModels) <= 0:
            # Empty IN () would be invalid SQL; there is nothing to load.
            return pd.DataFrame()

        placeholders, params = self._inClauseParams("model", list(turbineModels))
        columns = [Field_MillTypeCode, Field_MachineTypeCode, Field_ManufacturerName, Field_ManufacturerCode, Field_Brand, Field_HubHeight, Field_VaneLong, Field_MotionType,
                   Field_Combination, Field_PowerCriterionURL, Field_RotorDiameter,  Field_RSR, Field_RatedWindSpeed, Field_CutInWS, Field_CutOutWS]
        sql = ("SELECT mill_type_code,machine_type_code,manufacturer_name,manufacturer_code,brand,tower_height,vane_long,"
               "curved_motion_type,combination,power_criterion_url,rotor_diameter,rotational_speed_ratio,rated_wind_speed,"
               "rated_cut_in_windspeed,rated_cut_out_windspeed "
               f"FROM wind_engine_mill WHERE del_state=0 AND state=1 AND mill_type_code IN ({placeholders})")
        return self._queryToFrame(sql, columns, params)

    def loadWeatherStationInfos(self, powerFarmID: str):
        """Load anemometer-tower information (获取气象站/测风塔基础信息) for one farm."""
        columns = ['field_code', 'anemometer_code',
                   'anemometer_name', 'longitude', 'latitude']
        sql = ("SELECT y.field_code ,x.anemometer_code ,x.anemometer_name ,x.longitude ,x.latitude "
               "FROM anemometer_tower AS x INNER JOIN anemometer_tower_relation AS y "
               "ON x.anemometer_code =y.tower_code "
               "WHERE x.del_state=0 AND x.state=1 AND y.field_code=:farm_id")
        return self._queryToFrame(sql, columns, {"farm_id": powerFarmID})

    def processContractData(self, common: CommonBusiness, powerFarmID: str, airDensity: float, turbineModelInfo: pd.DataFrame):
        """Load contract power curves and compute Cp per turbine model.

        :param common: business helper providing ``calculateCp2``.
        :param airDensity: site air density used in the Cp computation.
        :param turbineModelInfo: model master data (needs rotor diameter per model).
        :returns: concatenation of the per-model Cp-augmented curves.
        """
        dataFrameMerge = pd.DataFrame()
        turbineModels = turbineModelInfo[Field_MillTypeCode]
        if len(turbineModels) <= 0:
            # No models -> no query; an empty IN () would be invalid SQL.
            return dataFrameMerge

        placeholders, params = self._inClauseParams("model", list(turbineModels))
        params["farm_id"] = powerFarmID
        columns = [Field_PowerFarmCode, Field_MillTypeCode,
                   Field_ActiverPower, Field_WindSpeed]
        sql = ("SELECT field_code, mill_type_code,active_power,wind_speed as wind_velocity "
               "FROM power_word_relation_contract "
               f"WHERE field_code=:farm_id AND mill_type_code IN ({placeholders})")
        contractPowerCurves = self._queryToFrame(sql, columns, params)

        for name, group in contractPowerCurves.groupby(Field_MillTypeCode):
            self.logger.info("current turbine model : %s", name)
            model = turbineModelInfo[turbineModelInfo[Field_MillTypeCode] == name]
            if len(model) <= 0:
                # Curve exists for a model we have no master data for; skip it.
                continue

            dataFrame = common.calculateCp2(
                group, airDensity, model[Field_RotorDiameter].iloc[0], Field_WindSpeed, Field_ActiverPower)

            dataFrameMerge = pd.concat(
                [dataFrameMerge, dataFrame], axis=0, sort=False)

        return dataFrameMerge

+ 435 - 0
dataAnalysisBehavior/behavior/dataMarker.py

@@ -0,0 +1,435 @@
+import os
+import re
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.pyplot import MultipleLocator
+import math
+import pdb
+from algorithmContract.confBusiness import *  #将这个包里的全部加载
+
intervalPower = 25        # power-bin width used for the grid, in kW
intervalWindspeed = 0.25  # wind-speed-bin width used for the grid, in m/s

# Power-limit (curtailment) detection strategy, for reference: 1. power < 1100 and pitch angle > 0.5; 2. power < 1250 and pitch angle > 1.5; 3. power < 1400 and pitch angle > 2.5;
+
class DataMarker:
    """Label SCADA (time, wind speed, active power) samples.

    The power curve is binned into a (power x wind-speed) grid; each sample is
    then classified as good, bad, or power-limited (curtailed), and the result
    is written into the ``Field_LableFlag`` column of the returned frame.
    Entry point is :meth:`main`.
    """

    # Select the time, wind-speed and active-power columns from the SCADA frame.
    def preprocessData(self,dataFrame:pd.DataFrame):
        timeStamp = dataFrame[Field_Time]
        activePower = dataFrame[Field_ActiverPower]
        windSpeed = dataFrame[Field_WindSpeed]
        dataFramePartOfSCADA = pd.concat([timeStamp,activePower,windSpeed], axis=1)
        return dataFramePartOfSCADA

    # Compute the number of power bins and wind-speed bins of the grid.
    def calculateIntervals(self,activePowerMax, ratedPower, windSpeedCutOut):
        binNumOfPower = math.floor((activePowerMax) / intervalPower) + 1 if (activePowerMax) >= ratedPower else math.floor(ratedPower / intervalPower)
        binNumOfWindSpeed = math.ceil(windSpeedCutOut / intervalWindspeed)
        return binNumOfPower, binNumOfWindSpeed

    def calculateTopP(self,activePowerMax,ratedPower):
        # Number of power bins lying above rated power (0 when max < rated).
        TopP = 0
        if activePowerMax >= ratedPower:
            TopP = math.floor((activePowerMax - ratedPower) / intervalPower) + 1
        else:
            TopP = 0
        return TopP

    def chooseData(self,dataFramePartOfSCADA:pd.DataFrame,dataFrame:pd.DataFrame):
        """Collect valid (speed, power) points and pre-label low-power rows.

        Adds the ``Field_LableFlag`` column (initialised to 0, set to -1 where
        power <= 10) and returns the valid points ``DzMarch809``, their count
        ``nCounter1``, the raw ndarray, the 1-based source-row indices
        ``Point_line`` and the array shape ``SM``.
        """
        lowLimitActivePower=10.0
        lowLimitWindSpeed=0.0
        # Initialise the label column.
        # SM1 = dataFramePartOfSCADA.shape
        # AA1 = SM1[0]
        # lab = [[0] for _ in range(AA1)]
        # lab = pd.DataFrame(lab,columns=['lab'])
        # dataFramePartOfSCADA = pd.concat([dataFramePartOfSCADA,lab],axis=1)  # append a label column after the t/p/v columns
        dataFramePartOfSCADA[Field_LableFlag]=0
        dataFramePartOfSCADA = dataFramePartOfSCADA.values
        SM = dataFramePartOfSCADA.shape #(52561,4)
        # SM = dataFramePartOfSCADA[((dataFramePartOfSCADA[Field_ActiverPower].notna()) & (dataFramePartOfSCADA[Field_WindSpeed].notna()))].shape #(52561,4)
        AA = SM[0] -1
        nCounter1 = 0
        DzMarch809_0 = np.zeros((AA, 3))
        Point_line = np.zeros(AA, dtype=int)
        APower = dataFrame[Field_ActiverPower].values
        WSpeed = dataFrame[Field_WindSpeed ].values

        for i in range(AA):
            if (APower[i] > lowLimitActivePower) & (WSpeed[i] > lowLimitWindSpeed):
                nCounter1 += 1
                DzMarch809_0[nCounter1-1, 0] = WSpeed[i]
                DzMarch809_0[nCounter1-1, 1] = APower[i]
                Point_line[nCounter1-1] = i+1
            if APower[i] <= 10:
                dataFramePartOfSCADA[i,SM[1]-1] = -1

            # NOTE(review): re-slicing every iteration is redundant — only the
            # value after the final iteration is ever used.
            DzMarch809 = DzMarch809_0[:nCounter1, :]

        return DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM

    def gridCount(self,binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809):
        """Count how many valid points fall into each (power, wind-speed) cell."""
        # Cells start at 1 and 1 is subtracted again below, so the net result
        # is the plain occupancy count.
        XBoxNumber = np.ones((binNumOfPower, binNumOfWindSpeed),dtype=int)
        # Walk over the valid points.
        for i in range(nCounter1):
            for m in range(1, binNumOfPower + 1):
                if (DzMarch809[i,1] > (m - 1) * intervalPower) and (DzMarch809[i,1] <= m * intervalPower):
                    nWhichP = m
                    break
            for n in range(1, binNumOfWindSpeed + 1):
                if (DzMarch809[i, 0] > (n - 1) * intervalWindspeed) and (DzMarch809[i, 0] <= n * intervalWindspeed):
                    nWhichV = n
                    break
            if (nWhichP > 0) and (nWhichV > 0):
                XBoxNumber[nWhichP - 1][nWhichV - 1] += 1
        for m in range(1,binNumOfPower+1):
            for n in range(1,binNumOfWindSpeed+1):
                XBoxNumber[m-1,n-1] = XBoxNumber[m-1,n-1] - 1

        return XBoxNumber

    def percentageDots(self,XBoxNumber, binNumOfPower, binNumOfWindSpeed,axis):
        """Percentage of each bin's points per cell, along *axis* ('power' = rows, otherwise columns)."""
        BoxPercent = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=float)
        BinSum = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed, 1), dtype=int)
        for i in range(1,1+(binNumOfPower if axis == 'power' else binNumOfWindSpeed)):
            for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1):
                BinSum[i-1] = BinSum[i-1] + (XBoxNumber[i-1,m-1] if axis == 'power' else XBoxNumber[m-1,i-1])
            for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1):
                if BinSum[i-1]>0:
                    if axis == 'power':
                        BoxPercent[i-1,m-1] = (XBoxNumber[i-1,m-1] / BinSum[i-1])*100
                    else:
                        BoxPercent[m-1,i-1] = (XBoxNumber[m-1,i-1] / BinSum[i-1])*100

        return BoxPercent,BinSum

    def maxBoxPercentage(self,BoxPercent, binNumOfPower, binNumOfWindSpeed, axis):
        """Index and value of the densest cell per bin along *axis*."""
        BoxMaxIndex = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = int)
        BoxMax = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = float)
        for m in range(1,(binNumOfPower if axis == 'power' else binNumOfWindSpeed)+1):
            BoxMaxIndex[m-1] = (np.argmax(BoxPercent[m-1, :])) if axis == 'power' else (np.argmax(BoxPercent[:, m-1]))
            BoxMax[m-1] = (np.max(BoxPercent[m-1, :]))if axis == 'power' else (np.max(BoxPercent[:, m-1]))

        return BoxMaxIndex, BoxMax

    def extendBoxPercent(self,m, BoxMax,TopP,BoxMaxIndex,BoxPercent,binNumOfPower,binNumOfWindSpeed):
        """Spread left/right from each power bin's densest cell until *m* percent
        of the bin's points are covered; returns the per-bin (left, right) extents."""
        DotDense = np.zeros(binNumOfPower)
        DotDenseLeftRight = np.zeros((binNumOfPower,2))
        DotValve = m
        PDotDenseSum = 0
        for i in range(binNumOfPower - TopP):
            PDotDenseSum = BoxMax[i]
            iSpreadRight = 1
            iSpreadLeft = 1
            while PDotDenseSum < DotValve:
                if (BoxMaxIndex[i] + iSpreadRight) < binNumOfWindSpeed-1-1:
                    PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] + iSpreadRight]
                    iSpreadRight += 1
                else:
                    break
                if (BoxMaxIndex[i] - iSpreadLeft) > 0:
                    PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] - iSpreadLeft]
                    iSpreadLeft += 1
                else:
                    break
            iSpreadRight = iSpreadRight-1
            iSpreadLeft = iSpreadLeft-1

            DotDenseLeftRight[i, 0] = iSpreadLeft
            DotDenseLeftRight[i, 1] = iSpreadRight
            DotDense[i] = iSpreadLeft + iSpreadRight + 1

        return DotDenseLeftRight

    def calculatePWidth(self,binNumOfPower,TopP,DotDenseLeftRight,PBinSum):
        """Average main-band widths plus a per-power-bin power-limit flag."""
        PowerLimit = np.zeros(binNumOfPower, dtype=int)
        WidthAverage = 0
        WidthAverage_L = 0
        nCounter = 0
        PowerLimitValve = 6
        N_Pcount = 20
        for i in range(binNumOfPower - TopP):
            # A wide right spread in a well-populated bin suggests curtailment.
            if (DotDenseLeftRight[i, 1] > PowerLimitValve) and (PBinSum[i] > N_Pcount):
                PowerLimit[i] = 1

            if DotDenseLeftRight[i, 1] <= PowerLimitValve:
                WidthAverage += DotDenseLeftRight[i, 1]
                WidthAverage_L += DotDenseLeftRight[i,1]
                nCounter += 1
        WidthAverage /= nCounter if nCounter > 0 else 1
        WidthAverage_L /= nCounter if nCounter > 0 else 1

        return WidthAverage, WidthAverage_L,PowerLimit

    def amendMaxBox(self,binNumOfPower,TopP,PowerLimit,BoxMaxIndex):
        """Smooth jumps (>5 cells) in the densest-cell index of power-limited bins."""
        end=binNumOfPower - TopP
        for i in range(1, binNumOfPower - TopP):
            if i>=end:
                continue

            if (PowerLimit[i] == 1) and (abs(BoxMaxIndex[i] - BoxMaxIndex[i - 1]) > 5):
                BoxMaxIndex[i] = BoxMaxIndex[i - 1] + 1

        return BoxMaxIndex

    def markBoxLimit(self,binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,BoxMaxIndex):
        """Mark cells right (=1) and left (=2) of the main power band for removal."""
        BBoxRemove = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)
        for m in range(binNumOfPower - TopP):
            for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed):
                BBoxRemove[m, n] = 1
            for n in range(int(BoxMaxIndex[m]) - int(CurveWidthL)+1, 0, -1):
                if n-1>=binNumOfWindSpeed:
                    continue
                BBoxRemove[m, n-1] = 2
        return BBoxRemove

    def markBoxPLimit(self,binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,BoxPercent,BoxMaxIndex,mm_value:int,BBoxRemove,nn_value:int):
        """Mark curtailed and sparse cells; mutates ``BBoxRemove`` in place and
        returns the separate ``BBoxLimit`` grid of curtailment cells."""
        BBoxLimit = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)
        for i in range(2, binNumOfPower - TopP):
            if PowerLimit[i] == 1:
                BBoxLimit[i, int(BoxMaxIndex[i] + CurveWidthR + 1):binNumOfWindSpeed] = 1
        IsolateValve = 3
        for m in range(binNumOfPower - TopP):
            for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed):
                if BoxPercent[m, n] < IsolateValve:
                    BBoxRemove[m, n] = 1

        for m in range(binNumOfPower - TopP, binNumOfPower):
            for n in range(binNumOfWindSpeed):
                BBoxRemove[m, n] = 3

        # Mark the under-producing cells left of the main band's knee point.
        for m in range(mm_value - 1, binNumOfPower - TopP):
            for n in range(int(nn_value) - 2):
                BBoxRemove[m, n] = 2

        return BBoxLimit

    def markData(self,binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1):
        """Translate the per-cell removal marks into per-point selection labels."""
        DzMarch809Sel = np.zeros(nCounter1, dtype=int)
        nWhichP = 0
        nWhichV = 0
        for i in range(nCounter1):
            for m in range( binNumOfPower ):
                if ((DzMarch809[i,1])> m * intervalPower) and ((DzMarch809[i,1]) <= (m+1) * intervalPower):
                    nWhichP = m  # m records the bin index
                    break
            for n in range( binNumOfWindSpeed ):
                if DzMarch809[i,0] > ((n+1) * intervalWindspeed - intervalWindspeed/2) and DzMarch809[i,0] <= ((n+1) * intervalWindspeed + intervalWindspeed / 2):
                    nWhichV = n
                    break
            if nWhichP >= 0 and nWhichV >= 0:
                if BBoxRemove[nWhichP, nWhichV] == 1:
                    DzMarch809Sel[i] = 1
                elif BBoxRemove[nWhichP, nWhichV] == 2:
                    DzMarch809Sel[i] = 2
                elif BBoxRemove[nWhichP , nWhichV] == 3:
                    DzMarch809Sel[i] = 0

        return DzMarch809Sel


    def windowFilter(self,nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line):
        """Detect curtailment via flat-power sliding windows and label those points 4."""
        PVLimit = np.zeros((nCounter1, 3))
        nLimitTotal = 0
        nWindowLength = 6
        LimitWindow = np.zeros(nWindowLength)
        UpLimit = 0
        LowLimit = 0
        PowerStd = 30
        nWindowNum = np.floor(nCounter1/nWindowLength)
        PowerLimitUp = ratedPower - 100
        PowerLimitLow = 100

        # Iterate over each window.
        for i in range(int(nWindowNum)):
            start_idx = i * nWindowLength
            end_idx = start_idx + nWindowLength
            LimitWindow = DzMarch809[start_idx:end_idx, 1]

            bAllInAreas = np.all(LimitWindow >= PowerLimitLow) and np.all(LimitWindow <= PowerLimitUp)
            if not bAllInAreas:
                continue

            UpLimit = LimitWindow[0] + PowerStd
            LowLimit = LimitWindow[0] - PowerStd

            bAllInUpLow = np.all(LimitWindow >= LowLimit) and np.all(LimitWindow <= UpLimit)
            if bAllInUpLow:
                DzMarch809Sel[start_idx:end_idx] = 4

                for j in range(nWindowLength):
                    PVLimit[nLimitTotal, :2] = DzMarch809[start_idx + j, :2]
                    PVLimit[nLimitTotal, 2] = Point_line[start_idx + j]  # mark this sample
                    nLimitTotal += 1
        return PVLimit,nLimitTotal

    def store_points(self,DzMarch809, DzMarch809Sel,Point_line, nCounter1):
        """Split valid points into kept (``PVDot``) and discarded (``PVBad``) sets."""
        PVDot = np.zeros((nCounter1, 3))
        PVBad = np.zeros((nCounter1, 3))

        nCounterPV = 0
        nCounterBad = 0
        for i in range(nCounter1):
            if DzMarch809Sel[i] == 0:
                nCounterPV += 1
                PVDot[nCounterPV-1, :2] = DzMarch809[i, :2]
                PVDot[nCounterPV-1, 2] = Point_line[i]
            elif DzMarch809Sel[i] in [1, 2, 3]:
                nCounterBad += 1
                PVBad[nCounterBad-1, :2] = DzMarch809[i, :2]
                PVBad[nCounterBad-1, 2] = Point_line[i]

        return PVDot, nCounterPV,PVBad,nCounterBad

    def markAllData(self,nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit):
        """Write point labels (1 good, 5 bad, 4 curtailed) back into the raw array."""
        for i in range(nCounterPV):
            dataFramePartOfSCADA[int(PVDot[i, 2] - 1), (SM[1]-1)] = 1
        # Bad points.
        for i in range(nCounterBad):
            dataFramePartOfSCADA[int(PVBad[i, 2] - 1),(SM[1]-1)] = 5  # bad-point label

        # Label the curtailment (power-limited) points across all data.
        for i in range(nLimitTotal):
            dataFramePartOfSCADA[int(PVLimit[i, 2] - 1),(SM[1]-1)] = 4  # power-limited label

        return dataFramePartOfSCADA

    # 4. Data visualisation.
    def plotData(self,turbineName:str,ws:list, ap:list):
        """Scatter-plot wind speed vs. active power for one turbine (blocks on show)."""
        fig = plt.figure()
        plt.scatter(ws, ap, s=1, c='black', marker='.')
        ax = plt.gca()
        ax.xaxis.set_major_locator(MultipleLocator(5))
        ax.yaxis.set_major_locator(MultipleLocator(500))
        plt.title(turbineName)
        plt.xlim((0, 30))
        plt.ylim((0, 2200))
        plt.tick_params(labelsize=8)
        plt.xlabel("V/(m$·$s$^{-1}$)", fontsize=8)
        plt.ylabel("P/kW", fontsize=8)
        plt.show()


    def main(self,dataFrame:pd.DataFrame,ratedPower,cusInWS,cusOutWS):
        """Full labelling pipeline: bin, grid-count, main-band detection,
        marking, curtailment windows, sawtooth smoothing.

        Returns *dataFrame* with the ``Field_LableFlag`` column filled in.
        Raises ``CustomError(109)`` / ``CustomError(107)`` when rated power or
        cut-in/cut-out wind speed is NaN.
        """
        dataFramePartOfSCADA = self.preprocessData(dataFrame)
        powerMax = dataFrame[Field_ActiverPower].max()

        # ratedPower=dataFrame[Field_RatedPower].iloc[0]
        # cusInWS=dataFrame[Field_CutInWS].iloc[0]
        # cusOutWS=dataFrame[Field_CutOutWS].iloc[0]

        if pd.isna(ratedPower):
            raise CustomError(109)
        if pd.isna(cusInWS):
            raise CustomError(107)
        if pd.isna(cusOutWS):
            raise CustomError(107)

        binNumOfPower, binNumOfWindSpeed = self.calculateIntervals(powerMax,ratedPower, cusOutWS)
        TopP = self.calculateTopP(powerMax,ratedPower)
        # Assign labels according to the power thresholds.
        DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM = self.chooseData(dataFramePartOfSCADA,dataFrame)
        XBoxNumber = self.gridCount(binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809)
        PBoxPercent,PBinSum = self.percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'power')
        VBoxPercent,VBinSum = self.percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'speed')

        PBoxMaxIndex, PBoxMaxP = self.maxBoxPercentage(PBoxPercent, binNumOfPower, binNumOfWindSpeed, 'power')
        VBoxMaxIndex, VBoxMaxV = self.maxBoxPercentage(VBoxPercent, binNumOfPower, binNumOfWindSpeed, 'speed')
        if PBoxMaxIndex[0] > 14: PBoxMaxIndex[0] = 9
        DotDenseLeftRight = self.extendBoxPercent(90, PBoxMaxP,TopP,PBoxMaxIndex,PBoxPercent,binNumOfPower,binNumOfWindSpeed)
        WidthAverage, WidthAverage_L,PowerLimit = self.calculatePWidth(binNumOfPower,TopP,DotDenseLeftRight,PBinSum)
        PBoxMaxIndex = self.amendMaxBox(binNumOfPower,TopP,PowerLimit,PBoxMaxIndex)
        # Compute the left and right borders of the main power band.
        CurveWidthR = np.ceil(WidthAverage) + 2
        CurveWidthL = np.ceil(WidthAverage_L) + 2
        # Locate the upper-left knee of the main band, i.e. the grid index of
        # the rated wind speed.
        CurveTop = np.zeros((2, 1), dtype=int)
        BTopFind = 0
        mm_value = None
        nn_value = None
        mEnd=binNumOfPower - TopP
        for m in range(binNumOfPower - TopP, 0, -1):
            if m>=mEnd:
                continue

            for n in range(int(np.floor(int(cusInWS) / intervalWindspeed)), binNumOfWindSpeed - 1):
                if (VBoxPercent[m, n - 1] < VBoxPercent[m, n]) and (VBoxPercent[m, n] <= VBoxPercent[m, n + 1]) and (XBoxNumber[m, n] >= 3):
                    CurveTop[0] = m
                    CurveTop[1] = n  # e.g. [80th power bin, 40th speed bin]
                    BTopFind = 1
                    mm_value = m
                    nn_value = n
                    break
            if BTopFind == 1:
                break
        # Mark the grid cells.
        BBoxRemove = self.markBoxLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,PBoxMaxIndex)
        if mm_value is not None and nn_value is not None:
            BBoxLimit = self.markBoxPLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,PBoxPercent,PBoxMaxIndex,mm_value,BBoxRemove,nn_value)
        DzMarch809Sel = self.markData(binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1)
        PVLimit,nLimitTotal = self.windowFilter(nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line)
        # Smooth the sawtooth of the sliding-window main power band.
        nSmooth = 0
        for i in range(binNumOfPower - TopP - 1):
            PVLeftDown = np.zeros(2)
            PVRightUp = np.zeros(2)
            if PBoxMaxIndex[i + 1] - PBoxMaxIndex[i] >= 1:
                # Coordinates of the lower-left and upper-right vertices.
                PVLeftDown[0] = (PBoxMaxIndex[i]+1 + CurveWidthR) * 0.25 - 0.125
                PVLeftDown[1] = (i) * 25
                PVRightUp[0] = (PBoxMaxIndex[i+1]+1 + CurveWidthR) * 0.25 - 0.125
                PVRightUp[1] = (i+1) * 25

                for m in range(nCounter1):
                    # Is the current point inside the sawtooth region?
                    if (DzMarch809[m, 0] > PVLeftDown[0]) and (DzMarch809[m, 0] < PVRightUp[0]) and (DzMarch809[m, 1] > PVLeftDown[1]) and (DzMarch809[m, 1] < PVRightUp[1]):
                        # Is its slope steeper than the diagonal?
                        if ((DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0])) > ((PVRightUp[1] - PVLeftDown[1]) / (PVRightUp[0] - PVLeftDown[0])):
                            # In the upper-left triangle of the sawtooth: keep
                            # the point and bump the smoothing counter.
                            DzMarch809Sel[m] = 0
                            nSmooth += 1
        # DzMarch809Sel now holds the sawtooth-smoothing selection; nSmooth is
        # the number of points selected.
        PVDot, nCounterPV,PVBad,nCounterBad = self.store_points(DzMarch809, DzMarch809Sel,Point_line, nCounter1)
        # Write the labels back.
        dataFramePartOfSCADA = self.markAllData(nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit)
        A = dataFramePartOfSCADA[:,-1]
        A=pd.DataFrame(A,columns=[Field_LableFlag])

        dataFrame = pd.concat([dataFrame,A],axis=1)
        dataFrame[Field_LableFlag]=dataFrame[Field_LableFlag].fillna(0)
        """
        标识	说明
        5	坏点
        4	限功率点
        1	好点
        0	null
        -1	P<=10
        """
        # Label legend (see string above): 5 = bad point, 4 = power-limited,
        # 1 = good point, 0 = null, -1 = power <= 10.
        print("lab unique :",dataFrame[Field_LableFlag].unique())
        # data=dataFrame[dataFrame[Field_LableFlag]==1]
        # self.plotData(data[Field_NameOfTurbine].iloc[0],data[Field_WindSpeed],data[Field_ActiverPower])

        return dataFrame


    # NOTE(review): this guard sits INSIDE the class body, so it executes at
    # class-definition time when the module is run directly and would raise
    # NameError (there is no module-level ``main``). It should be moved to
    # module level — TODO confirm with the author before relocating.
    if __name__ == '__main__':
        main()
+
+
+

+ 540 - 0
dataAnalysisBehavior/behavior/dataMarkerOfScada3.py

@@ -0,0 +1,540 @@
+import numpy as np
+import pandas as pd
+import os
+import plotly.graph_objects as go
+
# Module-level configuration constants shared by every function below.
fieldActivePower = "功率"  # DataFrame column name: active power (assumed kW)
fieldWindSpeed = "风速"  # DataFrame column name: wind speed (assumed m/s)
fieldPitchAngle = "叶片角度"  # DataFrame column name: blade pitch angle (assumed deg)
PRated = 1500  # turbine rated power
VCutOut = 25  # cut-out wind speed
VCutIn = 3  # cut-in wind speed
VRated = 10  # rated wind speed
VNum = int(VCutOut / 0.25)  # number of 0.25-wide wind-speed bins
+
# Data loading


def read_data():
    """Load the raw SCADA records and the design (ideal) power curve.

    Returns:
        tuple: (March809, IdealCurve) — the turbine's measurement records
        and the manufacturer design power curve, both as DataFrames.
    """
    scada_records = pd.read_csv(
        "./data/DataClassificationIdentification/A01-G.csv", encoding="utf-8")
    design_curve = pd.read_csv(
        "./data/DataClassificationIdentification/A型风机设计功率曲线.csv", encoding="utf-8")
    return scada_records, design_curve
+
# Batch-level statistics


def calculate_statistics(March809, IdealCurve):
    """Compute batch-level statistics from the raw SCADA data.

    Args:
        March809: raw SCADA records containing the power and wind-speed
            columns named by the module constants.
        IdealCurve: design power curve, one row per wind-speed point.

    Returns:
        tuple: (AA, BB, PNum, EPActualTotal, WindSpeedAvr, TurbineRunRate,
        EPIdealTotalAAA) — record counts, number of 25-wide power bins,
        actual energy production, mean wind speed, availability (%), and
        the ideal energy production from the design curve.
    """
    AA = len(March809)  # number of SCADA records
    BB = len(IdealCurve)  # number of design-curve points
    PowerMax = March809[fieldActivePower].max()
    # Rated power rounded up to the next 100, then split into 25-wide bins.
    PowerRated = int(np.ceil(PowerMax / 100) * 100)
    PNum = PowerRated // 25  # number of power bins

    # Actual energy production: sum of non-negative power samples; the /6
    # divisor converts 10-minute power samples into energy — TODO confirm
    # the data really is 10-minute resolution.
    EPActualTotal = March809[March809[fieldActivePower]
                             >= 0][fieldActivePower].sum() / 6
    WindSpeedAvr = March809[fieldWindSpeed].mean()

    # Turbine availability: share of records above cut-in wind speed in
    # which the turbine actually produced power.
    nShouldGP = np.sum(March809[fieldWindSpeed] >= VCutIn)
    nRealGP = np.sum((March809[fieldWindSpeed] >= VCutIn)
                     & (March809[fieldActivePower] > 0))
    TurbineRunRate = (nRealGP / nShouldGP * 100) if nShouldGP > 0 else 0

    # Ideal energy production: linearly interpolate the design curve at
    # each record's wind speed. Records whose speed falls in the first
    # curve segment (nWhichBin stays 0) or outside the curve are skipped
    # by the `nWhichBin > 0` guard.
    EPIdealTotalAAA = 0
    for i in range(AA):
        nWhichBin = 0
        for m in range(BB - 1):
            if IdealCurve.iloc[m][fieldWindSpeed] < March809.iloc[i][fieldWindSpeed] <= IdealCurve.iloc[m + 1][fieldWindSpeed]:
                nWhichBin = m
                break
        if nWhichBin > 0:
            IdealPower = (March809.iloc[i][fieldWindSpeed] - IdealCurve.iloc[nWhichBin][fieldWindSpeed]) / (IdealCurve.iloc[nWhichBin + 1][fieldWindSpeed] - IdealCurve.iloc[nWhichBin]
                                                                                                            [fieldWindSpeed]) * (IdealCurve.iloc[nWhichBin + 1][fieldActivePower] - IdealCurve.iloc[nWhichBin][fieldActivePower]) + IdealCurve.iloc[nWhichBin][fieldActivePower]
            EPIdealTotalAAA += IdealPower / 6

    return AA, BB, PNum, EPActualTotal, WindSpeedAvr, TurbineRunRate, EPIdealTotalAAA
+
# Data classification


def classify_data(March809, PNum, VNum):
    """Bucket positive-power records into a (power bin x wind-speed bin) grid.

    Args:
        March809: raw SCADA records.
        PNum: number of 25-wide power bins.
        VNum: number of 0.25-wide wind-speed bins.

    Returns:
        tuple: (DzMarch809, XBoxNumber) — the positive-power subset and an
        integer (PNum, VNum) matrix of point counts per grid cell.
    """
    # Keep only records where the turbine actually generated power.
    DzMarch809 = March809[March809[fieldActivePower] > 0]
    nCounter1 = len(DzMarch809)

    # Start from ones and subtract 1 at the end — an np.zeros-equivalent
    # idiom apparently carried over from a MATLAB-style original.
    XBoxNumber = np.ones((PNum, VNum), dtype=int)
    for i in range(nCounter1):
        # Power bins start at 0 in steps of 25 (digitize is 1-based, hence
        # the -1). Wind bins start at 0.125 in steps of 0.25 with NO -1:
        # speeds below 0.125 then map to bin 0 — presumably intentional so
        # that bins are centred on multiples of 0.25; TODO confirm.
        nWhichP = np.digitize(
            DzMarch809.iloc[i][fieldActivePower], np.arange(0, PNum * 25, 25)) - 1
        nWhichV = np.digitize(
            DzMarch809.iloc[i][fieldWindSpeed], np.arange(0.125, VNum * 0.25, 0.25))
        if nWhichP < PNum and nWhichV < VNum:
            XBoxNumber[nWhichP, nWhichV] += 1
    XBoxNumber -= 1

    return DzMarch809, XBoxNumber
+
# Percentage computation


def compute_percentages(XBoxNumber, PNum, VNum):
    """Convert per-cell counts into row-wise and column-wise percentages.

    Args:
        XBoxNumber: (PNum, VNum) matrix of point counts per grid cell.
        PNum: number of power bins (rows).
        VNum: number of wind-speed bins (columns).

    Returns:
        tuple: (PBoxPercent, VBoxPercent) — each cell's share (%) of its
        power-bin row total and of its wind-speed-bin column total; rows or
        columns with no points stay all-zero.
    """
    PBoxPercent = np.zeros((PNum, VNum))
    row_totals = XBoxNumber.sum(axis=1)
    for row in range(PNum):
        total = row_totals[row]
        if total > 0:
            PBoxPercent[row, :] = XBoxNumber[row, :] / total * 100

    VBoxPercent = np.zeros((PNum, VNum))
    col_totals = XBoxNumber.sum(axis=0)
    for col in range(VNum):
        total = col_totals[col]
        if total > 0:
            VBoxPercent[:, col] = XBoxNumber[:, col] / total * 100

    return PBoxPercent, VBoxPercent
+
# Main-band detection


def find_main_band(PBoxPercent, PNum, VNum, XBoxNumber):
    """Locate the dense "main band" of the power curve per power bin.

    For each power bin (bottom rows only; the top 6 bins are excluded),
    spread left and right from the densest wind-speed cell until at least
    ``DotValve`` percent of the bin's points are covered, then use the
    spread widths to flag power-limited bins and derive band statistics.

    Args:
        PBoxPercent: (PNum, VNum) row-wise percentage matrix.
        PNum, VNum: grid dimensions.
        XBoxNumber: (PNum, VNum) raw count matrix.

    Returns:
        tuple: (DotDense, DotDenseLeftRight, PowerLimit, WidthAverage,
        WidthVar, PowerBandWidth, PBoxMaxIndex).
    """
    PBoxMaxIndex = np.argmax(PBoxPercent, axis=1)  # densest wind bin per power bin
    PBoxMaxP = np.max(PBoxPercent, axis=1)

    DotDense = np.zeros(PNum)
    DotDenseLeftRight = np.zeros((PNum, 2), dtype=int)
    DotValve = 90  # stop spreading once 90% of the row's points are covered
    for i in range(PNum - 6):
        PDotDenseSum = PBoxMaxP[i]
        iSpreadRight = iSpreadLeft = 1
        while PDotDenseSum < DotValve:
            # Alternate one step right, one step left, stopping at either
            # grid edge.
            if (PBoxMaxIndex[i] + iSpreadRight) < VNum - 1:
                PDotDenseSum += PBoxPercent[i, PBoxMaxIndex[i] + iSpreadRight]
                iSpreadRight += 1
            if (PBoxMaxIndex[i] + iSpreadRight) > VNum - 1:
                break
            if (PBoxMaxIndex[i] - iSpreadLeft) > 0:
                PDotDenseSum += PBoxPercent[i, PBoxMaxIndex[i] - iSpreadLeft]
                iSpreadLeft += 1
            if (PBoxMaxIndex[i] - iSpreadLeft) <= 0:
                break
        # Back off the last (uncommitted) step on each side.
        iSpreadRight -= 1
        iSpreadLeft -= 1
        DotDenseLeftRight[i, :] = [iSpreadLeft, iSpreadRight]
        DotDense[i] = iSpreadLeft + iSpreadRight + 1

    # NOTE(review): despite the name, column 1 holds the RIGHT spread
    # widths (column 0 is left) — the variable name is misleading.
    DotDenseWidthLeft = DotDenseLeftRight[:, 1]
    MainBandRight = np.median(DotDenseWidthLeft)
    PowerLimitValve = int(np.ceil(MainBandRight)) + 3

    # A power bin whose right spread greatly exceeds the median band width
    # (and that has enough points) is flagged as power-limited.
    PowerLimit = np.zeros(PNum)
    nCounterLimit = nCounter = 0
    WidthAverage = 0
    for i in range(PNum - 6):
        if DotDenseLeftRight[i, 1] > PowerLimitValve and XBoxNumber[i, :].sum() > 20:
            PowerLimit[i] = 1
            nCounterLimit += 1
        else:
            WidthAverage += DotDenseLeftRight[i, 1]
            nCounter += 1
    # NOTE(review): raises ZeroDivisionError if every bin was flagged as
    # power-limited (nCounter == 0) — confirm that case cannot occur.
    WidthAverage /= nCounter

    WidthVar = np.sqrt(np.mean((DotDenseLeftRight[:, 1] - WidthAverage) ** 2))
    PowerBandWidth = WidthAverage * 2 * 0.25  # band width in wind-speed units

    return DotDense, DotDenseLeftRight, PowerLimit, WidthAverage, WidthVar, PowerBandWidth, PBoxMaxIndex
+
# Bad-point marking


def mark_bad_points(DzMarch809, DotDenseLeftRight, PBoxMaxIndex, PowerLimit, PNum, VNum, XBoxNumber, PBoxPercent):
    """Label each positive-power record via grid masks around the main band.

    Builds a (PNum, VNum) removal mask (0 keep, 1 right-of-band "bad",
    2 left-of-band, 3 top rows) and maps every record through it.

    Returns:
        np.ndarray: per-record selection codes (0/1/2).
    """
    # Band half-width: mean right spread, rounded up, plus a 2-cell margin.
    CurveWidthR = int(np.ceil(DotDenseLeftRight[:, 1].mean())) + 2
    CurveWidthL = CurveWidthR
    # Cells to the right of the band in power-limited bins (computed but
    # not referenced again inside this function).
    BBoxLimit = np.zeros((PNum, VNum))
    for i in range(3, PNum - 6):
        if PowerLimit[i] == 1:
            BBoxLimit[i, PBoxMaxIndex[i] + CurveWidthR + 1: VNum] = 1

    # Removal mask: 1 = right of the band, 2 = left of the band.
    BBoxRemove = np.zeros((PNum, VNum))
    for m in range(PNum - 6):
        BBoxRemove[m, PBoxMaxIndex[m] + CurveWidthR: VNum] = 1
        BBoxRemove[m, :PBoxMaxIndex[m] - CurveWidthL + 1] = 2

    # Find the highest populated cell ("curve top"): scan rows from the
    # top down for a sufficiently dense, sufficiently populated cell.
    CurveTop = [0, 0]
    CurveTopValve = 3
    BTopFind = False
    for m in range(PNum - 4, 0, -1):
        for n in range(VNum):
            if PBoxPercent[m, n] > CurveTopValve and XBoxNumber[m, n] >= 10:
                CurveTop = [m, n]
                BTopFind = True
                break
        if BTopFind:
            break

    # Sparse (isolated) cells right of the band are also marked bad.
    IsolateValve = 3
    for m in range(PNum - 6):
        for n in range(PBoxMaxIndex[m] + CurveWidthR, VNum):
            if PBoxPercent[m, n] < IsolateValve:
                BBoxRemove[m, n] = 1

    # Top rows: code 3 (kept, see mapping below), except cells left of the
    # curve top which become code 2.
    for m in range(PNum - 6, PNum):
        BBoxRemove[m, :] = 3
    for m in range(PNum - 5, PNum):
        BBoxRemove[m, :CurveTop[1] - 2] = 2

    # Map every record through the mask using the same binning as
    # classify_data (power bins 1-based-corrected, wind bins not).
    DzMarch809Sel = np.zeros(len(DzMarch809), dtype=int)
    for i in range(len(DzMarch809)):
        nWhichP = np.digitize(
            DzMarch809.iloc[i][fieldActivePower], np.arange(0, PNum * 25, 25)) - 1
        nWhichV = np.digitize(
            DzMarch809.iloc[i][fieldWindSpeed], np.arange(0.125, VNum * 0.25, 0.25))
        
        # Disabled pitch-angle-based power-limit pre-filter, kept for
        # reference (see identify_limit_load_data for the active version).
        # if (
        #     (DzMarch809.iloc[i][fieldActivePower] < PRated * 0.75) and
        #     (DzMarch809.iloc[i][fieldPitchAngle] > 0.5) or
        #     (DzMarch809.iloc[i][fieldActivePower] < PRated * 0.85) and
        #     (DzMarch809.iloc[i][fieldPitchAngle] > 1.5) or
        #     (DzMarch809.iloc[i][fieldActivePower] < PRated * 0.9) and
        #     (DzMarch809.iloc[i][fieldPitchAngle] > 2.5)
        # ):
        #     continue

        if nWhichP < PNum and nWhichV < VNum:
            if BBoxRemove[nWhichP, nWhichV] == 1:
                DzMarch809Sel[i] = 1
            elif BBoxRemove[nWhichP, nWhichV] == 2:
                DzMarch809Sel[i] = 2
            elif BBoxRemove[nWhichP, nWhichV] == 3:
                # Mask code 3 deliberately maps back to 0 (keep).
                DzMarch809Sel[i] = 0

    return DzMarch809Sel
+
+
def identify_limit_load_data(DzMarch809: pd.DataFrame, DzMarch809Sel, PRated):
    """Identify power-limited (curtailed) windows and mark them with code 4.

    Scans non-overlapping windows of ``nWindowLength`` records; a window is
    curtailed when (a) pitch angle is high while power is below rated
    thresholds, or (b) power stays nearly flat within a band below rated.

    Args:
        DzMarch809: positive-power SCADA records.
        DzMarch809Sel: per-record selection codes, updated in place.
        PRated: turbine rated power.

    Returns:
        tuple: (PVLimit, DzMarch809Sel) — (wind speed, power) pairs of the
        flat-window points, and the updated selection codes.
    """
    nCounter1 = len(DzMarch809)
    PVLimit = np.zeros((nCounter1, 2))
    nLimitTotal = 0
    nWindowLength = 3  # records per detection window
    PowerStd = 15  # allowed power fluctuation within a flat window
    PowerLimitUp =  PRated - 300
    # NOTE(review): the original comment said "200kW" but the value is 5 —
    # confirm the intended lower power bound.
    PowerLimitLow = 5

    nWindowNum = nCounter1 // nWindowLength

    for i in range(nWindowNum):
        LimitWindow = DzMarch809.iloc[i * nWindowLength:(i + 1) * nWindowLength][fieldActivePower].values

        # All window samples must lie within [PowerLimitLow, PowerLimitUp].
        if not ((LimitWindow >= PowerLimitLow) & (LimitWindow <= PowerLimitUp)).all():
            continue

        """
        限功率识别策略(参考):
        1. 功率<额定功率(PRated)*0.75 and  叶片角度>0.5 ; 2. 功率<额定功率*0.85 and 叶片角度>1.5 ; 3. 功率<额定功率*0.9 and 叶片角度>2.5;
        示例:
        1. 功率<1100,and  叶片角度>0.5 ; 2. 功率<1250 and 叶片角度>1.5 ; 3. 功率<1400 and 叶片角度>2.5 ;
        """
        # Pitch-angle strategy (thresholds documented above): power below a
        # fraction of rated while the blades are pitched suggests curtailment.
        pitch_angle_window = DzMarch809.iloc[i * nWindowLength:(i + 1) * nWindowLength][fieldPitchAngle].values

        if (
            (DzMarch809.iloc[i * nWindowLength:(i + 1) * nWindowLength][fieldActivePower] < PRated * 0.75).any() and
            (pitch_angle_window > 0.5).any() or
            (DzMarch809.iloc[i * nWindowLength:(i + 1) * nWindowLength][fieldActivePower] < PRated * 0.85).any() and
            (pitch_angle_window > 1.5).any() or
            (DzMarch809.iloc[i * nWindowLength:(i + 1) * nWindowLength][fieldActivePower] < PRated * 0.9).any() and
            (pitch_angle_window > 2.5).any()
        ):
            # Mark the window as power-limited.
            DzMarch809Sel[i * nWindowLength:(i + 1) * nWindowLength] = 4

        UpLimit = LimitWindow[0] + PowerStd
        LowLimit = LimitWindow[0] - PowerStd

        # Flat-power strategy: every remaining sample must stay within
        # +/-PowerStd of the window's first sample.
        if not ((LimitWindow[1:] >= LowLimit) & (LimitWindow[1:] <= UpLimit)).all():
            continue

        # Mark the window as power-limited.
        DzMarch809Sel[i * nWindowLength:(i + 1) * nWindowLength] = 4

        for j in range(nWindowLength):
            # Record only the (wind speed, power) pair for plotting/output.
            PVLimit[nLimitTotal] = DzMarch809.iloc[i * nWindowLength + j][[fieldWindSpeed, fieldActivePower]]
            nLimitTotal += 1

    PVLimit = PVLimit[:nLimitTotal]  # trim to the rows actually filled

    return PVLimit, DzMarch809Sel
+
# Energy-loss accounting


def _interp_ideal_power(wind_speed, IdealCurve, BB):
    """Linearly interpolate the design-curve power at ``wind_speed``.

    Returns the interpolated power, or None when the speed does not fall
    strictly inside a segment beyond the first curve point (mirrors the
    original ``nWhichBin > 0`` guard, which also skips segment 0).
    """
    nWhichBin = 0
    for m in range(BB - 1):
        if IdealCurve.iloc[m][fieldWindSpeed] < wind_speed <= IdealCurve.iloc[m + 1][fieldWindSpeed]:
            nWhichBin = m
            break
    if nWhichBin <= 0:
        return None
    v0 = IdealCurve.iloc[nWhichBin][fieldWindSpeed]
    v1 = IdealCurve.iloc[nWhichBin + 1][fieldWindSpeed]
    p0 = IdealCurve.iloc[nWhichBin][fieldActivePower]
    p1 = IdealCurve.iloc[nWhichBin + 1][fieldActivePower]
    return (wind_speed - v0) / (v1 - v0) * (p1 - p0) + p0


def calculate_energy_loss(DzMarch809Sel, DzMarch809, IdealCurve, PNum, BB, PRated, EPIdealTotalAAA, EPActualTotal):
    """Break total energy loss down by cause (stop, bad points, overshoot).

    Args:
        DzMarch809Sel: per-record selection codes (1 bad, 2 left-of-band,
            3 over-rated, 4 power-limited, 0 good).
        DzMarch809: the records being accounted for.
        IdealCurve: design power curve; BB is its length.
        PNum, PRated, EPIdealTotalAAA, EPActualTotal: batch statistics
            (PNum and EPIdealTotalAAA are part of the established signature
            but unused here).

    Returns:
        tuple: (EPLostStopTotal, EPLostBadTotal, EPOverTotal,
        EPLostPerformTotal, corrected ideal total).
    """
    # Energy lost while stopped: power <= 0 although wind was available.
    EPLostStopTotal = 0
    nStopTotal = 0
    for i in range(len(DzMarch809)):
        if DzMarch809.iloc[i][fieldActivePower] <= 0:
            ideal = _interp_ideal_power(
                DzMarch809.iloc[i][fieldWindSpeed], IdealCurve, BB)
            if ideal is not None:
                EPLostStopTotal += ideal / 6
                nStopTotal += 1

    # Energy lost to bad points (code 1).
    # BUG FIX: the original accumulated a RUNNING total (EPLost) into
    # EPLostBadTotal on every iteration, producing a sum of prefix sums;
    # each point's loss is now added exactly once.
    EPLostBadTotal = 0
    nBadTotal = 0
    for i in range(len(DzMarch809)):
        if DzMarch809Sel[i] == 1:
            ideal = _interp_ideal_power(
                DzMarch809.iloc[i][fieldWindSpeed], IdealCurve, BB)
            if ideal is not None:
                EPLostBadTotal += abs(ideal -
                                      DzMarch809.iloc[i][fieldActivePower]) / 6
                nBadTotal += 1

    # Energy produced above rated power (code 3).
    EPOverTotal = 0
    nOverTotal = 0
    for i in range(len(DzMarch809)):
        if DzMarch809Sel[i] == 3:
            EPOverTotal += (DzMarch809.iloc[i][fieldActivePower] - PRated) / 6
            nOverTotal += 1

    # Performance shortfall vs the ideal curve over all records (signed).
    EPLostPerformTotal = 0
    for i in range(len(DzMarch809)):
        ideal = _interp_ideal_power(
            DzMarch809.iloc[i][fieldWindSpeed], IdealCurve, BB)
        if ideal is not None:
            EPLostPerformTotal += (ideal -
                                   DzMarch809.iloc[i][fieldActivePower]) / 6

    # Ideal total: only add the performance term when it is a net loss.
    EPIdealTotal = EPActualTotal + EPLostStopTotal + EPLostBadTotal + \
        EPLostPerformTotal if EPLostPerformTotal >= 0 else EPActualTotal + \
        EPLostStopTotal + EPLostBadTotal

    # Energy to subtract from the ideal total: left-of-band points above
    # the curve (code 2) and anything above rated power.
    RemoveOverEP = 0
    for i in range(len(DzMarch809)):
        if DzMarch809Sel[i] == 2:
            ideal = _interp_ideal_power(
                DzMarch809.iloc[i][fieldWindSpeed], IdealCurve, BB)
            if ideal is not None:
                RemoveOverEP += (DzMarch809.iloc[i][fieldActivePower] - ideal) / 6

    for i in range(len(DzMarch809)):
        if DzMarch809.iloc[i][fieldActivePower] > PRated:
            RemoveOverEP += (DzMarch809.iloc[i][fieldActivePower] - PRated) / 6

    return EPLostStopTotal, EPLostBadTotal, EPOverTotal, EPLostPerformTotal, EPIdealTotal - RemoveOverEP
+
# Measured power curve


def calculate_measured_power_curve(PVDot, VRated, PRated):
    """Bin the good points into 0.5-wide wind-speed buckets to build the
    measured power curve.

    Args:
        PVDot: DataFrame of good (kept) points.
        VRated: rated wind speed.
        PRated: rated power, used to backfill empty bins above rated speed.

    Returns:
        np.ndarray: (50, 2) array of (mean wind speed, mean power) per bin;
        bins with no data keep their preset centre speed.
    """
    # Same ones-then-subtract counting idiom as classify_data.
    XBinNumber = np.ones(50)
    PCurve = np.zeros((50, 2))
    PCurve[:, 0] = np.arange(0.5, 25.5, 0.5)  # preset bin-centre speeds
    XBinSum = np.zeros((50, 2))

    for i in range(len(PVDot)):
        nWhichBin = 0
        # Bin b covers (b*0.5 - 0.25, b*0.5 + 0.25].
        for b in range(50):
            if (b * 0.5 - 0.25) < PVDot.iloc[i][fieldWindSpeed] <= (b * 0.5 + 0.25):
                nWhichBin = b
                break

        # NOTE: the guard also drops points that genuinely land in bin 0
        # (speeds <= 0.25) — presumably negligible; confirm if that matters.
        if nWhichBin > 0:
            XBinSum[nWhichBin, 0] += PVDot.iloc[i][fieldWindSpeed]
            XBinSum[nWhichBin, 1] += PVDot.iloc[i][fieldActivePower]
            XBinNumber[nWhichBin] += 1

    XBinNumber -= 1

    # Replace preset centres with the actual per-bin means where data exists.
    for b in range(50):
        if XBinNumber[b] > 0:
            PCurve[b, 0] = XBinSum[b, 0] / XBinNumber[b]
            PCurve[b, 1] = XBinSum[b, 1] / XBinNumber[b]

    # Above rated wind speed, empty bins are assumed to produce rated power.
    VRatedNum = int(VRated / 0.5)
    for m in range(VRatedNum, 50):
        if PCurve[m, 1] == 0:
            PCurve[m, 1] = PRated

    return PCurve
+
# Normalized (design-based) power curve
def calculate_normalized_power_curve(IdealCurve, VRated, PRated):
    """Build a 50-point normalized power curve from the design curve.

    Rows below 15 m/s are interpolated from the design curve; higher rows
    are set to rated power.

    Returns:
        np.ndarray: (50, 2) array of (wind speed, power).
    """
    PCurveNorm = np.zeros((50, 2))
    VRatedNum = int(VRated / 0.5)

    # Rated power at and above 15 m/s.
    # NOTE(review): with VRated=10, VRatedNum is 20, but on a 0.5 m/s grid
    # 15 m/s corresponds to row 29 — this block writes speeds 15..25 into
    # rows 20..40, and the loop below then overwrites rows 20..29. Confirm
    # the intended row alignment.
    high_wind_speeds = np.arange(15, 25.5, 0.5)
    PCurveNorm[VRatedNum:VRatedNum+len(high_wind_speeds), 0] = high_wind_speeds
    PCurveNorm[VRatedNum:VRatedNum+len(high_wind_speeds), 1] = PRated

    # Normalized curve below 15 m/s.
    VSpeed = np.arange(0.5, 15.5, 0.5)
    CurveData = IdealCurve[IdealCurve[fieldWindSpeed]
                           <= 15].to_numpy()  # design-curve rows with speed <= 15
    # NOTE(review): the unpacking below assumes CurveData has exactly two
    # columns (speed, power) and that its rows line up one-to-one with the
    # 0.5 m/s grid — verify against the CSV layout.

    for i, v in enumerate(VSpeed):
        if i < len(CurveData) - 1:
            # Linear interpolation between consecutive design-curve points.
            x0, y0 = CurveData[i]
            x1, y1 = CurveData[i + 1]
            PCurveNorm[i, 0] = v
            PCurveNorm[i, 1] = y0 + (v - x0) * (y1 - y0) / (x1 - x0)
        else:
            PCurveNorm[i, 0] = v
            PCurveNorm[i, 1] = PRated  # fallback when past the curve data
    
    return PCurveNorm
+
# Result persistence


def save_results(PCurve, PCurveNorm, EPKW, EPPer, outputDir='./output/A01'):
    """Persist the computed curves and energy summaries as CSV files.

    Args:
        PCurve: measured power curve, (n, 2) array.
        PCurveNorm: normalized power curve, (n, 2) array.
        EPKW: energy breakdown in absolute units (10-element list).
        EPPer: energy breakdown in percent (10-element list).
        outputDir: destination directory, created if missing.
    """
    # exist_ok avoids the check-then-create race of the original
    # `if not os.path.exists(...)` pattern.
    os.makedirs(outputDir, exist_ok=True)
    pd.DataFrame(PCurve, columns=[fieldWindSpeed, fieldActivePower]).to_csv(
        os.path.join(outputDir, 'PCurve.csv'), index=False)
    pd.DataFrame(PCurveNorm, columns=[fieldWindSpeed, fieldActivePower]).to_csv(
        os.path.join(outputDir, 'PCurveNorm.csv'), index=False)
    pd.DataFrame([EPKW], columns=['EPIdealTotal', 'EPActualTotal', 'EPLostStopTotal', 'EPLostBadLimitTotal', 'EPLostPerformTotal', 'EPLostBadTotal',
                 'EPLostLimitTotal', 'EPOverTotal', 'WindSpeedAvr', 'TurbineRunRate']).to_csv(os.path.join(outputDir, 'EPKW.csv'), index=False)
    pd.DataFrame([EPPer], columns=['Percent1', 'Percent2', 'Percent3', 'Percent4', 'Percent5', 'Percent6', 'Percent7',
                 'Percent8', 'WindSpeedAvr', 'TurbineRunRate']).to_csv(os.path.join(outputDir, 'EPPer.csv'), index=False)
+
# Plotting via Plotly


def plot_results(PVBad, PVLimit, PVDot, PCurve, IdealCurve, outputDir='./output/A01'):
    """Render the classified power-scatter chart to an interactive HTML file.

    Args:
        PVBad: DataFrame of bad points (plotted red).
        PVLimit: DataFrame of power-limited points (blue).
        PVDot: DataFrame of good points (black).
        PCurve: measured power curve, (n, 2) ndarray (green line).
        IdealCurve: contract/design power curve, (n, 2) ndarray (yellow line).
        outputDir: destination directory; new parameter that defaults to the
            previously hard-coded path, so existing callers are unaffected.
    """
    fig = go.Figure()

    # Bad points
    if not PVBad.empty:
        fig.add_trace(go.Scatter(x=PVBad[fieldWindSpeed], y=PVBad[fieldActivePower],
                      mode='markers', name='坏点', marker=dict(color='red')))

    # Power-limited points
    if not PVLimit.empty:
        fig.add_trace(go.Scatter(x=PVLimit[fieldWindSpeed], y=PVLimit[fieldActivePower],
                      mode='markers', name='限功率', marker=dict(color='blue')))

    # Good points
    if not PVDot.empty:
        fig.add_trace(go.Scatter(x=PVDot[fieldWindSpeed], y=PVDot[fieldActivePower],
                      mode='markers', name='好点', marker=dict(color='black')))

    # Measured power curve
    if PCurve.shape[0] > 0:
        fig.add_trace(go.Scatter(
            x=PCurve[:, 0], y=PCurve[:, 1], mode='lines+markers', name='实测功率曲线', line=dict(color='green')))

    # Design (contract) power curve
    if IdealCurve.shape[0] > 0:
        fig.add_trace(go.Scatter(x=IdealCurve[:, 0], y=IdealCurve[:, 1],
                      mode='lines+markers', name='合同功率曲线', line=dict(color='yellow')))

    fig.update_layout(
        title={'text': '风力机功率散点数据分类标识', 'x': 0.5},
        xaxis_title=fieldWindSpeed,
        yaxis_title=fieldActivePower,
        legend=dict(x=0.01, y=0.99),
        template='plotly_white'
    )

    # fig.show()
    # BUG FIX: ensure the directory exists — the original crashed when this
    # function ran before save_results had created it. Also dropped a
    # placeholder-free f-string.
    os.makedirs(outputDir, exist_ok=True)
    filePath = os.path.join(outputDir, 'power_scatter.html')
    fig.write_html(filePath)
+
+
# Main pipeline
def main():
    """Run the full classification pipeline: load, classify, mark, account,
    and export/plot the results for one turbine batch."""
    # Load raw data and the design curve
    March809, IdealCurve = read_data()
    # Batch statistics
    AA, BB, PNum, EPActualTotal, WindSpeedAvr, TurbineRunRate, EPIdealTotalAAA = calculate_statistics(
        March809, IdealCurve)
    # Grid classification of positive-power records
    DzMarch809, XBoxNumber = classify_data(March809, PNum, VNum)
    # Row/column percentages
    PBoxPercent, VBoxPercent = compute_percentages(XBoxNumber, PNum, VNum)
    # Main-band detection
    DotDense, DotDenseLeftRight, PowerLimit, WidthAverage, WidthVar, PowerBandWidth, PBoxMaxIndex = find_main_band(
        PBoxPercent, PNum, VNum, XBoxNumber)
    # Bad-point marking
    DzMarch809Sel = mark_bad_points(
        DzMarch809, DotDenseLeftRight, PBoxMaxIndex, PowerLimit, PNum, VNum, XBoxNumber, PBoxPercent)
    # Power-limited (curtailed) window detection
    PVLimitArray, DzMarch809Sel = identify_limit_load_data(
        DzMarch809, DzMarch809Sel, PRated)

    # Buckets for good / bad / power-limited points
    PVBad = pd.DataFrame(columns=DzMarch809.columns)
    PVLimit = pd.DataFrame(columns=DzMarch809.columns)
    PVDot = pd.DataFrame(columns=DzMarch809.columns)

    # Route each record to its bucket by selection code
    # (1/2/3 -> bad, 4 -> power-limited, else good).
    for i in range(len(DzMarch809)):
        if DzMarch809Sel[i] in [1, 2, 3]:
            if not DzMarch809.iloc[[i]].isna().all().all():
                PVBad = pd.concat(
                    [PVBad, DzMarch809.iloc[[i]]], ignore_index=True)
        elif DzMarch809Sel[i] == 4:
            if not DzMarch809.iloc[[i]].isna().all().all():
                PVLimit = pd.concat(
                    [PVLimit, DzMarch809.iloc[[i]]], ignore_index=True)
        else:
            if not DzMarch809.iloc[[i]].isna().all().all():
                PVDot = pd.concat(
                    [PVDot, DzMarch809.iloc[[i]]], ignore_index=True)

    # Energy-loss accounting
    EPLostStopTotal, EPLostBadTotal, EPOverTotal, EPLostPerformTotal, EPIdealTotal = calculate_energy_loss(
        DzMarch809Sel, DzMarch809, IdealCurve, PNum, BB, PRated, EPIdealTotalAAA, EPActualTotal)

    # NOTE(review): EPLostBadTotal appears twice (as EPLostBadLimitTotal and
    # EPLostBadTotal in save_results' columns) and the limit slot is 0 —
    # confirm this stand-in is intentional. The EPPer ratios also assume
    # EPIdealTotal and EPActualTotal are nonzero.
    EPKW = [EPIdealTotal, EPActualTotal, EPLostStopTotal, EPLostBadTotal,
            EPLostPerformTotal, EPLostBadTotal, 0, EPOverTotal, WindSpeedAvr, TurbineRunRate]
    EPPer = [100, EPActualTotal / EPIdealTotal * 100, EPLostStopTotal / EPIdealTotal * 100, EPLostBadTotal / EPIdealTotal * 100,
             EPLostPerformTotal / EPIdealTotal * 100, EPLostBadTotal / EPIdealTotal * 100, 0, EPOverTotal / EPActualTotal * 100, WindSpeedAvr, TurbineRunRate]

    # Measured power curve from good points
    PCurve = calculate_measured_power_curve(PVDot, VRated, PRated)
    # Normalized design-based power curve
    PCurveNorm = calculate_normalized_power_curve(IdealCurve, VRated, PRated)

    # Persist CSV outputs
    save_results(PCurve, PCurveNorm, EPKW, EPPer)
    # Render the interactive scatter plot
    plot_results(PVBad, PVLimit, PVDot, PCurve, IdealCurve.to_numpy())


if __name__ == "__main__":
    main()

+ 177 - 0
dataAnalysisBehavior/behavior/outputProcessor.py

@@ -0,0 +1,177 @@
+import os
+from datetime import datetime, timezone, timedelta
+from logging import Logger
+import pandas as pd
+from sqlalchemy.orm import Session
+from sqlalchemy.sql import text
+import shutil
+from urllib.parse import quote, unquote
+from common.commonBusiness import CommonBusiness
+from common.appConfig import GetBusinessFoundationDbUtil
+from algorithmContract.confBusiness import *
+from algorithmContract.contract import Contract
+from algorithmContract.const import *
+from utils.minioUtil.threadSafeMinioClient import ThreadSafeMinioClient
+from utils.rdbmsUtil.databaseUtil import DatabaseUtil
+
+Const_FileURL = "url"
+
+
class OutputProcessor:
    """Persists analysis outputs: uploads result files to MinIO and records
    analysis state and file links in the business-foundation database.
    """

    def __init__(self, conf: Contract, logger: Logger, dbUtil: dict[str, DatabaseUtil], minioUtil: ThreadSafeMinioClient) -> None:
        self.conf = conf
        # 1 = automatic analysis, 0 = manual (mirrors the contract field).
        self.autoOrManual = 1 if self.conf.dataContract.autoOrManual == 'automatic' else 0
        self.powerFarmID = self.conf.dataContract.dataFilter.powerFarmID
        self.dataBatchNum = self.conf.dataContract.dataFilter.dataBatchNum

        self.logger = logger
        self.dbUtil = dbUtil
        self.minioUtil = minioUtil

        self.common = CommonBusiness()

    def uploadOfMioIO(self, bucketName: str, uploadFiles):
        """Upload files to MinIO, creating and opening the bucket on first use.

        Args:
            bucketName: target bucket (lower-cased; MinIO requires lowercase).
            uploadFiles: iterable of (remote_path, local_path) pairs.
        """
        bucketName = bucketName.lower()
        if not self.minioUtil.bucket_exists(bucketName):
            self.minioUtil.create_bucket(bucketName)
            self.minioUtil.set_bucket_policy(bucketName)

        # Result intentionally ignored; failures surface via exceptions.
        self.minioUtil.upload_files(bucketName, uploadFiles)

    def removeLocalFiles(self, powerFarmID: str, dataBatchNum: str):
        """Delete the local output directory for this farm/batch after upload."""
        directory = f"output/{powerFarmID}/{dataBatchNum}"
        shutil.rmtree(directory)

    def analysisState(self, session: Session, batchNO: str, analysisState: int,
                      errorState: int = ErrorState.NotErr.value, errorCode: str = None,
                      errorInfo: str = None, analysisProgress: float = 0,
                      analysis_finish_time: datetime = None):
        """Upsert one analysis progress/state row into ``analysis_result``.

        Writes the master row; companion rows go to ``analysis_general_file``
        (farm-wide charts) and ``analysis_diagram_relation`` (per-turbine
        charts) via the sibling methods.

        BUG FIX: the finish-time default used to be ``datetime.now(...)`` in
        the signature, which Python evaluates ONCE at import time, freezing
        the timestamp for every call. It is now computed per call.
        """
        if analysis_finish_time is None:
            # UTC+8 (Beijing time), matching the rest of the pipeline.
            analysis_finish_time = datetime.now(timezone.utc) + timedelta(hours=8)

        sql = text("INSERT INTO analysis_result(batch_code, analysis_state, err_state, err_code, err_info,  analysis_progress, analysis_finish_time) \
                  VALUES(:batch_code, :analysis_state, :err_state, :err_code, :err_info, :analysis_progress, :analysis_finish_time) \
                  ON DUPLICATE KEY \
                  UPDATE \
                  analysis_state=VALUES(analysis_state), \
                  err_state=VALUES(err_state), \
                  err_code=VALUES(err_code), \
                  err_info=VALUES(err_info), \
                  analysis_progress=VALUES(analysis_progress), \
                  analysis_finish_time=VALUES(analysis_finish_time);")

        params = {
            "batch_code": None if self.common.isNone(batchNO) else batchNO,
            "analysis_state": None if self.common.isNone(analysisState) else analysisState,
            # NOTE(review): err_state is gated on analysisState (not
            # errorState) — kept as-is from the original; confirm intent.
            "err_state": None if self.common.isNone(analysisState) else errorState,
            "err_code": None if self.common.isNone(errorCode) else errorCode,
            "err_info": None if self.common.isNone(errorInfo) else errorInfo,
            "analysis_progress": analysisProgress,
            "analysis_finish_time": analysis_finish_time
        }

        session.execute(sql, params)

    def analysisResultForTurbine(self, session: Session, returnDataFrame: pd.DataFrame):
        """Upsert per-turbine chart links into ``analysis_diagram_relation``."""
        dataFrame = returnDataFrame[(returnDataFrame[Field_CodeOfTurbine] != 'total') & (
            returnDataFrame[Field_Return_IsSaveDatabase])]

        for index, row in dataFrame.iterrows():
            sql = text("""
                INSERT INTO analysis_diagram_relation 
                (batch_code, field_engine_code, analysis_type_code, file_addr, auto_analysis) 
                VALUES (:batch_code, :field_engine_code, :analysis_type_code, :file_addr, :auto_analysis)
                ON DUPLICATE KEY UPDATE 
                field_engine_code=VALUES(field_engine_code),
                analysis_type_code=VALUES(analysis_type_code),
                file_addr=VALUES(file_addr),
                auto_analysis=VALUES(auto_analysis);
            """)

            params = {
                "batch_code": row[Field_Return_BatchCode],
                "field_engine_code": row[Field_CodeOfTurbine],
                "analysis_type_code": row[Field_Return_TypeAnalyst],
                "file_addr": row[Const_FileURL],
                "auto_analysis": self.autoOrManual
            }

            session.execute(sql, params)

    def analysisResultForTotal(self, session: Session, returnDataFrame: pd.DataFrame):
        """Upsert farm-wide ('total') chart links into ``analysis_general_file``."""
        dataFrame = returnDataFrame[(returnDataFrame[Field_CodeOfTurbine] == 'total') & (
            returnDataFrame[Field_Return_IsSaveDatabase])]

        for index, row in dataFrame.iterrows():
            sql = text("""
                INSERT INTO analysis_general_file 
                (batch_code, analysis_type_code, engine_type_code, file_addr, auto_analysis) 
                VALUES (:batch_code, :analysis_type_code, :engine_type_code, :file_addr, :auto_analysis)
                ON DUPLICATE KEY UPDATE 
                analysis_type_code=VALUES(analysis_type_code),
                engine_type_code=VALUES(engine_type_code),
                file_addr=VALUES(file_addr),
                auto_analysis=VALUES(auto_analysis);
            """)

            params = {
                "batch_code": row[Field_Return_BatchCode],
                "analysis_type_code": row[Field_Return_TypeAnalyst],
                "engine_type_code": row[Field_MillTypeCode],
                "file_addr": row[Const_FileURL],
                "auto_analysis": self.autoOrManual
            }

            session.execute(sql, params)

    def process(self, powerFarmID: str, dataBatchNum: str, returnDataFrame: pd.DataFrame,
                timestamp: datetime = None):
        """Upload result files, record their URLs/state, and clean up.

        BUG FIXES:
        - ``timestamp`` default was evaluated once at import time; it is now
          computed per call (the parameter itself is currently unused but
          kept for interface compatibility).
        - The final ``analysisState`` call previously ran AFTER the
          ``session_scope`` context manager had closed, using a dead
          session; it now runs inside the scope.
        """
        if timestamp is None:
            timestamp = datetime.now(timezone.utc) + timedelta(hours=8)
        try:
            uploadFiles = []
            if not returnDataFrame.empty:
                returnDataFrame[Const_FileURL] = None

            if Field_Return_IsSaveDatabase in returnDataFrame.columns:
                returnDataFrame[Field_Return_IsSaveDatabase].fillna(
                    True, inplace=True)
            else:
                returnDataFrame[Field_Return_IsSaveDatabase] = True

            # Build the bucket-relative remote path and public URL for each
            # produced file.
            for index, row in returnDataFrame.iterrows():
                directory, fileName = os.path.split(row[Field_Return_FilePath])
                basePath = f"output/{powerFarmID}"
                subPath = os.path.relpath(directory, basePath)
                remoteFilePath = os.path.join(
                    subPath, fileName).replace("\\", "/")
                arr = [powerFarmID.lower(), "/", remoteFilePath]
                fileURL = "".join(arr)
                returnDataFrame.at[index, Const_FileURL] = quote(fileURL)
                uploadFiles.append(
                    (remoteFilePath, row[Field_Return_FilePath]))

            self.uploadOfMioIO(self.powerFarmID, uploadFiles)

            foundationDB = GetBusinessFoundationDbUtil()

            with foundationDB.session_scope() as session:
                # First update: mark as analyzed at 50% progress.
                self.analysisState(session, self.dataBatchNum, AnalysisState.Analyzed.value,
                                   ErrorState.NotErr.value, None, None, 50)
                self.analysisResultForTotal(
                    session, returnDataFrame)
                self.analysisResultForTurbine(
                    session, returnDataFrame)

                # Completion timestamp (UTC+8).
                finish_time = datetime.now(timezone.utc) + timedelta(hours=8)

                # Second update: completed, 100% progress, finish time set.
                self.analysisState(session, self.dataBatchNum, AnalysisState.Analyzed.value,
                                   ErrorState.NotErr.value, None, None, 100,
                                   analysis_finish_time=finish_time)

            self.removeLocalFiles(powerFarmID, dataBatchNum)
        except Exception as e:
            self.logger.error(e)
            # Bare raise preserves the original traceback.
            raise

+ 0 - 0
dataAnalysisBehavior/common/__init__.py


+ 106 - 0
dataAnalysisBehavior/common/appConfig.py

@@ -0,0 +1,106 @@
+import os
+from utils.jsonUtil import JsonUtil
+from dataclasses import dataclass
+from utils.logUtil import LogUtil
+from utils.rdbmsUtil.databaseUtil import DatabaseUtil
+from utils.minioUtil.minioClientPool import MinioClientPool
+from utils.minioUtil.threadSafeMinioClient import ThreadSafeMinioClient
+from algorithmContract.const import *
+
+
@dataclass
class DatabaseConfig:
    """Connection settings for one RDBMS endpoint, loaded from appConfig.json."""
    type: str
    url: str
    timeout: int  # Timeout in seconds
    poolSize: int
    maxPoolSize: int
    minPoolSize: int  # NOTE(review): never read by the Get*DbUtil helpers below
    maxIdleTime: int  # Max idle time in seconds
+
+
@dataclass
class MinioConfig:
    """MinIO object-storage connection settings, loaded from appConfig.json."""
    endpoint: str
    accessKey: str
    secretKey: str
    secure: bool  # whether to use TLS
    poolSize: int
    timeout: int  # Timeout in seconds
+
+
@dataclass
class LoggingConfig:
    """Logging settings (rotating-file handler), loaded from appConfig.json."""
    logFilePath: str
    maxFileSize: int
    maxTotalSize: int
    backupCount: int
    logFormat: str
    level: str
+
+
@dataclass
class AppConfig:
    """Top-level application configuration: databases, MinIO and logging."""
    databaseConfig: dict  # database name -> DatabaseConfig
    minioConfig: MinioConfig
    loggingConfig: LoggingConfig

    @staticmethod
    def loadFromFile(filePath: str):
        """
        Build an AppConfig from a JSON configuration file.

        The database dict is keyed by the DATABASE_* constants so that the
        lookups in GetBusinessFoundationDbUtil / GetBusinessDbUtil below use
        exactly the same keys that were written here (the previous revision
        hard-coded string literals as keys, which silently breaks if the
        constants ever diverge from those literals).
        """
        configJson = JsonUtil.read_json(filePath)
        databases = {
            name: DatabaseConfig(**configJson['databaseConfig'][name])
            for name in (DATABASE_BusinessFoundationDb, DATABASE_businessDb)
        }
        minio = MinioConfig(**configJson['minioConfig'])
        logging = LoggingConfig(**configJson['loggingConfig'])
        return AppConfig(databases, minio, logging)
+
+
# Example usage
# Module-level singleton: the configuration is loaded once, at import time.
# The config file path can be overridden with the APP_CONFIG_FILE env var.
configFilePath = os.environ.get('APP_CONFIG_FILE', r"conf/appConfig.json")
config = AppConfig.loadFromFile(configFilePath)
# config = AppConfig.loadFromFile(r'conf/appConfig.json')
+
+
def GetLogger():
    # Build the application logger from the logging section of the config.
    # NOTE(review): LogUtil is called with (logFilePath, logFormat,
    # maxTotalSize, backupCount); loggingConfig.maxFileSize and .level are
    # never passed — confirm this matches LogUtil's expected signature.
    return LogUtil(config.loggingConfig.logFilePath, config.loggingConfig.logFormat,
                   config.loggingConfig.maxTotalSize, config.loggingConfig.backupCount).getLogger()
+
+
def GetBusinessFoundationDbUtil():
    """Create a DatabaseUtil bound to the business-foundation database."""
    dbConf = config.databaseConfig[DATABASE_BusinessFoundationDb]
    return DatabaseUtil(url=dbConf.url,
                        pool_size=dbConf.poolSize,
                        max_overflow=dbConf.maxPoolSize,
                        connect_timeout=dbConf.timeout)
+
+
def GetBusinessDbUtil():
    """Create a DatabaseUtil bound to the business database."""
    dbConf = config.databaseConfig[DATABASE_businessDb]
    return DatabaseUtil(url=dbConf.url,
                        pool_size=dbConf.poolSize,
                        max_overflow=dbConf.maxPoolSize,
                        connect_timeout=dbConf.timeout)
+
+
def GetDbUtil():
    """
    Create DatabaseUtil instances for every configured database.

    Returns a dict keyed by database name. Construction is delegated to the
    single-database helpers so connection settings live in one place; the
    previous revision duplicated both constructor calls and also declared an
    unused local accumulator (`dbInstance = {}`), which has been removed.
    """
    return {
        DATABASE_BusinessFoundationDb: GetBusinessFoundationDbUtil(),
        DATABASE_businessDb: GetBusinessDbUtil(),
    }
+
+
def GetMinIOUtil():
    """Create a thread-safe MinIO client backed by a pooled connection set."""
    minioConf = config.minioConfig
    clientPool = MinioClientPool(minioConf.endpoint, minioConf.accessKey,
                                 minioConf.secretKey, minioConf.poolSize)
    return ThreadSafeMinioClient(clientPool)

+ 20 - 0
dataAnalysisBehavior/common/turbineInfo.py

@@ -0,0 +1,20 @@
+from algorithmContract.confBusiness import *
+import pandas as pd
+
+
def loadTurbineInfo(turbineInfoFilePathCSV, charset=charset_unify):
    """
    Load wind-turbine information from a CSV file.

    Parameters:
    turbineInfoFilePathCSV (str): path to the turbine-info CSV file
    charset (str): file encoding, defaults to the project-wide charset

    Returns:
    pandas.DataFrame: the turbine information table
    """
    return pd.read_csv(turbineInfoFilePathCSV, encoding=charset)

+ 4 - 0
dataAnalysisBusiness/__init__.py

@@ -0,0 +1,4 @@
# -*- coding: utf-8 -*-

# NOTE(review): `from . import *` honours this package's __all__, but __all__
# is only assigned on the next line — confirm the 'algorithm' and 'common'
# submodules are actually imported as intended.
from . import *
__all__=['algorithm','common']

+ 1 - 0
dataAnalysisBusiness/algorithm/__init__.py

@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-

+ 269 - 0
dataAnalysisBusiness/algorithm/cpWindSpeedAnalyst.py

@@ -0,0 +1,269 @@
+import os
+
+import pandas as pd
+import plotly.graph_objects as go
+from algorithmContract.confBusiness import *
+from algorithmContract.contract import Contract
+from behavior.analystWithGoodPoint import AnalystWithGoodPoint
+
+
class CpWindSpeedAnalyst(AnalystWithGoodPoint):
    """
    Wind-turbine power-coefficient (Cp) vs. wind-speed analysis.

    Emits one farm-wide Cp-distribution JSON payload per turbine model plus
    one JSON payload per individual turbine, and returns a DataFrame that
    describes every file written.
    """

    def typeAnalyst(self):
        # Analysis-type key used to route configuration and output records.
        return "cp_windspeed"

    def dataReprocess(self, dataFrameTurbines: pd.DataFrame) -> pd.DataFrame:
        """
        Aggregate Cp per (turbine code, wind-speed bin).

        Returns the median, max and min Cp for each pair, sorted by turbine
        code then by wind-speed bin.
        """
        dataFrame = dataFrameTurbines.groupby(
            [Field_CodeOfTurbine, Field_WindSpeedFloor]).agg(
            cp=(Field_Cp, 'median'),
            cp_max=(Field_Cp, 'max'),
            cp_min=(Field_Cp, 'min'),
        ).reset_index()

        dataFrame.columns = [Field_CodeOfTurbine, Field_WindSpeedFloor,
                             Field_Cp, Field_CpMax, Field_CpMin]
        return dataFrame.sort_values(
            by=[Field_CodeOfTurbine, Field_WindSpeedFloor],
            ascending=[True, True])

    def turbinesAnalysis(self, outputAnalysisDir, conf: Contract, turbineCodes):
        """
        Run the Cp analysis for the given turbines, one chart set per model.

        Raises:
            ValueError: when the filtered data lacks the wind-speed bin or
                Cp columns the aggregation needs.
        """
        dictionary = self.processTurbineData(
            turbineCodes, conf,
            [Field_DeviceCode, Field_Time, Field_WindSpeed, Field_ActiverPower])
        dataFrameOfTurbines = self.userDataFrame(
            dictionary, conf.dataContract.configAnalysis, self)

        # Fail fast if upstream processing did not add the derived columns.
        required_columns = {Field_WindSpeedFloor, Field_Cp}
        if not required_columns.issubset(dataFrameOfTurbines.columns):
            raise ValueError(f"DataFrame缺少必要的列。需要的列有: {required_columns}")

        turbineInfos = self.common.getTurbineInfos(
            conf.dataContract.dataFilter.powerFarmID, turbineCodes,
            self.turbineInfo)

        returnDatas = []
        # One chart set per turbine model.
        for turbineModelCode, group in turbineInfos.groupby(Field_MillTypeCode):
            currTurbineCodes = group[Field_CodeOfTurbine].unique().tolist()
            currTurbineModeInfo = self.common.getTurbineModelByCode(
                turbineModelCode, self.turbineModelInfo)

            currDataFrameOfTurbines = dataFrameOfTurbines[
                dataFrameOfTurbines[Field_CodeOfTurbine].isin(currTurbineCodes)]

            dataFrame = self.dataReprocess(currDataFrameOfTurbines)

            returnDatas.append(self.buildChart(
                dataFrame, outputAnalysisDir, conf, currTurbineModeInfo))

        return pd.concat(returnDatas, ignore_index=True)

    def buildChart(self, dataFrameOfTurbines: pd.DataFrame, outputAnalysisDir,
                   conf: Contract, turbineModelInfo: pd.Series):
        """
        Write the farm-wide and per-turbine Cp-distribution JSON payloads.

        Every payload carries the full set of per-turbine curves; the
        per-turbine files differ only in their title. The previous revision
        also assembled Plotly figures, but never wrote them to disk (all
        save calls were commented out), so that dead work has been removed.

        Returns a DataFrame with one row per generated file.
        """
        import json

        powerFarmID = conf.dataContract.dataFilter.powerFarmID
        dataBatchNum = conf.dataContract.dataFilter.dataBatchNum

        # Model-info lookups may return a Series; collapse to a scalar.
        engineTypeCode = turbineModelInfo.get(Field_MillTypeCode, "")
        if isinstance(engineTypeCode, pd.Series):
            engineTypeCode = engineTypeCode.iloc[0]
        engineTypeName = turbineModelInfo.get(Field_MachineTypeCode, "")
        if isinstance(engineTypeName, pd.Series):
            engineTypeName = engineTypeName.iloc[0]

        # Per-turbine curve data, computed once and reused by every payload
        # (the previous revision recomputed the identical list per turbine).
        seriesOfTurbines = []
        for code in dataFrameOfTurbines[Field_CodeOfTurbine].unique():
            curve = dataFrameOfTurbines[
                dataFrameOfTurbines[Field_CodeOfTurbine] == code]
            info = self.common.getTurbineInfo(
                powerFarmID, code, self.turbineInfo)
            seriesOfTurbines.append({
                "engineName": info[Field_NameOfTurbine],
                "engineCode": code,
                "xData": curve[Field_WindSpeedFloor].tolist(),
                "yData": curve[Field_Cp].tolist(),
            })

        # Farm-wide payload for this turbine model.
        json_output = {
            "analysisTypeCode": "风电机组风能利用系数分析",
            "typecode": turbineModelInfo[Field_MillTypeCode],
            "engineCode": engineTypeCode,
            "engineTypeName": engineTypeName,
            "title": f'风能利用系数分布-{turbineModelInfo[Field_MachineTypeCode]}',
            "xaixs": "风速",
            "yaixs": "风能利用系数",
            "data": seriesOfTurbines
        }
        output_json_path = os.path.join(
            outputAnalysisDir, f"{turbineModelInfo[Field_MillTypeCode]}.json")
        with open(output_json_path, 'w', encoding='utf-8') as f:
            json.dump(json_output, f, ensure_ascii=False, indent=4)

        result_rows = [{
            Field_Return_TypeAnalyst: self.typeAnalyst(),
            Field_PowerFarmCode: powerFarmID,
            Field_Return_BatchCode: dataBatchNum,
            Field_CodeOfTurbine: 'total',
            Field_MillTypeCode: turbineModelInfo[Field_MillTypeCode],
            Field_Return_FilePath: output_json_path,
            Field_Return_IsSaveDatabase: True
        }]

        # One payload per turbine. FIX: the previous revision re-used the
        # name `turbineCode` for an inner loop over all turbines, so every
        # result row below recorded the code of the *last* turbine instead
        # of the current one; the inner loop no longer shadows it.
        for turbineCode, _ in dataFrameOfTurbines.groupby(Field_CodeOfTurbine):
            currTurbineInfo = self.common.getTurbineInfo(
                powerFarmID, turbineCode, self.turbineInfo)

            json_output = {
                "analysisTypeCode": "风电机组风能利用系数分析",
                "typecode": turbineModelInfo[Field_MillTypeCode],
                "engineCode": engineTypeCode,
                "engineTypeName": engineTypeName,
                "title": f'风机: {currTurbineInfo[Field_NameOfTurbine]}',
                "xaixs": "风速",
                "yaixs": "风能利用系数",
                "data": seriesOfTurbines
            }

            output_json_path_each = os.path.join(
                outputAnalysisDir,
                f"{currTurbineInfo[Field_NameOfTurbine]}.json")
            with open(output_json_path_each, 'w', encoding='utf-8') as f:
                json.dump(json_output, f, ensure_ascii=False, indent=4)

            result_rows.append({
                Field_Return_TypeAnalyst: self.typeAnalyst(),
                Field_PowerFarmCode: powerFarmID,
                Field_Return_BatchCode: dataBatchNum,
                Field_CodeOfTurbine: turbineCode,
                Field_Return_FilePath: output_json_path_each,
                Field_Return_IsSaveDatabase: True
            })

        return pd.DataFrame(result_rows)

+ 21 - 0
dataAnalysisBusiness/algorithm/dataIntegrityOfMinuteAnalyst.py

@@ -0,0 +1,21 @@
+import os
+import pandas as pd
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from geopy.distance import geodesic
+from utils.directoryUtil import DirectoryUtil as dir
+from algorithmContract.confBusiness import *
+from .dataIntegrityOfSecondAnalyst import DataIntegrityOfSecondAnalyst
+
+
class DataIntegrityOfMinuteAnalyst(DataIntegrityOfSecondAnalyst):
    """
    Minute-level data-integrity analysis for wind turbines.

    Inherits the entire pipeline from DataIntegrityOfSecondAnalyst and only
    changes the analysis-type key. (The previous docstring was copy-pasted
    from the second-level analyst and wrongly said "second-level".)
    """

    def typeAnalyst(self):
        # Key used to select minute-granularity source data.
        return "data_integrity_minute"

+ 271 - 0
dataAnalysisBusiness/algorithm/dataIntegrityOfSecondAnalyst.py

@@ -0,0 +1,271 @@
+import os
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from geopy.distance import geodesic
+from behavior.analystNotFilter import AnalystNotFilter
+from utils.directoryUtil import DirectoryUtil as dir
+from algorithmContract.confBusiness import *
+import calendar
+import random
+from datetime import datetime
+from algorithmContract.contract import Contract
+
+
class DataIntegrityOfSecondAnalyst(AnalystNotFilter):
    """
    Second-level (raw SCADA) data-integrity analysis for wind turbines.

    Computes, per turbine and per calendar month, the percentage of expected
    samples actually present, and emits the result as a heat-map JSON
    payload plus a DataFrame describing the output file.
    """

    def typeAnalyst(self):
        # Analysis-type key used to select second-granularity source data.
        return "data_integrity_second"

    def turbinesAnalysis(self, outputAnalysisDir, conf: Contract, turbineCodes):
        """
        Entry point: load data, attach turbine display names, aggregate the
        sample counts by month, and emit the heat-map payload.
        """
        select = [Field_DeviceCode, Field_Time, Field_ActiverPower,
                  Field_WindSpeed, Field_NacPos, Field_WindDirection,
                  Field_RotorSpeed, Field_GeneratorSpeed, Field_GeneratorTorque,
                  Field_AngleIncluded, Field_EnvTemp, Field_NacTemp,
                  Field_PitchAngel1, Field_PitchAngel2, Field_PitchAngel3]

        dictionary = self.processTurbineData(turbineCodes, conf, select)
        dataFrameOfTurbines = self.userDataFrame(
            dictionary, conf.dataContract.configAnalysis, self)

        turbineInfos = self.common.getTurbineInfos(
            conf.dataContract.dataFilter.powerFarmID, turbineCodes,
            self.turbineInfo)

        # Work on an explicit copy so the name column can be added safely
        # (the previous revision assigned into a filtered view, which raises
        # pandas' SettingWithCopyWarning and may not stick).
        currDataFrameOfTurbines = dataFrameOfTurbines[
            dataFrameOfTurbines[Field_CodeOfTurbine].isin(turbineCodes)].copy()

        # Map turbine code -> display name onto the working frame.
        turbineNameByCode = turbineInfos.set_index(
            Field_CodeOfTurbine)[Field_NameOfTurbine].to_dict()
        currDataFrameOfTurbines[Field_NameOfTurbine] = currDataFrameOfTurbines[
            Field_CodeOfTurbine].map(turbineNameByCode).fillna("")

        # FIX: the previous revision enriched `currDataFrameOfTurbines` with
        # the name column but then passed the *unenriched* frame to the
        # aggregation; pass the enriched one.
        groupedDataFrame = self.dataIntegrityByMonth(
            currDataFrameOfTurbines, conf, Field_NameOfTurbine)
        self.logger.info(
            "groupedDataFrame : \n {}".format(groupedDataFrame.head()))

        return self.plotByAllMonth(
            groupedDataFrame, outputAnalysisDir,
            self.powerFarmInfo[Field_PowerFarmName].iloc[0],
            Field_NameOfTurbine, conf)

    def fullMonthIndex(self, start_time, end_time, turbine_name, new_frame):
        """
        Re-index each turbine's monthly series onto the full month range
        [start_time, end_time], filling missing months with 0% integrity.
        """
        months = (end_time.year - start_time.year) * \
            12 + end_time.month - start_time.month
        month_range = ['%04d-%02d' % (int(start_time.year + mon // 12), int(mon % 12 + 1))
                       for mon in range(start_time.month - 1, start_time.month + months)]
        month_index = pd.DataFrame(month_range, columns=[Field_YearMonth])

        plot_res = pd.DataFrame()
        for name, group in new_frame.groupby(turbine_name):
            group = pd.merge(group, month_index,
                             on=Field_YearMonth, how='outer')
            group['数据完整度%'] = group['数据完整度%'].fillna(0)
            group[turbine_name] = name
            # Re-derive year / month from the (possibly newly added) labels.
            group['year'] = group[Field_YearMonth].apply(
                lambda x: str(x).split('-')[0])
            group['month'] = group[Field_YearMonth].apply(
                lambda x: str(x).split('-')[1])
            plot_res = pd.concat([plot_res, group], axis=0, sort=False)

        return plot_res

    def get_time_space(self, df, time_str):
        """
        Estimate the sampling interval of the time column, in whole seconds.

        Samples ~1% of the consecutive-row time deltas and returns the most
        frequent one. NOTE(review): `.seconds` drops any whole-day component
        of the delta, and the 1% sample is empty for frames with fewer than
        100 rows — confirm inputs are large, regular time series.
        """
        df1 = pd.DataFrame(df[time_str])
        df1['chazhi'] = df1[time_str].shift(-1) - df1[time_str]
        result = df1.sample(int(df1.shape[0] / 100))[
            'chazhi'].value_counts().idxmax().seconds
        del df1
        return result

    def dataIntegrityByMonth(self, dataFrameMerge: pd.DataFrame, conf: Contract, Field_NameOfTurbine):
        """
        Compute per-turbine, per-month data-integrity percentages.

        Integrity = 100 * observed sample count / expected sample count,
        where the expectation assumes one sample every `timeGranularity`
        seconds for the whole month.
        """
        grouped = dataFrameMerge.groupby(
            [dataFrameMerge.loc[:, Field_Time].dt.year.rename('year'),
             dataFrameMerge.loc[:, Field_Time].dt.month.rename('month'),
             dataFrameMerge.loc[:, Field_NameOfTurbine]]
        ).agg({'count'})[Field_Time].rename({'count': '长度'}, axis=1)

        new_frame = grouped.reset_index('month')
        # Sampling interval is estimated from the data itself rather than
        # taken from the transfer configuration.
        timeGranularity = self.get_time_space(dataFrameMerge, Field_Time)
        self.logger.info(
            f"{self.typeAnalyst()}   timeGranularity-->{timeGranularity}")

        new_frame = new_frame.reset_index()

        # Expected samples per month = days-in-month * 24h * 3600s / interval.
        new_frame['数据完整度'] = (100 * new_frame['长度'] / (new_frame.apply(
            lambda row: calendar.monthrange(row['year'], row['month'])[1] * 24 * 3600 / timeGranularity,
            axis=1))).round(decimals=0)

        new_frame = new_frame.rename(columns={'数据完整度': '数据完整度%'})
        new_frame['month'] = new_frame['month'].astype(
            str).apply(lambda x: x.zfill(2))
        new_frame[Field_YearMonth] = new_frame['year'].astype(
            str) + '-' + new_frame['month'].astype(str)

        # Use the configured analysis window when given; otherwise fall back
        # to the extent of the data itself.
        if not self.common.isNone(conf.dataContract.dataFilter.beginTime):
            beginTime = conf.dataContract.dataFilter.beginTime
        else:
            beginTime = dataFrameMerge[Field_Time].min().strftime(
                '%Y-%m-%d %H:%M:%S')
        if not self.common.isNone(conf.dataContract.dataFilter.endTime):
            endTime = conf.dataContract.dataFilter.endTime
        else:
            endTime = dataFrameMerge[Field_Time].max().strftime(
                '%Y-%m-%d %H:%M:%S')

        beginTime = datetime.strptime(beginTime, '%Y-%m-%d %H:%M:%S')
        endTime = datetime.strptime(endTime, '%Y-%m-%d %H:%M:%S')

        return self.fullMonthIndex(
            beginTime, endTime, Field_NameOfTurbine, new_frame)

    def plotByAllMonth(self, groupedDataFrame, outputAnalysisDir, farmName, fieldTurbineName, conf: Contract):
        """
        Emit the monthly-integrity heat-map JSON payload.

        The longer of the two dimensions (months vs. turbines) is placed on
        the x-axis. The previous revision also built a Plotly heat-map that
        was never written to disk (all save calls were commented out), so
        that dead work has been removed.

        NOTE(review): the output file name is fixed to
        "Data_Integrity_Of_Second_Analyst.json" even for the minute-level
        subclass — confirm this is intended.
        """
        import json

        # Pivot so the longer dimension runs along the x-axis.
        if len(set(groupedDataFrame[Field_YearMonth])) > len(set(groupedDataFrame[fieldTurbineName])):
            result = groupedDataFrame.pivot(
                values="数据完整度%", index=fieldTurbineName, columns=Field_YearMonth)
            x_axis_title, y_axis_title = "日期", "机组"
        else:
            result = groupedDataFrame.pivot(
                values="数据完整度%", index=Field_YearMonth, columns=fieldTurbineName)
            x_axis_title, y_axis_title = "机组", "日期"

        x_labels = result.columns.tolist()
        y_labels = result.index.tolist()

        json_output = {
            "analysisTypeCode": "数据完整度检测(%)",
            "engineCode": "",
            "engineTypeName": "",
            "xaixs": x_axis_title,
            "yaixs": y_axis_title,
            "data": [{
                "engineName": "",
                "engineCode": "",
                "title": ' 数据完整度%',
                "xData": x_labels,
                "yData": y_labels,
                "ZData": result.values.tolist(),
            }]
        }

        output_json_path = os.path.join(
            outputAnalysisDir, "Data_Integrity_Of_Second_Analyst.json")
        with open(output_json_path, 'w', encoding='utf-8') as f:
            json.dump(json_output, f, ensure_ascii=False, indent=4)

        result_rows = [{
            Field_Return_TypeAnalyst: self.typeAnalyst(),
            Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
            Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
            Field_CodeOfTurbine: Const_Output_Total,
            Field_MillTypeCode: 'total',
            Field_Return_FilePath: output_json_path,
            Field_Return_IsSaveDatabase: True
        }]

        return pd.DataFrame(result_rows)

+ 43 - 0
dataAnalysisBusiness/algorithm/formula_cp.py

@@ -0,0 +1,43 @@
def calculate_area(diameter):
    """
    Compute the swept area of a wind-turbine rotor.

    Parameters:
    - diameter: rotor diameter, in metres (m)

    Returns:
    - swept area, in square metres (m^2)
    """
    return 3.141592653589793 * (diameter / 2) ** 2
+
def calculate_cp(P, A, rho, v):
    """
    Compute the power coefficient Cp of a wind turbine.

    Parameters:
    - P: turbine output power, in watts (W)
    - A: rotor swept area, in square metres (m^2)
    - rho: air density, in kg/m^3
    - v: wind speed, in m/s

    Returns:
    - Cp: the (dimensionless) power coefficient
    """
    availableWindPower = 0.5 * rho * A * v ** 3
    return P / availableWindPower
+
# Example: compute Cp for a sample turbine operating point.
# Guarded so that importing this module no longer runs (and prints) the demo
# as an import side effect.
if __name__ == "__main__":
    diameter_example = 82      # rotor diameter, metres
    P_example = 76.32 * 1000   # output power, watts
    rho_example = 1.059        # air density, kg/m^3
    v_example = 3.01           # wind speed, m/s

    # Swept area from the rotor diameter.
    A_example = calculate_area(diameter_example)

    # Power coefficient at the example operating point.
    cp_value = calculate_cp(P_example, A_example, rho_example, v_example)

    print("Cp={}".format(cp_value))

+ 168 - 0
dataAnalysisBusiness/algorithm/pitchGeneratorSpeedAnalyst.py

@@ -0,0 +1,168 @@
+import os
+from datetime import datetime
+
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+from algorithmContract.confBusiness import *
+from algorithmContract.contract import Contract
+from behavior.analystWithGoodBadPoint import AnalystWithGoodBadPoint
+
+
class PitchGeneratorSpeedAnalyst(AnalystWithGoodBadPoint):
    """
    Pitch-angle vs. generator-speed coordination analysis for wind turbines.
    """

    def typeAnalyst(self):
        """Identifier of this analysis type, used in the returned result rows."""
        return "pitch_generator_speed"

    def selectColumns(self):
        """Columns to load from the turbine SCADA data for this analysis."""
        return [Field_DeviceCode, Field_Time, Field_WindSpeed,
                Field_ActiverPower, Field_PitchAngel1, Field_GeneratorSpeed]

    def turbinesAnalysis(self, outputAnalysisDir, conf: Contract, turbineCodes):
        """Load and filter the turbine data, then plot pitch vs. generator speed.

        Returns the result DataFrame produced by plot_speed_pitch_angle.
        """
        dictionary = self.processTurbineData(turbineCodes, conf, self.selectColumns())
        turbineInfos = self.common.getTurbineInfos(conf.dataContract.dataFilter.powerFarmID, turbineCodes,
                                                   self.turbineInfo)
        dataFrame = self.userDataFrame(dictionary, conf.dataContract.configAnalysis, self)
        return self.plot_speed_pitch_angle(dataFrame, turbineInfos, outputAnalysisDir, conf)

    def plot_speed_pitch_angle(self, dataFrameMerge, turbineModelInfo: pd.Series, outputAnalysisDir, conf: Contract):
        """Draw one generator-speed / pitch-angle scatter per turbine and export
        the underlying data as one JSON file per turbine.

        Only rows with positive active power are kept; points are coloured by
        year-month so behaviour drift over time is visible.
        Returns a DataFrame with one row per JSON file written.
        """
        import json  # hoisted: previously re-imported inside the per-turbine loop

        # Keep producing rows only and order by year-month for a stable colour axis.
        dataFrameMerge = dataFrameMerge[(dataFrameMerge[Field_ActiverPower] > 0)].sort_values(by=Field_YearMonth)
        grouped = dataFrameMerge.groupby([Field_NameOfTurbine, Field_CodeOfTurbine])

        result_rows = []
        for name, group in grouped:
            groupNew = group.copy()

            # Scatter: x = generator speed, y = pitch angle, colour = month.
            fig = go.Figure()
            fig.add_trace(go.Scatter(
                x=groupNew[Field_GeneratorSpeed],
                y=groupNew[Field_PitchAngel1],
                mode='markers',
                marker=dict(
                    color=group[Field_UnixYearMonth],
                    colorscale='Rainbow',
                    size=3,
                    opacity=0.7,
                    colorbar=dict(
                        tickvals=np.linspace(
                            group[Field_UnixYearMonth].min(), group[Field_UnixYearMonth].max(), 6),
                        ticktext=[datetime.fromtimestamp(ts).strftime('%Y-%m') for ts in np.linspace(
                            group[Field_UnixYearMonth].min(), group[Field_UnixYearMonth].max(), 6)],
                        thickness=18,
                        len=1,  # colour bar spans the full plot height
                        outlinecolor='rgba(255,255,255,0)'
                    ),
                    showscale=True
                ),
                showlegend=False
            ))

            # Axis ranges/steps come from analyst configuration attributes.
            fig.update_layout(
                title=f'机组: {name[0]}',
                xaxis=dict(
                    title='发电机转速',
                    range=[self.axisLowerLimitGeneratorSpeed, self.axisUpperLimitGeneratorSpeed],
                    dtick=self.axisStepGeneratorSpeed,
                    tickangle=-45  # rotate x tick labels for readability
                ),
                yaxis=dict(
                    title='桨距角',
                    range=[self.axisLowerLimitPitchAngle, self.axisUpperLimitPitchAngle],
                    dtick=self.axisStepPitchAngle
                ),
                coloraxis=dict(
                    colorbar=dict(
                        title="时间",
                        ticks="outside",
                        len=1,  # colour bar spans the full plot height
                        thickness=20,
                        orientation='v',
                        tickmode='array',  # ticks in data order
                        tickvals=dataFrameMerge[Field_YearMonth].unique().tolist(),
                        ticktext=dataFrameMerge[Field_YearMonth].unique().tolist()  # %Y-%m labels
                    )
                )
            )

            # Make sure scalar values are extracted from any Series.
            engineTypeCode = turbineModelInfo.get(Field_MillTypeCode, "")
            if isinstance(engineTypeCode, pd.Series):
                engineTypeCode = engineTypeCode.iloc[0]

            engineTypeName = turbineModelInfo.get(Field_MachineTypeCode, "")
            if isinstance(engineTypeName, pd.Series):
                engineTypeName = engineTypeName.iloc[0]

            # Format the month timestamps into a local Series instead of
            # writing back into `group` (a groupby slice), which previously
            # raised pandas' SettingWithCopyWarning.
            monthLabels = group[Field_UnixYearMonth].apply(
                lambda ts: datetime.fromtimestamp(ts).strftime('%Y-%m'))

            # Assemble the JSON payload for this turbine.
            json_output = {
                "analysisTypeCode": "变桨和发电机转速协调性分析",
                "engineCode": engineTypeCode,
                "engineTypeName": engineTypeName,
                "xaixs": "发电机转速(r/min)",
                "yaixs": "桨距角(°)",
                "data": [{
                    "engineName": name[0],
                    "engineCode": name[1],
                    "title": f' 机组: {name[0]}',
                    "xData": groupNew[Field_GeneratorSpeed].tolist(),
                    "yData": groupNew[Field_PitchAngel1].tolist(),
                    "colorbar": monthLabels.tolist(),
                }]
            }

            output_json_path = os.path.join(outputAnalysisDir, f"pitch_GeneratorSpeed_Analyst{name[0]}.json")
            with open(output_json_path, 'w', encoding='utf-8') as f:
                json.dump(json_output, f, ensure_ascii=False, indent=4)

            result_rows.append({
                Field_Return_TypeAnalyst: self.typeAnalyst(),
                Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
                Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
                Field_CodeOfTurbine: name[1],
                # NOTE(review): hard-coded 'total' on a per-turbine row —
                # confirm this is intended rather than the model's mill code.
                Field_MillTypeCode: 'total',
                Field_Return_FilePath: output_json_path,
                Field_Return_IsSaveDatabase: True
            })

        result_df = pd.DataFrame(result_rows)

        return result_df
+

+ 382 - 0
dataAnalysisBusiness/algorithm/powerCurveAnalyst.py

@@ -0,0 +1,382 @@
+import os
+
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+from algorithmContract.confBusiness import *
+from algorithmContract.contract import Contract
+from behavior.analystWithGoodPoint import AnalystWithGoodPoint
+from utils.jsonUtil import JsonUtil
+
+
class PowerCurveAnalyst(AnalystWithGoodPoint):
    """
    Power-curve scatter analysis for wind turbines.

    Second-level SCADA data is slow to process; minute-level SCADA data is
    recommended.
    """

    def typeAnalyst(self):
        """Identifier of this analysis type, used in the returned result rows."""
        return "power_curve"

    def turbinesAnalysis(self, outputAnalysisDir, conf: Contract, turbineCodes):
        """Compute per-model power curves and export them as JSON result files.

        Returns a DataFrame describing every artefact produced (one row per file).
        Raises ValueError when the filtered data lacks the required columns.
        """
        dictionary = self.processTurbineData(turbineCodes, conf, [
                                             Field_DeviceCode, Field_Time, Field_WindSpeed, Field_ActiverPower])
        dataFrameOfTurbines = self.userDataFrame(
            dictionary, conf.dataContract.configAnalysis, self)

        # Fail fast if the required columns are missing.
        required_columns = {Field_WindSpeed, Field_ActiverPower}
        if not required_columns.issubset(dataFrameOfTurbines.columns):
            raise ValueError(f"DataFrame缺少必要的列。需要的列有: {required_columns}")

        turbineInfos = self.common.getTurbineInfos(
            conf.dataContract.dataFilter.powerFarmID, turbineCodes, self.turbineInfo)

        # One pass per turbine model: its turbines share a contract power curve.
        groupedOfTurbineModel = turbineInfos.groupby(Field_MillTypeCode)

        returnDatas = []
        for turbineModelCode, group in groupedOfTurbineModel:
            currTurbineCodes = group[Field_CodeOfTurbine].unique().tolist()
            currTurbineModeInfo = self.common.getTurbineModelByCode(
                turbineModelCode, self.turbineModelInfo)

            dataFrameOfContractPowerCurve = self.dataFrameContractOfTurbine[
                self.dataFrameContractOfTurbine[Field_MillTypeCode] == turbineModelCode]
            currDataFrameOfTurbines = dataFrameOfTurbines[dataFrameOfTurbines[Field_CodeOfTurbine].isin(
                currTurbineCodes)]

            powerCurveDataOfTurbines = self.dataReprocess(
                currDataFrameOfTurbines, self.binsWindSpeed)

            returnJsonData = self.outputPowerCurveData(
                conf, outputAnalysisDir, currTurbineModeInfo,
                powerCurveDataOfTurbines, dataFrameOfContractPowerCurve)
            returnDatas.append(returnJsonData)

        returnResult = pd.concat(returnDatas, ignore_index=True)

        return returnResult

    def outputPowerCurveData(self, conf: Contract, outputAnalysisDir: str, turbineModelInfo: pd.Series, powerCurveDataOfTurbines: pd.DataFrame, dataFrameOfContractPowerCurve: pd.DataFrame) -> pd.DataFrame:
        """Write one model-level power-curve JSON plus one JSON per turbine.

        Returns a DataFrame with one result row per file written.
        """
        turbineCodes = powerCurveDataOfTurbines[Field_CodeOfTurbine].unique()
        jsonDictionary = self.convert2Json(turbineModelInfo, turbineCodes=turbineCodes,
                                           dataFrameOfTurbines=powerCurveDataOfTurbines,
                                           dataFrameOfContract=dataFrameOfContractPowerCurve)
        jsonFileName = f"power_curve-{turbineModelInfo[Field_MillTypeCode]}.json"
        jsonFilePath = os.path.join(outputAnalysisDir, jsonFileName)

        JsonUtil.write_json(jsonDictionary, file_path=jsonFilePath)
        result_rows = [{
            Field_Return_TypeAnalyst: self.typeAnalyst(),
            Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
            Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
            Field_CodeOfTurbine: Const_Output_Total,
            Field_MillTypeCode: turbineModelInfo[Field_MillTypeCode],
            Field_Return_FilePath: jsonFilePath,
            Field_Return_IsSaveDatabase: True
        }]

        for turbineCode in turbineCodes:
            data: pd.DataFrame = powerCurveDataOfTurbines[powerCurveDataOfTurbines[Field_CodeOfTurbine] == turbineCode]
            jsonFileName2 = f"power_curve-{data[Field_NameOfTurbine].iloc[0]}.json"
            jsonFilePath2 = os.path.join(outputAnalysisDir, jsonFileName2)
            # NOTE(review): every turbine file currently receives the same
            # model-level dictionary — confirm a per-turbine payload is not
            # intended here.
            JsonUtil.write_json(jsonDictionary, file_path=jsonFilePath2)
            result_rows.append({
                Field_Return_TypeAnalyst: self.typeAnalyst(),
                Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
                Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
                Field_CodeOfTurbine: turbineCode,
                Field_Return_FilePath: jsonFilePath2,
                Field_Return_IsSaveDatabase: True
            })

        returnDatas = pd.DataFrame(result_rows)
        return returnDatas

    def convert2Json(self, turbineModelInfo: pd.Series, turbineCodes, dataFrameOfTurbines: pd.DataFrame, dataFrameOfContract: pd.DataFrame):
        """Assemble the JSON payload: one series per turbine plus the contract curve.

        Missing values (NaN/NA) are emitted as None so they serialise as JSON null.
        """
        result = {
            "analysisTypeCode": "功率曲线分析",
            "engineTypeCode": turbineModelInfo[Field_MillTypeCode],
            "engineTypeName": turbineModelInfo[Field_MachineTypeCode],
            "data": []
        }

        def nanToNone(series: pd.Series) -> list:
            # Fix: Series.replace({pd.NA, nan}, None) hits pandas' legacy
            # interpretation of value=None (forward-fill) instead of
            # substituting None, and NaN membership in a set is unreliable.
            # Convert explicitly element by element instead.
            return [None if pd.isna(value) else value for value in series.tolist()]

        # Per-turbine series.
        for turbineCode in turbineCodes:
            data: pd.DataFrame = dataFrameOfTurbines[dataFrameOfTurbines[Field_CodeOfTurbine] == turbineCode]
            engine_data = {
                "enginName": data[Field_NameOfTurbine].iloc[0],
                "enginCode": turbineCode,
                "xData": nanToNone(data[Field_WindSpeed]),
                "yData": nanToNone(data[Field_ActiverPower]),
                "zData": []
            }
            result["data"].append(engine_data)

        # Contract (guaranteed) power curve.
        contract_curve = {
            "enginName": "合同功率曲线",
            "xData": nanToNone(dataFrameOfContract[Field_WindSpeed]),
            "yData": nanToNone(dataFrameOfContract[Field_ActiverPower]),
            "zData": []
        }
        result["data"].append(contract_curve)

        return result

    def buildPowerCurveData(self, group: pd.DataFrame, fieldWindSpeed: str, fieldActivePower: str, bins) -> pd.DataFrame:
        """Compute one turbine's power curve by wind-speed binning.

        Each bin contributes the median active power, the per-bin sample count
        and the bin label; medians are rounded to two decimals.
        """
        powerCut = group.groupby(pd.cut(group[fieldWindSpeed], bins, labels=np.arange(0, 25.5, 0.5))).agg({
            fieldActivePower: 'median',
            fieldWindSpeed: ['median', 'count']
        })
        wind_count = powerCut[fieldWindSpeed]['count'].tolist()
        line = powerCut[fieldActivePower]['median'].round(decimals=2).tolist()
        act_line = pd.DataFrame([powerCut.index, wind_count, line]).T
        act_line.columns = [Field_WindSpeed,
                            'EffectiveQuantity', Field_ActiverPower]
        return act_line

    def dataReprocess(self, dataFrameMerge: pd.DataFrame, binsWindSpeed) -> pd.DataFrame:
        """Build the per-turbine power-curve table for all turbines in the frame."""
        dataFrames = []

        # One power curve per (turbine name, turbine code) pair.
        grouped = dataFrameMerge.groupby(
            [Field_NameOfTurbine, Field_CodeOfTurbine])

        for name, group in grouped:
            dataFramePowerCurveTurbine = self.buildPowerCurveData(
                group, Field_WindSpeed, Field_ActiverPower, binsWindSpeed)
            dataFramePowerCurveTurbine[Field_NameOfTurbine] = name[0]
            dataFramePowerCurveTurbine[Field_CodeOfTurbine] = name[1]

            dataFrames.append(dataFramePowerCurveTurbine)

        # Stack all per-turbine curves into one table.
        dataFrameReprocess: pd.DataFrame = pd.concat(
            dataFrames, ignore_index=True).reset_index(drop=True)

        return dataFrameReprocess

    def drawOfPowerCurve(self, powerCurveOfTurbines: pd.DataFrame, outputAnalysisDir, conf: Contract, dataFrameGuaranteePowerCurve: pd.DataFrame, turbineModelInfo: pd.Series):
        """Draw the farm-wide power-curve chart plus one chart per turbine.

        Parameters:
        powerCurveOfTurbines (pd.DataFrame): per-turbine power-curve table
            (turbine name, wind speed and power columns).
        outputAnalysisDir (str): analysis output directory.
        conf (Contract): analysis configuration.
        Returns the concatenated result rows of all charts.
        """
        # Farm-wide chart with every turbine's curve.
        df1 = self.plot_power_curve(
            powerCurveOfTurbines, outputAnalysisDir, dataFrameGuaranteePowerCurve, Field_NameOfTurbine, conf, turbineModelInfo)

        # One chart per turbine, highlighted against the rest of the farm.
        grouped = powerCurveOfTurbines.groupby(
            [Field_NameOfTurbine, Field_CodeOfTurbine])

        df2 = pd.DataFrame()  # accumulates the single-turbine chart rows
        for name, group in grouped:
            df_temp2 = self.plot_single_power_curve(
                powerCurveOfTurbines, group, dataFrameGuaranteePowerCurve, name, outputAnalysisDir, conf)
            df2 = pd.concat([df2, df_temp2], ignore_index=True)

        # Merge farm-wide and per-turbine result rows.
        df = pd.concat([df1, df2], ignore_index=True)
        return df

    def plot_power_curve(self, ress, output_path, dataFrameGuaranteePowerCurve: pd.DataFrame, Field_NameOfTurbine, conf: Contract, turbineModelInfo: pd.Series):
        """Draw the farm-wide power-curve chart and save it as HTML."""
        fig = go.Figure()

        # One wind-speed/power line per turbine, named after the turbine.
        for turbine_num in ress[Field_NameOfTurbine].unique():
            turbine_data = ress[ress[Field_NameOfTurbine] == turbine_num]
            fig.add_trace(go.Scatter(
                x=turbine_data[Field_WindSpeed],
                y=turbine_data[Field_ActiverPower],
                mode='lines',
                name=f'{turbine_num}'
            ))

        # Lower x-axis bound: just below the cut-in wind speed when available.
        if not ress.empty and Field_CutInWS in ress.columns and ress[Field_CutInWS].notna().any():
            cut_in_ws = ress[Field_CutInWS].min() - 1
        else:
            cut_in_ws = 2

        # Contract (guaranteed) power curve in red.
        fig.add_trace(go.Scatter(
            x=dataFrameGuaranteePowerCurve[Field_WindSpeed],
            y=dataFrameGuaranteePowerCurve[Field_ActiverPower],
            mode='lines+markers',
            line=dict(color='red'),
            marker=dict(color='red', size=5),
            name='合同功率曲线',
            showlegend=True
        ))

        fig.update_layout(
            title={
                "text": f'功率曲线-{turbineModelInfo[Field_MachineTypeCode]}',
                'x': 0.5
            },
            xaxis=dict(
                title='风速',
                dtick=1,
                tickangle=-45,
                range=[cut_in_ws, 25]
            ),
            yaxis=dict(
                title='有功功率',
                dtick=self.axisStepActivePower,
                range=[self.axisLowerLimitActivePower,
                       self.axisUpperLimitActivePower]
            ),
            legend=dict(
                orientation="h",
                xanchor="center",
                x=0.5,   # centre the legend on the x axis
                y=-0.2   # place the legend below the x axis
            )
        )

        # Save the interactive HTML chart.
        htmlFileName = '全场-{}-{}-功率曲线.html'.format(self.powerFarmInfo[Field_PowerFarmName].iloc[0], turbineModelInfo[Field_MillTypeCode])
        htmlFilePath = os.path.join(output_path, htmlFileName)
        fig.write_html(htmlFilePath)

        result_rows = [{
            Field_Return_TypeAnalyst: self.typeAnalyst(),
            Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
            Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
            Field_CodeOfTurbine: Const_Output_Total,
            Field_Return_FilePath: htmlFilePath,
            Field_Return_IsSaveDatabase: False
        }]

        result_df = pd.DataFrame(result_rows)
        return result_df

    def plot_single_power_curve(self, ress, group, dataFrameGuaranteePowerCurve: pd.DataFrame, turbineName, outputAnalysisDir, conf: Contract):
        """Draw one turbine's curve highlighted against the rest of the farm.

        File export is currently disabled, so the returned DataFrame is empty.
        """
        fig = go.Figure()
        # Background: every turbine's curve in light grey.
        for turbine_num in ress[Field_NameOfTurbine].unique():
            turbine_data = ress[ress[Field_NameOfTurbine] == turbine_num]
            fig.add_trace(go.Scatter(
                x=turbine_data[Field_WindSpeed],
                y=turbine_data[Field_ActiverPower],
                mode='lines',
                line=dict(color='lightgrey'),
                name=f'{turbine_num}',
                showlegend=False
            ))

        # Lower x-axis bound: just below the cut-in wind speed when available.
        if not ress.empty and Field_CutInWS in ress.columns and ress[Field_CutInWS].notna().any():
            cut_in_ws = ress[Field_CutInWS].min() - 1
        else:
            cut_in_ws = 2

        # Foreground: this turbine's curve in dark blue.
        fig.add_trace(go.Scatter(
            x=group[Field_WindSpeed],
            y=group[Field_ActiverPower],
            mode='lines',
            line=dict(color='darkblue'),
            name=Field_ActiverPower,
            showlegend=False
        ))

        # Contract (guaranteed) power curve in red.
        fig.add_trace(go.Scatter(
            x=dataFrameGuaranteePowerCurve[Field_WindSpeed],
            y=dataFrameGuaranteePowerCurve[Field_ActiverPower],
            mode='lines+markers',
            line=dict(color='red'),
            marker=dict(color='red', size=5),
            name='合同功率曲线',
            showlegend=True
        ))

        fig.update_layout(
            title={
                "text": f'机组: {turbineName[0]}'
            },
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=0,
                xanchor="right",
                x=1,
                bgcolor='rgba(255,255,255,0)'
            ),
            xaxis=dict(
                title='风速',
                dtick=1,
                tickangle=-45,
                range=[cut_in_ws, 25]
            ),
            yaxis=dict(
                title='有功功率',
                dtick=self.axisStepActivePower,
                range=[self.axisLowerLimitActivePower,
                       self.axisUpperLimitActivePower]
            )
        )

        # PNG/HTML export is intentionally disabled, so no result rows are
        # produced for single-turbine charts at the moment.
        result_rows = []

        result_df = pd.DataFrame(result_rows)
        return result_df

+ 107 - 0
dataAnalysisBusiness/algorithm/powerOscillationAnalyst.py

@@ -0,0 +1,107 @@
+import os
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+from algorithmContract.confBusiness import *
+from behavior.analyst import Analyst
+from utils.directoryUtil import DirectoryUtil as dir
+
+
class PowerOscillationAnalyst(Analyst):
    """
    Power-oscillation analysis for wind turbines.
    """

    def typeAnalyst(self):
        # Identifier of this analysis type.
        return "power_diff"

    def turbineAnalysis(self,
                 dataFrame,
                 outputAnalysisDir,
                 outputFilePath,
                 confData: ConfBusiness,
                 turbineName):
        """Per-turbine step: compute the rotor-speed spread per power bin and
        write it to ``outputFilePath`` as CSV (see power_diff)."""
        self.power_diff(dataFrame, outputFilePath,
                        confData.field_power,confData.field_rotor_speed)

    def power_diff(self, dataFrame, outputFilePath,  field_Active_Power, field_Rotor_Speed):
        """Bin rows by active power (floored to 10-unit-wide bins) and write
        the per-bin max/min/spread of rotor speed to a CSV file.

        Note: mutates ``dataFrame`` in place by adding a 'power_col_floor'
        column.
        """
        # Floor the power column to the nearest 10
        dataFrame['power_col_floor'] = (
            dataFrame[field_Active_Power] // 10 * 10).astype('int32')

        # Group by the floored power column
        grouped = dataFrame.groupby('power_col_floor')

        # Calculate max, min, and spread of rotor speed within each group
        agg_df = grouped[field_Rotor_Speed].agg(
            speed_max='max',
            speed_min='min'
        )
        agg_df['speed_diff'] = agg_df['speed_max'] - agg_df['speed_min']

        # Sort by the floored power column
        agg_df = agg_df.sort_index()

        # Write the result to a CSV file
        agg_df.to_csv(outputFilePath)

    def turbinesAnalysis(self,dataFrameMerge,outputAnalysisDir, confData: ConfBusiness):
        """Farm-level step: plot oscillation charts from the per-turbine CSVs
        previously written into ``outputAnalysisDir``."""
        self.plot_power_oscillations(outputAnalysisDir,confData.farm_name)

    def plot_power_oscillations(self, csvFileDirOfCp, farm_name, encoding='utf-8'):
        """
        Plot power-oscillation trends from CSV files in a given input path and save the plots to that same path.

        Parameters:
        - csvFileDirOfCp: str, path to the directory containing input CSV files.
        - farm_name: str, name of the wind farm.
        - encoding: str, encoding of the input CSV files. Defaults to 'utf-8'.
        """
        sns.set_palette('deep')
        field_Name_Turbine = "turbine_name"
        x_name = 'power_col_floor'
        y_name = 'speed_diff'

        # Accumulator for all turbines' per-bin speed spreads
        res = pd.DataFrame()

        # Walk every file under the input directory
        for root, dir_names, file_names in dir.list_directory(csvFileDirOfCp):
            for file_name in file_names:

                if not file_name.endswith(CSVSuffix):
                    continue

                file_path = os.path.join(root, file_name)
                frame = pd.read_csv(file_path, encoding=encoding)

                # The file-name prefix is the turbine name
                turbine_name = file_name.split(CSVSuffix)[0]
                # Tag each row with its turbine name
                frame[field_Name_Turbine] = turbine_name

                selected_data = frame.loc[:, [field_Name_Turbine, x_name, y_name]]
                res = pd.concat([res, selected_data], axis=0)

        # Reset the index after concatenation
        ress = res.reset_index(drop=True)

        # Farm-wide chart: one oscillation line per turbine
        fig, ax = plt.subplots(figsize=(16, 8))
        ax = sns.lineplot(x=x_name, y=y_name, data=ress, hue=field_Name_Turbine)
        ax.set_title(f'功率震荡-{self.turbineModelInfo[Field_MachineTypeCode].iloc[0]}')
        plt.legend(ncol=4)
        plt.savefig(csvFileDirOfCp+ r'/{}-Power-Oscillation.png'.format(farm_name), bbox_inches='tight', dpi=300)
        plt.close()

        # Per-turbine charts: the turbine in dark blue over a grey background
        grouped = ress.groupby(field_Name_Turbine)
        for name, group in grouped:
            color = ["lightgrey"] * len(ress[field_Name_Turbine].unique())
            fig, ax = plt.subplots(figsize=(8, 8))
            ax = sns.lineplot(x=x_name, y=y_name, data=ress, hue=field_Name_Turbine, palette=sns.color_palette(color), legend=False)
            ax = sns.lineplot(x=x_name, y=y_name, data=group, color='darkblue', legend=False)
            ax.set_title('turbine_name={}'.format(name))
            plt.savefig(csvFileDirOfCp+ r'/{}.png'.format(name), bbox_inches='tight', dpi=120)
            plt.close()

+ 278 - 0
dataAnalysisBusiness/algorithm/ratedPowerWindSpeedAnalyst.py

@@ -0,0 +1,278 @@
+import os
+
+import pandas as pd
+import plotly.graph_objects as go
+from algorithmContract.confBusiness import *
+from algorithmContract.contract import Contract
+from behavior.analystWithGoodBadPoint import AnalystWithGoodBadPoint
+from plotly.subplots import make_subplots
+
+
+class RatedPowerWindSpeedAnalyst(AnalystWithGoodBadPoint):
+    """
+    风电机组额定功率风速分析。
+    秒级scada数据运算太慢,建议使用分钟级scada数据
+    """
+
+    def typeAnalyst(self):
+        return "rated_power_windspeed"
+
+    def turbinesAnalysis(self, outputAnalysisDir, conf: Contract, turbineCodes):
+        dictionary=self.processTurbineData(turbineCodes,conf,[Field_DeviceCode,Field_Time,Field_EnvTemp,Field_WindSpeed,Field_ActiverPower])
+        dataFrameMerge=self.userDataFrame(dictionary,conf.dataContract.configAnalysis,self)
+        turbineInfos = self.common.getTurbineInfos(conf.dataContract.dataFilter.powerFarmID, turbineCodes, self.turbineInfo)
+        return self.draw(dataFrameMerge, outputAnalysisDir, conf,turbineInfos)
+
+    def draw(self, dataFrameMerge: pd.DataFrame, outputAnalysisDir, conf: Contract,turbineModelInfo: pd.Series):
+        """
+        绘制并保存额定满发风速功率分布图,根据环境温度是否大于等于25℃。
+
+        参数:
+        dataFrameMerge (pd.DataFrame): 包含数据的DataFrame,需要包含设备名、风速和功率列。
+        outputAnalysisDir (str): 分析输出目录。
+        confData (ConfBusiness): 配置
+        """
+        # 检查所需列是否存在
+        required_columns = {Field_EnvTemp,
+                            Field_WindSpeed, Field_ActiverPower}
+        if not required_columns.issubset(dataFrameMerge.columns):
+            raise ValueError(f"DataFrame缺少必要的列。需要的列有: {required_columns}")
+
+        y_name = '功率'
+        upLimitOfPower = self.turbineInfo[Field_RatedPower].max() * 1.2
+        lowLimitOfPower = self.turbineInfo[Field_RatedPower].max()*0.9
+        field_RatedWindSpeed = self.turbineModelInfo[Field_RatedWindSpeed].max()
+
+        # 根据环境温度筛选数据
+        over_temp = dataFrameMerge[(dataFrameMerge[Field_EnvTemp] >= 25) & (
+            dataFrameMerge[Field_WindSpeed] >= field_RatedWindSpeed) & (dataFrameMerge[Field_ActiverPower] >= lowLimitOfPower)].sort_values(by=Field_NameOfTurbine)
+        below_temp = dataFrameMerge[(dataFrameMerge[Field_EnvTemp] < 25) & (
+            dataFrameMerge[Field_WindSpeed] >= field_RatedWindSpeed) & (dataFrameMerge[Field_ActiverPower] >= lowLimitOfPower)].sort_values(by=Field_NameOfTurbine)
+
+        # 绘制环境温度大于等于25℃的功率分布图
+        fig = make_subplots(rows=1, cols=1)
+        fig.add_trace(go.Box(y=over_temp[Field_ActiverPower], x=over_temp[Field_NameOfTurbine],
+                             # name='Ambient Temp >= 25°C',
+                             boxpoints='outliers',
+                             # box line color
+                             line=dict(color='black', width=1),
+                             # quartilemethod='exclusive',
+                             fillcolor='dodgerblue',
+                             showlegend=False,
+                             marker=dict(color='rgba(0, 0, 0, 0)', size=0.1)),
+                      row=1, col=1)
+
+        # Calculate medians and plot them as a line for visibility
+        medians = over_temp.groupby(Field_NameOfTurbine)[
+            Field_ActiverPower].median()
+        median_line = go.Scatter(
+            x=medians.index,
+            y=medians.values,
+            mode='markers',
+            marker=dict(symbol='line-ew-open', color='red', size=12),
+            showlegend=False
+        )
+        fig.add_trace(median_line)
+
+        # 更新布局
+        fig.update_yaxes(title_text=y_name, row=1, col=1, range=[
+                         lowLimitOfPower, upLimitOfPower], tickfont=dict(size=10))
+        fig.update_xaxes(title_text='机组', type='category',
+                         tickangle=-45, tickfont=dict(size=10))
+        fig.update_layout(title={
+                          'text': f'额定功率分布(环境温度>=25摄氏度)', 'x': 0.5}, boxmode='group')
+        # 确保从 Series 中提取的是具体的值
+        engineTypeCode = turbineModelInfo.get(Field_MillTypeCode, "")
+        if isinstance(engineTypeCode, pd.Series):
+            engineTypeCode = engineTypeCode.iloc[0]
+        engineTypeName = turbineModelInfo.get(Field_MachineTypeCode, "")
+        if isinstance(engineTypeName, pd.Series):
+            engineTypeName = engineTypeName.iloc[0]
+        # 构建最终的JSON对象
+        json_output = {
+            "analysisTypeCode": "额定功率和风速分析",
+            "engineCode":  engineTypeCode,
+            "engineTypeName": engineTypeName,
+            "xaixs": "机组",
+            "yaixs": "功率(kW)",
+            "data": [{
+                    "title":f'额定功率分布(环境温度>=25摄氏度)',
+                    "xData": over_temp[Field_NameOfTurbine].tolist(),
+                    "yData": over_temp[Field_ActiverPower].tolist(),
+                    "linecolor":'black',
+                    "linewidth":1,
+                    "fillcolor":'dodgerblue'
+                    }]
+        }
+        result_rows = []
+        # 保存图像
+        # pngFileName = "额定满发风速功率分布(10min)(环境温度大于25度).png"
+        # pngFilePath = os.path.join(outputAnalysisDir, pngFileName)
+        # fig.write_image(pngFilePath, scale=3)
+
+        # # 保存HTML
+        # htmlFileName = "额定满发风速功率分布(10min)(环境温度大于25度).html"
+        # htmlFilePath = os.path.join(outputAnalysisDir, htmlFileName)
+        # fig.write_html(htmlFilePath)
+        # 保存Json
+        # 将JSON对象保存到文件
+        output_json_path = os.path.join(outputAnalysisDir, "total_more_25.json")
+        with open(output_json_path, 'w', encoding='utf-8') as f:
+            import json
+            json.dump(json_output, f, ensure_ascii=False, indent=4)
+        # 如果需要返回DataFrame,可以包含文件路径
+        result_rows.append({
+            Field_Return_TypeAnalyst: self.typeAnalyst(),
+            Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+            Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+            Field_CodeOfTurbine: 'total',
+            Field_MillTypeCode: 'total_less_25',
+            Field_Return_FilePath: output_json_path,
+            Field_Return_IsSaveDatabase: True
+        })
+        # result_rows.append({
+        #     Field_Return_TypeAnalyst: self.typeAnalyst(),
+        #     Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+        #     Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+        #     Field_CodeOfTurbine: 'total',
+        #     Field_Return_FilePath: pngFilePath,
+        #     Field_Return_IsSaveDatabase: False
+        # })
+
+        # result_rows.append({
+        #     Field_Return_TypeAnalyst: self.typeAnalyst(),
+        #     Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+        #     Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+        #     Field_CodeOfTurbine: 'total',
+        #     Field_Return_FilePath: htmlFilePath,
+        #     Field_Return_IsSaveDatabase: True
+        # })
+
+        # 绘制环境温度小于25℃的功率分布图
+        fig = make_subplots(rows=1, cols=1)
+        fig.add_trace(go.Box(y=below_temp[Field_ActiverPower], x=below_temp[Field_NameOfTurbine],
+                             # name='Ambient Temp < 25°C',
+                             boxpoints='outliers',
+                             # box line color
+                             line=dict(color='black', width=1),
+                             # quartilemethod='exclusive',
+                             fillcolor='dodgerblue',
+                             showlegend=False,
+                             marker=dict(color='rgba(0, 0, 0, 0)', size=0.1)),
+                      row=1, col=1)
+
+        # Calculate medians and plot them as a line for visibility
+        medians = below_temp.groupby(Field_NameOfTurbine)[
+            Field_ActiverPower].median()
+        median_line = go.Scatter(
+            x=medians.index,
+            y=medians.values,
+            mode='markers',
+            marker=dict(symbol='line-ew-open', color='red', size=10),
+            showlegend=False
+        )
+        fig.add_trace(median_line)
+
+        # 更新布局
+        fig.update_yaxes(title_text=y_name, row=1, col=1, range=[
+                         lowLimitOfPower, upLimitOfPower], tickfont=dict(size=10))
+        fig.update_xaxes(title_text='机组', type='category',
+                         tickangle=-45, tickfont=dict(size=10))
+        fig.update_layout(title={
+                          'text': f'额定功率分布(环境温度<25摄氏度)', 'x': 0.5}, boxmode='group')
+        # 构建最终的JSON对象2
+        json_output2 = {
+            "analysisTypeCode": "额定功率和风速分析",
+            "engineCode":  engineTypeCode,
+            "engineTypeName": engineTypeName,
+            "xaixs": "机组",
+            "yaixs": "功率(kw)",
+            "data": [{
+                    "title":f'额定功率分布(环境温度<25摄氏度)',
+                    "xData": below_temp[Field_NameOfTurbine].tolist(),
+                    "yData": below_temp[Field_ActiverPower].tolist(),
+                    "linecolor":'black',
+                    "linewidth":1,
+                    "fillcolor":'dodgerblue'
+                    }]
+        }
+        # 保存图像
+        # pngFileName = "额定满发风速功率分布(10min)(环境温度小于25度).png"
+        # pngFilePath = os.path.join(outputAnalysisDir, pngFileName)
+        # fig.write_image(pngFilePath, scale=3)
+
+        # # 保存HTML
+        # htmlFileName = "额定满发风速功率分布(10min)(环境温度小于25度).html"
+        # htmlFilePath = os.path.join(outputAnalysisDir, htmlFileName)
+        # fig.write_html(htmlFilePath)
+                # 保存Json
+        # 将JSON对象保存到文件
+        output_json_path2 = os.path.join(outputAnalysisDir, "total_less_25.json")
+        with open(output_json_path2, 'w', encoding='utf-8') as f:
+            import json
+            json.dump(json_output2, f, ensure_ascii=False, indent=4)
+        # 如果需要返回DataFrame,可以包含文件路径
+        result_rows.append({
+            Field_Return_TypeAnalyst: self.typeAnalyst(),
+            Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+            Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+            Field_CodeOfTurbine: 'total',
+            Field_MillTypeCode: 'total_less_25',
+            Field_Return_FilePath: output_json_path2,
+            Field_Return_IsSaveDatabase: True
+        })
+        # result_rows.append({
+        #     Field_Return_TypeAnalyst: self.typeAnalyst(),
+        #     Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+        #     Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+        #     Field_CodeOfTurbine: 'total',
+        #     Field_Return_FilePath: pngFilePath,
+        #     Field_Return_IsSaveDatabase: False
+        # })
+
+        # result_rows.append({
+        #     Field_Return_TypeAnalyst: self.typeAnalyst(),
+        #     Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+        #     Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+        #     Field_CodeOfTurbine: 'total',
+        #     Field_Return_FilePath: htmlFilePath,
+        #     Field_Return_IsSaveDatabase: True
+        # })
+
+        result_df = pd.DataFrame(result_rows)
+
+        return result_df
+
+        """
+        # 绘制环境温度大于等于25℃的功率分布图
+        fig, ax = plt.subplots()
+        sns.boxplot(y=confData.field_power, x=Field_NameOfTurbine, data=over_temp, fliersize=0, ax=ax,
+                    medianprops={'linestyle': '-', 'color': 'red'},
+                    boxprops={'color': 'dodgerblue', 'facecolor': 'dodgerblue'})
+        ax.yaxis.set_major_locator(ticker.MultipleLocator(100))
+        ax.set_ylim(lowLimitOfPower, upLimitOfPower)
+        ax.set_ylabel(y_name)
+        ax.set_title(
+            'rated wind speed and power distribute(10min)(ambient temperature>=25℃)')
+        ax.grid(True)
+        plt.xticks(rotation=45)  # 旋转45度
+        plt.savefig(os.path.join(outputAnalysisDir,
+                    "额定满发风速功率分布(10min)(环境温度大于25度).png"), bbox_inches='tight', dpi=120)
+        plt.close()
+
+        # 绘制环境温度小于25℃的功率分布图
+        fig, ax = plt.subplots()
+        sns.boxplot(y=confData.field_power, x=Field_NameOfTurbine, data=below_temp, fliersize=0, ax=ax,
+                    medianprops={'linestyle': '-', 'color': 'red'},
+                    boxprops={'color': 'dodgerblue', 'facecolor': 'dodgerblue'})
+        ax.yaxis.set_major_locator(ticker.MultipleLocator(100))
+        ax.set_ylim(lowLimitOfPower, upLimitOfPower)
+        ax.set_ylabel(y_name)
+        ax.set_title(
+            'rated wind speed and power distribute(10min)(ambient temperature<25℃)')
+        ax.grid(True)
+        plt.xticks(rotation=45)  # 旋转45度
+        plt.savefig(os.path.join(outputAnalysisDir,
+                    "额定满发风速功率分布(10min)(环境温度小于25度).png"), bbox_inches='tight', dpi=120)
+        plt.close()
+        """

+ 138 - 0
dataAnalysisBusiness/algorithm/ratedWindSpeedAnalyst.py

@@ -0,0 +1,138 @@
+import os
+
+import pandas as pd
+import plotly.graph_objects as go
+from algorithmContract.confBusiness import *
+from algorithmContract.contract import Contract
+from behavior.analystWithGoodBadPoint import AnalystWithGoodBadPoint
+
+
+class RatedWindSpeedAnalyst(AnalystWithGoodBadPoint):
+    """
+    Rated wind-speed analysis for wind turbines.
+    Second-level SCADA data is too slow to process; minute-level SCADA data is recommended.
+    """
+
+    def typeAnalyst(self):
+        # Analysis-type key used to tag the returned result rows.
+        return "rated_windspeed"
+
+    def turbinesAnalysis(self, outputAnalysisDir, conf: Contract, turbineCodes):
+        # Load the required columns per turbine, apply the configured user
+        # filtering, then render and export the analysis artifacts.
+        # dictionary=self.processTurbineData(turbineCodes,conf,[Field_DeviceCode,Field_Time,Field_WindSpeed,Field_ActiverPower])
+        # dataFrameMerge=self.userDataFrame(dictionary,conf.dataContract.configAnalysis,self)
+        dictionary = self.processTurbineData(turbineCodes, conf,
+                                             [Field_DeviceCode, Field_Time, Field_WindSpeed, Field_ActiverPower])
+        turbineInfos = self.common.getTurbineInfos(conf.dataContract.dataFilter.powerFarmID, turbineCodes,
+                                                   self.turbineInfo)
+        dataFrameMerge = self.userDataFrame(dictionary, conf.dataContract.configAnalysis, self)
+        return self.draw(dataFrameMerge, turbineInfos, outputAnalysisDir, conf)
+
+
+    def draw(self, dataFrameMerge: pd.DataFrame, turbineModelInfo: pd.Series, outputAnalysisDir, conf: Contract):
+        """
+        Plot and save the count of samples in the full-power wind-speed interval.
+
+        Parameters:
+        dataFrameMerge (pd.DataFrame): data containing turbine name, wind speed and power columns.
+        turbineModelInfo (pd.Series): turbine model info used to label the JSON output.
+        outputAnalysisDir (str): analysis output directory.
+        conf (Contract): configuration contract.
+
+        Returns a DataFrame describing the exported JSON file.
+        """
+
+        # Initialize the result list
+        res = []
+
+        # Group by turbine name and compute per-turbine power statistics
+        grouped = dataFrameMerge.groupby(Field_NameOfTurbine)
+        for name, group in grouped:
+            # NOTE(review): the 11 m/s threshold is hard-coded — presumably at or
+            # near rated wind speed for these models; confirm against turbine specs.
+            group = group[group[Field_WindSpeed] >= 11]
+            res.append([name, group[Field_ActiverPower].min(), group[Field_ActiverPower].max(
+            ), group[Field_ActiverPower].median(), group.shape[0]])
+
+        # Build the result DataFrame
+        data = pd.DataFrame(res, columns=[
+                            Field_NameOfTurbine, 'power-min', 'power-max', 'power-median', 'count'])
+
+        fig = go.Figure(data=[go.Bar(
+                        x=data[Field_NameOfTurbine],
+                        y=data['count'],
+                        marker_color='dodgerblue'
+                        )
+        ]
+        )
+
+        fig.update_layout(
+            title={
+                "text": f'额定风速间隔数据计数',
+                        'x': 0.5
+            },
+            xaxis=dict(
+                title='机组',
+                tickangle=-45
+            ),
+            yaxis=dict(
+                title='总数'
+            )
+        )
+
+        # Ensure scalar values are extracted from any Series
+
+        engineTypeCode = turbineModelInfo.get(Field_MillTypeCode, "")
+        if isinstance(engineTypeCode, pd.Series):
+            engineTypeCode = engineTypeCode.iloc[0]
+
+        engineTypeName = turbineModelInfo.get(Field_MachineTypeCode, "")
+        if isinstance(engineTypeName, pd.Series):
+            engineTypeName = engineTypeName.iloc[0]
+        # Build the final JSON object
+        json_output = {
+            "analysisTypeCode": "额定风速分析",
+            "engineCode": engineTypeCode,
+            "engineTypeName": engineTypeName,
+            "xaixs": "机组",
+            "yaixs": "总数",
+            "data": [{
+                "engineName": "",
+                "engineCode": "",
+                "title": f' 额定风速间隔数据计数',
+                "xData": data[Field_NameOfTurbine].tolist(),
+                "yData": data['count'].tolist(),
+            }]
+        }
+
+        result_rows = []
+        # Save the image
+        # pngFileName = '风速区间数据计数.png'
+        # pngFilePath = os.path.join(outputAnalysisDir, pngFileName)
+        # fig.write_image(pngFilePath, scale=3)
+
+        # Save the HTML
+        # htmlFileName = '风速区间数据计数.html'
+        # htmlFilePath = os.path.join(outputAnalysisDir, htmlFileName)
+        # fig.write_html(htmlFilePath)
+        # Write the JSON object to file
+        output_json_path = os.path.join(outputAnalysisDir, f"rated_WindSpeed.json")
+        with open(output_json_path, 'w', encoding='utf-8') as f:
+            import json
+            json.dump(json_output, f, ensure_ascii=False, indent=4)
+
+        # result_rows.append({
+        #     Field_Return_TypeAnalyst: self.typeAnalyst(),
+        #     Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+        #     Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+        #     Field_CodeOfTurbine: 'total',
+        #     Field_Return_FilePath: pngFilePath,
+        #     Field_Return_IsSaveDatabase: False
+        # })
+
+        # Result row returned to the caller, including the output file path
+        result_rows.append({
+            Field_Return_TypeAnalyst: self.typeAnalyst(),
+            Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+            Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+            Field_CodeOfTurbine: 'total',
+            Field_MillTypeCode: 'total',
+            Field_Return_FilePath: output_json_path,
+            Field_Return_IsSaveDatabase: True
+        })
+
+        result_df = pd.DataFrame(result_rows)
+
+        return result_df

+ 319 - 0
dataAnalysisBusiness/algorithm/temperatureEnvironmentAnalyst.py

@@ -0,0 +1,319 @@
+import os
+
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+from algorithmContract.confBusiness import *
+from algorithmContract.contract import Contract
+from behavior.analystWithGoodBadLimitPoint import AnalystWithGoodBadLimitPoint
+from geopy.distance import geodesic
+from plotly.subplots import make_subplots
+
+
+class TemperatureEnvironmentAnalyst(AnalystWithGoodBadLimitPoint):
+    """
+    Ambient-temperature sensor analysis for wind turbines.
+    """
+
+    def typeAnalyst(self):
+        # Analysis-type key used to tag the returned result rows.
+        return "temperature_environment"
+
+    def turbinesAnalysis(self,  outputAnalysisDir, conf: Contract, turbineCodes):
+        dictionary = self.processTurbineData(turbineCodes, conf, [
+                                             Field_DeviceCode, Field_Time,Field_EnvTemp, Field_WindSpeed, Field_ActiverPower])
+        dataFrameOfTurbines = self.userDataFrame(
+            dictionary, conf.dataContract.configAnalysis, self)
+        # Check that the required columns are present
+        required_columns = {Field_CodeOfTurbine,Field_EnvTemp}
+        if not required_columns.issubset(dataFrameOfTurbines.columns):
+            raise ValueError(f"DataFrame缺少必要的列。需要的列有: {required_columns}")
+
+        # Ambient-temperature analysis: per-turbine median ambient temperature
+        turbineEnvTempData = dataFrameOfTurbines.groupby(Field_CodeOfTurbine).agg(
+            {Field_EnvTemp: 'median'})
+        turbineEnvTempData = turbineEnvTempData.reset_index()
+        mergeData = self.mergeData(self.turbineInfo, turbineEnvTempData)
+        # Per turbine model
+        turbrineInfos = self.common.getTurbineInfos(
+            conf.dataContract.dataFilter.powerFarmID, turbineCodes, self.turbineInfo)
+
+        returnResult= self.draw(mergeData, outputAnalysisDir, conf,turbrineInfos)
+
+        return returnResult
+        # return self.draw(mergeData, outputAnalysisDir, conf)
+
+    def mergeData(self,  turbineInfos: pd.DataFrame, turbineEnvTempData):
+        """
+        Merge each turbine's median ambient-temperature data with the
+        turbine information, joined per turbine code.
+
+        Parameters:
+        turbineInfos (pandas.DataFrame): turbine information data
+        turbineEnvTempData (pandas.DataFrame): per-turbine median ambient-temperature data
+
+        Returns:
+        pandas.DataFrame: merged turbine info + median ambient-temperature data
+        """
+
+        """
+        合并类型how的选项包括:
+
+        'inner': 内连接,只保留两个DataFrame中都有的键的行。
+        'outer': 外连接,保留两个DataFrame中任一或两者都有的键的行。
+        'left': 左连接,保留左边DataFrame的所有键,以及右边DataFrame中匹配的键的行。
+        'right': 右连接,保留右边DataFrame的所有键,以及左边DataFrame中匹配的键的行。
+        """
+        # turbineInfos[fieldTurbineName]=turbineInfos[fieldTurbineName].astype(str).apply(confData.add_W_if_starts_with_digit)
+        # turbineEnvTempData[Field_NameOfTurbine] = turbineEnvTempData[Field_NameOfTurbine].astype(
+        #     str)
+        tempDataFrame = pd.merge(turbineInfos, turbineEnvTempData, on=[
+                              Field_CodeOfTurbine], how='inner')
+
+        # Keep only the required fields
+        mergeDataFrame = tempDataFrame[[Field_CodeOfTurbine, Field_NameOfTurbine, Field_Latitude,Field_Longitude, Field_EnvTemp]]
+
+        return mergeDataFrame
+
+    # Find the turbines within `radius` metres of `center`.
+    # `center` is a tuple (name, longitude, latitude, env_temp) — see the 'new'
+    # column built in draw(); geodesic expects (latitude, longitude) pairs,
+    # hence the (center[2], center[1]) ordering.
+    def find_points_within_radius(self, data, center, field_temperature_env, radius):
+        points_within_radius = []
+        for index, row in data.iterrows():
+            distance = geodesic(
+                (center[2], center[1]), (row[Field_Latitude], row[Field_Longitude])).meters
+            if distance <= radius:
+                points_within_radius.append(
+                    (row[Field_NameOfTurbine], row[field_temperature_env]))
+        return points_within_radius
+
+    # Column name for (own temperature - neighbourhood median temperature).
+    fieldTemperatureDiff = "temperature_diff"
+
+    # def draw(self, dataFrame: pd.DataFrame, outputAnalysisDir, conf: Contract, charset=charset_unify):
+    def draw(self, dataFrame: pd.DataFrame, outputAnalysisDir, conf: Contract, turbineModelInfo: pd.Series):
+        """
+        Compare each turbine's median ambient temperature with the median of
+        its neighbours (within 10x rotor diameter) and export bias/median
+        bar charts as JSON; returns a DataFrame describing the output files.
+        """
+        # Build a (name, longitude, latitude, env_temp) tuple per turbine
+        dataFrame['new'] = dataFrame.loc[:, [Field_NameOfTurbine,
+                                             Field_Longitude, Field_Latitude, Field_EnvTemp]].apply(tuple, axis=1)
+        coordinates = dataFrame['new'].tolist()
+        # df = pd.DataFrame(coordinates, columns=[Field_NameOfTurbine, Field_Longitude, Field_Latitude, confData.field_env_temp])
+
+        # Find points within radius = 10x rotor diameter (metres) of each turbine
+        points_within_radius = {coord: self.find_points_within_radius(
+            dataFrame, coord, Field_EnvTemp, self.turbineModelInfo[Field_RotorDiameter].iloc[0]*10) for coord in coordinates}
+        res = []
+        for center, nearby_points in points_within_radius.items():
+            current_temp = dataFrame[dataFrame[Field_NameOfTurbine]
+                                     == center[0]][Field_EnvTemp].iloc[0]
+            target_tuple = (center[0], current_temp)
+            # Exclude the turbine itself from its own neighbourhood
+            if target_tuple in nearby_points:
+                nearby_points.remove(target_tuple)
+            # Fall back to the turbine's own temperature when it has no neighbours
+            median_temp = np.median(
+                [i[1] for i in nearby_points]) if nearby_points else current_temp
+            res.append((center[0], nearby_points, median_temp, current_temp))
+        res = pd.DataFrame(
+            res, columns=[Field_NameOfTurbine, '周边机组', '周边机组温度', '当前机组温度'])
+        res[self.fieldTemperatureDiff] = res['当前机组温度'] - res['周边机组温度']
+
+        # Visualize the data with plotly
+        fig1 = make_subplots(rows=1, cols=1)
+
+        # Temperature-deviation bar chart, with +/-5 degree reference lines
+        fig1.add_trace(
+            go.Bar(x=res[Field_NameOfTurbine],
+                   y=res[self.fieldTemperatureDiff], marker_color='dodgerblue'),
+            row=1, col=1
+        )
+        fig1.update_layout(
+            title={'text': f'温度偏差', 'x': 0.5},
+            xaxis_title='机组名称',
+            yaxis_title='温度偏差',
+            shapes=[
+                {'type': 'line', 'x0': 0, 'x1': 1, 'xref': 'paper', 'y0': 5,
+                    'y1': 5, 'line': {'color': 'red', 'dash': 'dot'}},
+                {'type': 'line', 'x0': 0, 'x1': 1, 'xref': 'paper', 'y0': -
+                    5, 'y1': -5, 'line': {'color': 'red', 'dash': 'dot'}}
+            ],
+            xaxis=dict(tickangle=-45)  # rotate x tick labels by 45 degrees
+        )
+
+        # Ensure scalar values are extracted from any Series
+        engineTypeCode = turbineModelInfo.get(Field_MillTypeCode, "")
+        if isinstance(engineTypeCode, pd.Series):
+            engineTypeCode = engineTypeCode.iloc[0]
+
+        engineTypeName = turbineModelInfo.get(Field_MachineTypeCode, "")
+        if isinstance(engineTypeName, pd.Series):
+            engineTypeName = engineTypeName.iloc[0]
+        # Build the final JSON object
+        json_output = {
+            "analysisTypeCode": "风电机组环境温度传感器分析",
+            "engineCode": engineTypeCode,
+            "engineTypeName": engineTypeName,
+            "xaixs": "机组名称",
+            "yaixs": "温度偏差(℃)",
+            "data": [{
+                "engineName": "",  # Field_NameOfTurbine
+                "engineCode": "",  # Field_CodeOfTurbine
+                "title": f'温度偏差',
+                "xData": res[Field_NameOfTurbine].tolist(),
+                "yData": res[self.fieldTemperatureDiff].tolist(),
+            }]
+        }
+
+        result_rows = []
+        # Save the image
+        # pngFileName = '{}环境温差Bias.png'.format(
+        #     self.powerFarmInfo[Field_PowerFarmName].iloc[0])
+        # pngFilePath = os.path.join(outputAnalysisDir, pngFileName)
+        # fig1.write_image(pngFilePath, scale=3)
+
+        # Save the HTML
+        # htmlFileName = '{}环境温差Bias.html'.format(
+        #    self.powerFarmInfo[Field_PowerFarmName].iloc[0])
+        # htmlFilePath = os.path.join(outputAnalysisDir, htmlFileName)
+        # fig1.write_html(htmlFilePath)
+
+        # Write the JSON object to file
+        output_json_path = os.path.join(outputAnalysisDir, f"total_Bias.json")
+        with open(output_json_path, 'w', encoding='utf-8') as f:
+            import json
+            json.dump(json_output, f, ensure_ascii=False, indent=4)
+
+        # Result row returned to the caller, including the output file path
+        result_rows.append({
+            Field_Return_TypeAnalyst: self.typeAnalyst(),
+            Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+            Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+            Field_CodeOfTurbine: 'total',
+            Field_MillTypeCode: 'total_Bias',
+            Field_Return_FilePath: output_json_path,
+            Field_Return_IsSaveDatabase: True
+        })
+
+        # result_rows.append({
+        #     Field_Return_TypeAnalyst: self.typeAnalyst(),
+        #     Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+        #     Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+        #     Field_CodeOfTurbine: Const_Output_Total,
+        #     Field_Return_FilePath: pngFilePath,
+        #     Field_Return_IsSaveDatabase: False
+        # })
+
+        # result_rows.append({
+        #    Field_Return_TypeAnalyst: self.typeAnalyst(),
+        #    Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+        #    Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+        #    Field_CodeOfTurbine: Const_Output_Total,
+        #    Field_Return_FilePath: htmlFilePath,
+        #    Field_Return_IsSaveDatabase: True
+        # })
+
+        # Median ambient-temperature bar chart
+        fig2 = make_subplots(rows=1, cols=1)
+        fig2.add_trace(
+            go.Bar(x=res[Field_NameOfTurbine],
+                   y=res['当前机组温度'], marker_color='dodgerblue'),
+            row=1, col=1
+        )
+        fig2.update_layout(
+            title={'text': f'平均温度', 'x': 0.5},
+            xaxis_title='机组名称',
+            yaxis_title=' 温度',
+            xaxis=dict(tickangle=-45)  # rotate x tick labels here too
+        )
+
+        # Ensure scalar values are extracted from any Series
+        # (redundant re-extraction; same values as computed above)
+        engineTypeCode = turbineModelInfo.get(Field_MillTypeCode, "")
+        if isinstance(engineTypeCode, pd.Series):
+            engineTypeCode = engineTypeCode.iloc[0]
+
+        engineTypeName = turbineModelInfo.get(Field_MachineTypeCode, "")
+        if isinstance(engineTypeName, pd.Series):
+            engineTypeName = engineTypeName.iloc[0]
+        # Build the final JSON object
+        json_output = {
+            "analysisTypeCode": "风电机组环境温度传感器分析",
+            "engineCode": engineTypeCode,
+            "engineTypeName": engineTypeName,
+            "xaixs": "机组名称",
+            "yaixs": "温度(℃)",
+            "data": [{
+                "engineName": "",  # Field_NameOfTurbine
+                "engineCode": "",  # Field_CodeOfTurbine
+                "title": f'平均温度',
+                "xData": res[Field_NameOfTurbine].tolist(),
+                "yData": res['当前机组温度'].tolist(),
+            }]
+        }
+
+
+        # Save the image
+        # pngFileName = '{}环境温度中位数.png'.format(
+        #     self.powerFarmInfo[Field_PowerFarmName].iloc[0])
+        # pngFilePath = os.path.join(outputAnalysisDir, pngFileName)
+        # fig2.write_image(pngFilePath, scale=3)
+
+        # Save the HTML
+        # htmlFileName = '{}环境温度中位数.html'.format(
+        #     self.powerFarmInfo[Field_PowerFarmName].iloc[0])
+        # htmlFilePath = os.path.join(outputAnalysisDir, htmlFileName)
+        # fig2.write_html(htmlFilePath)
+
+        # Write the JSON object to file
+        output_json_path = os.path.join(outputAnalysisDir, f"total_Mid.json")
+        with open(output_json_path, 'w', encoding='utf-8') as f:
+            import json
+            json.dump(json_output, f, ensure_ascii=False, indent=4)
+
+        # Result row returned to the caller, including the output file path
+        result_rows.append({
+            Field_Return_TypeAnalyst: self.typeAnalyst(),
+            Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+            Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+            Field_CodeOfTurbine: 'total',
+            Field_MillTypeCode: 'total_Mid',
+            Field_Return_FilePath: output_json_path,
+            Field_Return_IsSaveDatabase: True
+        })
+
+        # result_rows.append({
+        #     Field_Return_TypeAnalyst: self.typeAnalyst(),
+        #     Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+        #     Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+        #     Field_CodeOfTurbine: Const_Output_Total,
+        #     Field_Return_FilePath: pngFilePath,
+        #     Field_Return_IsSaveDatabase: False
+        # })
+
+        # result_rows.append({
+        #    Field_Return_TypeAnalyst: self.typeAnalyst(),
+        #    Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+        #    Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+        #    Field_CodeOfTurbine: Const_Output_Total,
+        #    Field_Return_FilePath: htmlFilePath,
+        #    Field_Return_IsSaveDatabase: True
+        # })
+
+        result_df = pd.DataFrame(result_rows)
+
+        return result_df
+        """
+        fig, ax = plt.subplots(figsize=(16,8),dpi=96)
+       
+        # 设置x轴刻度值旋转角度为45度  
+        plt.tick_params(axis='x', rotation=45)
+
+        sns.barplot(x=Field_NameOfTurbine,y=self.fieldTemperatureDiff,data=res,ax=ax,color='dodgerblue')
+        plt.axhline(y=5,ls=":",c="red")#添加水平直线
+        plt.axhline(y=-5,ls=":",c="red")#添加水平直线
+        ax.set_ylabel('temperature_difference')
+        ax.set_title('temperature Bias')
+        plt.savefig(outputAnalysisDir +'//'+ "{}环境温差Bias.png".format(confData.farm_name),bbox_inches='tight',dpi=120)
+
+
+        fig2, ax2 = plt.subplots(figsize=(16,8),dpi=96)
+        # 设置x轴刻度值旋转角度为45度  
+        plt.tick_params(axis='x', rotation=45)
+        
+        sns.barplot(x=Field_NameOfTurbine ,y='当前机组温度',data=res,ax=ax2,color='dodgerblue')
+        ax2.set_ylabel('temperature')
+        ax2.set_title('temperature median')
+        plt.savefig(outputAnalysisDir +'//'+ "{}环境温度均值.png".format(confData.farm_name),bbox_inches='tight',dpi=120)
+        """

+ 365 - 0
dataAnalysisBusiness/algorithm/tsrAnalyst.py

@@ -0,0 +1,365 @@
+import os
+import pandas as pd
+import math
+import numpy as np
+from plotly.subplots import make_subplots
+import plotly.express as px
+import pandas as pd
+import plotly.graph_objects as go
+import seaborn as sns
+from matplotlib.ticker import MultipleLocator
+from behavior.analystWithGoodPoint import AnalystWithGoodPoint
+from utils.directoryUtil import DirectoryUtil as dir
+from algorithmContract.confBusiness import *
+from algorithmContract.contract import Contract
+
+
+class TSRAnalyst(AnalystWithGoodPoint):
+    """
+    风电机组叶尖速比分析
+    """
+
+    def typeAnalyst(self):
+        return "tsr"
+
+    def turbinesAnalysis(self, outputAnalysisDir, conf: Contract, turbineCodes):
+        dictionary = self.processTurbineData(turbineCodes, conf, [
+                                             Field_DeviceCode, Field_Time, Field_WindSpeed, Field_ActiverPower,Field_RotorSpeed,Field_GeneratorSpeed])
+        dataFrameOfTurbines = self.userDataFrame(
+            dictionary, conf.dataContract.configAnalysis, self)
+
+        # 检查所需列是否存在
+        required_columns = {Field_WindSpeed, Field_RotorSpeed,Field_PowerFloor,Field_GeneratorSpeed}
+        if not required_columns.issubset(dataFrameOfTurbines.columns):
+            raise ValueError(f"DataFrame缺少必要的列。需要的列有: {required_columns}")
+
+        turbrineInfos = self.common.getTurbineInfos(
+            conf.dataContract.dataFilter.powerFarmID, turbineCodes, self.turbineInfo)
+
+        groupedOfTurbineModel = turbrineInfos.groupby(Field_MillTypeCode)
+
+        returnDatas = []
+        for turbineModelCode, group in groupedOfTurbineModel:
+            currTurbineCodes = group[Field_CodeOfTurbine].unique().tolist()
+            currTurbineModeInfo = self.common.getTurbineModelByCode(
+                turbineModelCode, self.turbineModelInfo)
+
+            currDataFrameOfTurbines = dataFrameOfTurbines[dataFrameOfTurbines[Field_CodeOfTurbine].isin(
+                currTurbineCodes)]
+
+            #创建一个与currDataFrameOfTurbines相同的dataFrameMerge
+            dataFrameMerge=currDataFrameOfTurbines.copy()
+            # return self.plot_tsr_distribution(self.tsr(dataFrameMerge), outputAnalysisDir, conf)
+            dataFrameMerge[Field_PowerFarmName] = self.currPowerFarmInfo.loc[Field_PowerFarmName]
+            # Calculate 'power_floor'
+            dataFrameMerge[Field_PowerFloor] = (
+                dataFrameMerge[Field_ActiverPower] / 10).astype(int) * 10
+
+            # Ensure the necessary columns are of float type
+            dataFrameMerge[Field_WindSpeed] = dataFrameMerge[Field_WindSpeed].astype(float)
+            dataFrameMerge[Field_RotorSpeed] = dataFrameMerge[Field_RotorSpeed].astype(float)
+            dataFrameMerge[Field_GeneratorSpeed] = dataFrameMerge[Field_GeneratorSpeed].astype(float)
+
+            # Group by 'power_floor' and calculate median, max, and min of TSR
+            grouped = dataFrameMerge.groupby([Field_PowerFloor, Field_CodeOfTurbine, Field_NameOfTurbine]).agg({
+                Field_WindSpeed: 'mean',
+                Field_RotorSpeed: 'median',
+                Field_GeneratorSpeed: 'median',
+                Field_TSR: ['mean', 'max', 'min'],
+                Field_PowerFarmName: 'max'
+            }).reset_index()
+
+            # Rename columns for clarity post aggregation
+            grouped.columns = [Field_PowerFloor,  Field_CodeOfTurbine, Field_NameOfTurbine, Field_WindSpeed,
+                               Field_RotorSpeed, Field_GeneratorSpeed, Field_TSR, Field_TSRMax, Field_TSRMin, Field_PowerFarmName]
+
+            # Sort by 'power_floor'
+            grouped = grouped.sort_values(by=[Field_CodeOfTurbine, Field_PowerFloor])
+
+            returnData = self.plot_tsr_distribution(
+                grouped, outputAnalysisDir, conf, currTurbineModeInfo)
+            returnDatas.append(returnData)
+
+        returnResult = pd.concat(returnDatas, ignore_index=True)
+
+        return returnResult
+
+      #------------------------------------------
+
+        # dictionary = self.processTurbineData(turbineCodes,conf,[Field_DeviceCode,Field_Time,Field_WindSpeed,Field_ActiverPower,Field_RotorSpeed,Field_GeneratorSpeed])
+        # dataFrameMerge = self.userDataFrame(dictionary,conf.dataContract.configAnalysis,self)
+        # # return self.plot_tsr_distribution(self.tsr(dataFrameMerge), outputAnalysisDir, conf)
+        # dataFrameMerge[Field_PowerFarmName] = self.currPowerFarmInfo.loc[Field_PowerFarmName]
+        # # Calculate 'power_floor'
+        # dataFrameMerge[Field_PowerFloor] = (
+        #     dataFrameMerge[Field_ActiverPower] / 10).astype(int) * 10
+
+        # # Ensure the necessary columns are of float type
+        # dataFrameMerge[Field_WindSpeed] = dataFrameMerge[Field_WindSpeed].astype(float)
+        # dataFrameMerge[Field_RotorSpeed] = dataFrameMerge[Field_RotorSpeed].astype(float)
+        # dataFrameMerge[Field_GeneratorSpeed] = dataFrameMerge[Field_GeneratorSpeed].astype(float)
+
+        # # Group by 'power_floor' and calculate median, max, and min of TSR
+        # grouped = dataFrameMerge.groupby([Field_PowerFloor, Field_CodeOfTurbine, Field_NameOfTurbine]).agg({
+        #     Field_WindSpeed: 'median',
+        #     Field_RotorSpeed: 'median',
+        #     Field_GeneratorSpeed: 'median',
+        #     Field_TSR: ['median', 'max', 'min'],
+        #     Field_PowerFarmName: 'max'
+        # }).reset_index()
+
+        # # Rename columns for clarity post aggregation
+        # grouped.columns = [Field_PowerFloor,  Field_CodeOfTurbine, Field_NameOfTurbine, Field_WindSpeed,
+        #                    Field_RotorSpeed, Field_GeneratorSpeed, Field_TSR, Field_TSRMax, Field_TSRMin, Field_PowerFarmName]
+
+        # # Sort by 'power_floor'
+        # grouped = grouped.sort_values(by=[Field_CodeOfTurbine, Field_PowerFloor])
+
+        # return self.plot_tsr_distribution(grouped, outputAnalysisDir, conf)
+
+    def plot_tsr_distribution(self, dataFrameMerge: pd.DataFrame, outputAnalysisDir, conf: Contract, turbineModelInfo: pd.Series):
+        """
+        Generates tsr distribution plots for turbines in a wind farm.
+
+        Parameters:
+        - csvFileDirOfCp: str, path to the directory containing input CSV files.
+        - farm_name: str, name of the wind farm.
+        - encoding: str, encoding of the input CSV files. Defaults to 'utf-8'.
+        """
+        x_name = Field_PowerFloor
+        y_name = Field_TSR
+
+        upLimitOfTSR = 20
+
+        # 创建一个列表来存储各个风电机组的数据
+        turbine_data_list = []
+
+        # 绘制全场TSR分布图
+        fig = go.Figure()
+        # colors = px.colors.sequential.Turbo
+        # 遍历不同的turbine来添加线条
+        for turbine in dataFrameMerge[Field_NameOfTurbine].unique():
+            turbine_data = dataFrameMerge[dataFrameMerge[Field_NameOfTurbine] == turbine]
+            fig.add_trace(go.Scatter(x=turbine_data[x_name], y=turbine_data[y_name],
+                                     mode='lines',
+                                     # line=dict(color=colors[idx % len(colors)]),
+                                     name=turbine))
+            # 提取数据
+            turbine_data_total = {
+                "engineName": turbine,
+                "engineCode": turbine_data[Field_CodeOfTurbine].iloc[0],
+                "xData": turbine_data[x_name].tolist(),
+                "yData": turbine_data[y_name].tolist(),
+                }
+            turbine_data_list.append(turbine_data_total)
+
+
+        fig.update_layout(
+            title={
+                "text": f'叶尖速比分布-{turbineModelInfo[Field_MachineTypeCode]}',
+                'x': 0.5
+            },
+
+            xaxis=dict(
+                title='最小功率',
+                dtick=200,
+                tickangle=-45,
+                range=[0, 1800]),
+            yaxis=dict(
+                title='叶尖速比',
+                dtick=self.axisStepTSR,
+                range=[self.axisLowerLimitTSR,
+                       self.axisUpperLimitTSR]
+            ),
+            legend=dict(
+                orientation="h",  # Horizontal orientation
+                xanchor="center",  # Anchor the legend to the center
+                x=0.5,  # Position legend at the center of the x-axis
+                y=-0.2,  # Position legend below the x-axis
+                # itemsizing='constant',  # Keep the size of the legend entries constant
+                # itemwidth=50
+            )
+        )
+
+        # 设置x轴标签旋转
+        fig.update_xaxes(tickangle=-45)
+
+        engineTypeCode = turbineModelInfo.get(Field_MillTypeCode, "")
+        if isinstance(engineTypeCode, pd.Series):
+            engineTypeCode = engineTypeCode.iloc[0]
+
+        engineTypeName = turbineModelInfo.get(Field_MachineTypeCode, "")
+        if isinstance(engineTypeName, pd.Series):
+            engineTypeName = engineTypeName.iloc[0]
+        # 构建最终的JSON对象
+        json_output = {
+            "analysisTypeCode": "风电机组叶尖速比分析",
+            "typecode": turbineModelInfo[Field_MillTypeCode],
+            "engineCode": engineTypeCode,
+            "engineTypeName": engineTypeName,
+            "title": f'叶尖速比分布-{turbineModelInfo[Field_MachineTypeCode]}',
+            "xaixs": "最小功率(kW)",
+            "yaixs": "叶尖速比",
+            "data": turbine_data_list
+
+        }
+
+        # 保存图形
+        # fig.write_image(csvFileDirOfCp + r"/{}-TSR-Distibute.png".format(confData.farm_name),format='png',width=800, height=500,scale=3)
+        # fig.show()
+
+        # 保存HTML
+        # htmlFileName = f"{dataFrameMerge[Field_PowerFarmName].iloc[0]}-TSR-Distribution-{turbineModelInfo[Field_MillTypeCode]}.html"
+        #htmlFilePath = os.path.join(outputAnalysisDir, htmlFileName)
+        #fig.write_html(htmlFilePath)
+
+        result_rows = []
+        # result_rows.append({
+        #    Field_Return_TypeAnalyst: self.typeAnalyst(),
+        #    Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+        #    Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+        #    Field_CodeOfTurbine: 'total',
+        #    Field_Return_FilePath: htmlFilePath,
+        #        Field_Return_IsSaveDatabase: True
+        # })
+
+        # 将JSON对象保存到文件
+        output_json_path = os.path.join(outputAnalysisDir, f"{turbineModelInfo[Field_MillTypeCode]}.json")
+        with open(output_json_path, 'w', encoding='utf-8') as f:
+            import json
+            json.dump(json_output, f, ensure_ascii=False, indent=4)
+
+        # 如果需要返回DataFrame,可以包含文件路径
+        result_rows.append({
+            Field_Return_TypeAnalyst: self.typeAnalyst(),
+            Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+            Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+            Field_CodeOfTurbine: 'total',
+            Field_MillTypeCode: turbineModelInfo[Field_MillTypeCode],
+            Field_Return_FilePath: output_json_path,
+            Field_Return_IsSaveDatabase: True
+        })
+
+
+        # 绘制每个设备的TSR分布图
+        for name, group in dataFrameMerge.groupby([Field_NameOfTurbine, Field_CodeOfTurbine]):
+            fig = go.Figure()
+
+            # 创建一个列表来存储各个风电机组的数据
+            turbine_data_list_each = []
+
+            # 循环绘制turbine的线条
+            for turbine in dataFrameMerge[Field_NameOfTurbine].unique():
+                turbine_data = dataFrameMerge[dataFrameMerge[Field_NameOfTurbine] == turbine]
+                fig.add_trace(go.Scatter(x=turbine_data[x_name],
+                                         y=turbine_data[y_name],
+                                         mode='lines',
+                                         line=dict(color='lightgrey'),
+                                         showlegend=False))
+                # 提取数据
+                turbine_data_each = {
+                    "engineName": turbine,
+                    "engineCode": turbine_data[Field_CodeOfTurbine].iloc[0],
+                    "xData": turbine_data[x_name].tolist(),
+                    "yData": turbine_data[y_name].tolist(),
+                }
+                turbine_data_list_each.append(turbine_data_each)
+
+            fig.add_trace(go.Scatter(x=group[x_name],
+                                     y=group[y_name],
+                                     mode='lines',
+                                     line=dict(color='darkblue'),
+                                     showlegend=False))
+
+            fig.update_layout(
+                title={"text": '机组: {}'.format(name[0])},
+                # margin=dict(
+                # t=35,  # 顶部 margin,减小这个值可以使标题更靠近图形
+                # l=60,  # 左侧 margin
+                # r=60,  # 右侧 margin
+                # b=40,  # 底部 margin
+                # ),
+                xaxis=dict(
+                    title='功率',
+                    dtick=200,
+                    tickangle=-45,
+                    range=[0, 1800]),
+                yaxis=dict(
+                    title='叶尖速比',
+                    dtick=self.axisStepTSR,
+                    range=[self.axisLowerLimitTSR,
+                           self.axisUpperLimitTSR]
+                )
+            )
+            fig.update_xaxes(tickangle=-45)
+
+
+            engineTypeCode = turbineModelInfo.get(Field_MillTypeCode, "")
+            if isinstance(engineTypeCode, pd.Series):
+                engineTypeCode = engineTypeCode.iloc[0]
+
+            engineTypeName = turbineModelInfo.get(Field_MachineTypeCode, "")
+            if isinstance(engineTypeName, pd.Series):
+                engineTypeName = engineTypeName.iloc[0]
+            # 构建最终的JSON对象
+            json_output = {
+                "analysisTypeCode": "风电机组叶尖速比分析",
+                "typecode": turbineModelInfo[Field_MillTypeCode],
+                "engineCode": engineTypeCode,
+                "engineTypeName": engineTypeName,
+                "title": f'机组:{format(name[0])}',
+                "xaixs": "功率(kW)",
+                "yaixs": "叶尖速比",
+                "data": turbine_data_list_each
+
+            }
+
+
+            # 保存图像
+            # pngFileName = f"{name[0]}.png"
+            # pngFilePath = os.path.join(outputAnalysisDir, pngFileName)
+            # fig.write_image(pngFilePath, scale=3)
+
+            # 保存HTML
+            # htmlFileName = f"{name[0]}.html"
+            # htmlFilePath = os.path.join(outputAnalysisDir, htmlFileName)
+            # fig.write_html(htmlFilePath)
+
+            # result_rows.append({
+            #     Field_Return_TypeAnalyst: self.typeAnalyst(),
+            #     Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+            #     Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+            #     Field_CodeOfTurbine: name[1],
+            #     Field_Return_FilePath: pngFilePath,
+            #     Field_Return_IsSaveDatabase: False
+            # })
+
+            # 将JSON对象保存到文件
+            output_json_path_each = os.path.join(outputAnalysisDir, f"{name[0]}.json")
+            with open(output_json_path_each, 'w', encoding='utf-8') as f:
+                import json
+                json.dump(json_output, f, ensure_ascii=False, indent=4)
+
+            # 如果需要返回DataFrame,可以包含文件路径
+            result_rows.append({
+                Field_Return_TypeAnalyst: self.typeAnalyst(),
+                Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+                Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+                Field_CodeOfTurbine: name[1],
+                Field_Return_FilePath: output_json_path_each,
+                Field_Return_IsSaveDatabase: True
+            })
+
+
+            # result_rows.append({
+            #    Field_Return_TypeAnalyst: self.typeAnalyst(),
+            #    Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+            #    Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+            #    Field_CodeOfTurbine: name[1],
+            #    Field_Return_FilePath: htmlFilePath,
+            #    Field_Return_IsSaveDatabase: True
+            # })
+
+        result_df = pd.DataFrame(result_rows)
+        return result_df
+

+ 379 - 0
dataAnalysisBusiness/algorithm/tsrWindSpeedAnalyst.py

@@ -0,0 +1,379 @@
+import os
+import pandas as pd
+import plotly.graph_objects as go
+import seaborn as sns
+from algorithmContract.confBusiness import *
+from algorithmContract.contract import Contract
+from behavior.analystWithGoodPoint import AnalystWithGoodPoint
+
+
+class TSRWindSpeedAnalyst(AnalystWithGoodPoint):
+    """
+    风电机组叶尖速比分析
+    """
+
+    def typeAnalyst(self):
+        return "tsr_windspeed"
+
+    def selectColumns(self):
+        return [Field_DeviceCode,Field_Time,Field_WindSpeed,Field_ActiverPower,Field_RotorSpeed,Field_GeneratorSpeed]
+
+    # def turbineAnalysis(self, dataFrame, outputAnalysisDir, outputFilePath, conf: Contract, Field_NameOfTurbine):
+    #
+    #     self.tsr(dataFrame, outputFilePath, Field_WindSpeed, Field_RotorSpeed,
+    #              Field_ActiverPower)
+
+    def tsr(self, dataFrame : pd.DataFrame):
+
+        #Add column field_name
+        dataFrame[Field_PowerFarmName] = self.currPowerFarmInfo.loc[Field_PowerFarmName]
+        # Alias the power column
+        # dataFrame[Field_Power] = dataFrame[Field_ActiverPower]
+        # Calculate 'wind_speed_floor'
+        # dataFrame[Field_WindSpeedFloor] = (dataFrame[Field_WindSpeed] / 1).astype(int) + 0.5
+
+        # Ensure the necessary columns are of float type
+        # dataFrame['wind_speed'] = dataFrame[Field_WindSpeed].astype(float)
+        dataFrame[Field_RotorSpeed] = dataFrame[Field_RotorSpeed].astype(float)
+        dataFrame[Field_GeneratorSpeed] = dataFrame[Field_GeneratorSpeed].astype(float)
+
+        # rotor_diameter = pd.to_numeric(rotor_diameter, errors='coerce')
+        # # Calculate TSR
+        # dataFrame['tsr'] = (dataFrame['rotor_speed'] * 0.104667 *
+        #                     (rotor_diameter / 2)) / dataFrame['wind_speed']
+
+        # Group by 'wind_speed_floor' and calculate median, max, and min of TSR
+        grouped = dataFrame.groupby([Field_WindSpeedFloor,Field_CodeOfTurbine,Field_NameOfTurbine]).agg({
+            Field_ActiverPower: 'median',
+            Field_PowerFloor: 'median',
+            Field_RotorSpeed : 'median',
+            Field_GeneratorSpeed: 'median',
+            Field_TSR : ['median', 'max', 'min'],
+            Field_PowerFarmName: 'max'
+        }).reset_index()
+
+        # Rename columns for clarity post aggregation
+        grouped.columns = [Field_WindSpeedFloor,  Field_CodeOfTurbine, Field_NameOfTurbine,Field_Power,Field_PowerFloor,
+                           Field_RotorSpeed, Field_GeneratorSpeed, Field_TSR, Field_TSRMax, Field_TSRMin, Field_PowerFarmName]
+
+        # Sort by 'wind_speed_floor'
+        grouped.sort_values(by=[Field_NameOfTurbine, Field_PowerFloor])
+
+        return grouped
+
+    def turbinesAnalysis(self, outputAnalysisDir, conf: Contract, turbineCodes):
+        dictionary = self.processTurbineData(turbineCodes,conf,self.selectColumns())
+        dataFrameOfTurbines = self.userDataFrame(dictionary,conf.dataContract.configAnalysis,self)
+        # 检查所需列是否存在
+        required_columns = {Field_WindSpeed, Field_RotorSpeed,Field_PowerFloor,Field_GeneratorSpeed,Field_TSR}
+        if not required_columns.issubset(dataFrameOfTurbines.columns):
+            raise ValueError(f"DataFrame缺少必要的列。需要的列有: {required_columns}")
+
+        turbrineInfos = self.common.getTurbineInfos(
+            conf.dataContract.dataFilter.powerFarmID, turbineCodes, self.turbineInfo)
+
+        groupedOfTurbineModel = turbrineInfos.groupby(Field_MillTypeCode)
+
+        returnDatas = []
+        for turbineModelCode, group in groupedOfTurbineModel:
+            currTurbineCodes = group[Field_CodeOfTurbine].unique().tolist()
+            currTurbineModeInfo = self.common.getTurbineModelByCode(
+                turbineModelCode, self.turbineModelInfo)
+
+            currDataFrameOfTurbines = dataFrameOfTurbines[dataFrameOfTurbines[Field_CodeOfTurbine].isin(
+                currTurbineCodes)]
+            #创建一个与dataFrameOfTurbines相同的dataFrame
+            dataFrame=currDataFrameOfTurbines.copy()
+            returnData = self.plot_tsr_distribution(
+                    self.tsr(dataFrame), outputAnalysisDir, conf, currTurbineModeInfo)
+            returnDatas.append(returnData)
+
+        returnResult = pd.concat(returnDatas, ignore_index=True)
+
+
+        return returnResult
+
+    def plot_tsr_distribution(self, dataFrame: pd.DataFrame, outputAnalysisDir, conf: Contract,turbineModelInfo: pd.Series):
+        """
+        Generates tsr distribution plots for turbines in a wind farm.
+
+        Parameters:
+        - csvFileDirOfCp: str, path to the directory containing input CSV files.
+        - farm_name: str, name of the wind farm.
+        - encoding: str, encoding of the input CSV files. Defaults to 'utf-8'.
+        """
+        x_name = 'wind_speed_floor'
+        y_name = 'tsr'
+
+        # upLimitOfTSR = 20
+
+        # 设置绘图样式
+        sns.set_palette('deep')
+
+        # 初始化结果DataFrame
+        # res = pd.DataFrame()
+        #
+        # # 遍历输入目录中的所有文件
+        # for root, dir_names, file_names in dir.list_directory(csvFileDirOfCp):
+        #     for file_name in file_names:
+        #
+        #         if not file_name.endswith(CSVSuffix):
+        #             continue
+        #
+        #         file_path = os.path.join(root, file_name)
+        #
+        #         # 读取CSV文件
+        #         frame = pd.read_csv(file_path, encoding=encoding)
+        #         frame = frame[(frame[x_name] > 0)]
+        #
+        #         # 选择需要的列并合并到结果DataFrame中
+        #         res = pd.concat([res, frame], axis=0)
+        #
+        # # 重置索引
+        # ress = res.reset_index()
+        # dataFrame[Field_NameOfTurbine] = dataFrame[Field_NameOfTurbine].astype(str)
+        # dataFrame = dataFrame.sort_values(by=[Field_NameOfTurbine, x_name])
+        # 绘制全场TSR分布图
+        fig = go.Figure()
+        # colors = px.colors.sequential.Turbo
+
+        # 创建一个列表来存储各个风电机组的数据
+        turbine_data_list = []
+
+        # 遍历不同的turbine来添加线条
+        for turbine in dataFrame[Field_NameOfTurbine].unique():
+            turbine_data = dataFrame[dataFrame[Field_NameOfTurbine] == turbine]
+            fig.add_trace(go.Scatter(x=turbine_data[x_name], y=turbine_data[y_name],
+                                     mode='lines',
+                                     # line=dict(color=colors[idx % len(colors)]),
+                                     name=turbine))
+            # 提取数据
+            turbine_data_total = {
+                "engineName": turbine,
+                "engineCode": turbine_data[Field_CodeOfTurbine].iloc[0],
+                "xData": turbine_data[x_name].tolist(),
+                "yData": turbine_data[y_name].tolist(),
+                }
+            turbine_data_list.append(turbine_data_total)
+
+
+        fig.update_layout(
+            title={
+                "text": f'叶尖速比分布图-{turbineModelInfo[Field_MachineTypeCode]}',
+                'x': 0.5
+            },
+            # margin=dict(
+            # t=35,  # 顶部 margin,减小这个值可以使标题更靠近图形
+            # l=60,  # 左侧 margin
+            # r=60,  # 右侧 margin
+            # b=40,  # 底部 margin
+            # ),
+
+            # legend=dict(title='Turbine',
+            #             x=1.02,
+            #             y=0.5,
+            #             orientation='v',
+            #             traceorder='normal',
+            #             font=dict(size=12),
+            #             bgcolor='rgba(255, 255, 255, 0)',
+            #             bordercolor='rgba(255, 255, 255, 0)'),
+            legend=dict(
+                orientation="h",  # Horizontal orientation
+                xanchor="center",  # Anchor the legend to the center
+                x=0.5,  # Position legend at the center of the x-axis
+                y=-0.2,  # Position legend below the x-axis
+                # itemsizing='constant',  # Keep the size of the legend entries constant
+                # itemwidth=50
+            ),
+            xaxis=dict(
+                title='风速',
+                dtick=1,
+                tickangle=-45,
+                range=[0, 26]),
+            yaxis=dict(
+                title='叶尖风速比',
+                dtick=self.axisStepTSR,
+                range=[self.axisLowerLimitTSR,
+                       self.axisUpperLimitTSR]
+            )
+        )
+        # 设置x轴标签旋转
+        fig.update_xaxes(tickangle=-45)
+        # 保存图形
+        # fig.write_image(csvFileDirOfCp + r"/{}-TSR-Distibute.png".format(confData.farm_name),format='png',width=800, height=500,scale=3)
+        # fig.show()
+
+
+        engineTypeCode = turbineModelInfo.get(Field_MillTypeCode, "")
+        if isinstance(engineTypeCode, pd.Series):
+            engineTypeCode = engineTypeCode.iloc[0]
+
+        engineTypeName = turbineModelInfo.get(Field_MachineTypeCode, "")
+        if isinstance(engineTypeName, pd.Series):
+            engineTypeName = engineTypeName.iloc[0]
+        # 构建最终的JSON对象
+        json_output = {
+            "analysisTypeCode": "风电机组叶尖速比和风速分析",
+            "typecode": turbineModelInfo[Field_MillTypeCode],
+            "engineCode": engineTypeCode,
+            "engineTypeName": engineTypeName,
+            "title": f'叶尖速比分布图-{turbineModelInfo[Field_MachineTypeCode]}',
+            "xaixs": "风速",
+            "yaixs": "叶尖风速比",
+            "data": turbine_data_list
+
+        }
+
+
+        # 保存HTML
+        # htmlFileName = f"{dataFrame[Field_PowerFarmName].iloc[0]}-TSR-Distribution-{turbineModelInfo[Field_MillTypeCode]}.html"
+        # htmlFilePath = os.path.join(outputAnalysisDir, htmlFileName)
+        # fig.write_html(htmlFilePath)
+
+        result_rows = []
+        # result_rows.append({
+        #    Field_Return_TypeAnalyst: self.typeAnalyst(),
+        #    Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+        #    Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+        #    Field_CodeOfTurbine: 'total',
+        #    Field_Return_FilePath: htmlFilePath,
+        #    Field_Return_IsSaveDatabase: True
+        # })
+
+        # 将JSON对象保存到文件
+        output_json_path = os.path.join(outputAnalysisDir, f"{turbineModelInfo[Field_MillTypeCode]}.json")
+        with open(output_json_path, 'w', encoding='utf-8') as f:
+            import json
+            json.dump(json_output, f, ensure_ascii=False, indent=4)
+
+        # 如果需要返回DataFrame,可以包含文件路径
+        result_rows.append({
+            Field_Return_TypeAnalyst: self.typeAnalyst(),
+            Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+            Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+            Field_CodeOfTurbine: 'total',
+            Field_MillTypeCode: turbineModelInfo[Field_MillTypeCode],
+            Field_Return_FilePath: output_json_path,
+            Field_Return_IsSaveDatabase: True
+        })
+
+
+
+        # 绘制每个设备的TSR分布图
+        for name, group in dataFrame.groupby([Field_NameOfTurbine, Field_CodeOfTurbine]):
+            fig = go.Figure()
+
+            # 创建一个列表来存储各个风电机组的数据
+            turbine_data_list_each = []
+
+            # 循环绘制turbine的线条
+            for turbine in dataFrame[Field_NameOfTurbine].unique():
+                turbine_data = dataFrame[dataFrame[Field_NameOfTurbine] == turbine]
+                fig.add_trace(go.Scatter(x=turbine_data[x_name],
+                                         y=turbine_data[y_name],
+                                         mode='lines',
+                                         line=dict(color='lightgrey'),
+                                         showlegend=False))
+                # 提取数据
+                turbine_data_each = {
+                    "engineName": turbine,
+                    "engineCode": turbine_data[Field_CodeOfTurbine].iloc[0],
+                    "xData": turbine_data[x_name].tolist(),
+                    "yData": turbine_data[y_name].tolist(),
+                }
+                turbine_data_list_each.append(turbine_data_each)
+
+            fig.add_trace(go.Scatter(x=group[x_name],
+                                     y=group[y_name],
+                                     mode='lines',
+                                     line=dict(color='darkblue'),
+                                     showlegend=False))
+
+            fig.update_layout(
+                title={"text": '机组: {}'.format(name[0])},
+                # margin=dict(
+                #         t=35,  # 顶部 margin,减小这个值可以使标题更靠近图形
+                #         l=60,  # 左侧 margin
+                #         r=60,  # 右侧 margin
+                #         b=40,  # 底部 margin
+                # ),
+                xaxis=dict(
+                    title='风速最低阈值 ',
+                    dtick=1,
+                    tickangle=-45,
+                    range=[0, 26]),
+                yaxis=dict(
+                    title='叶尖速比',
+                    dtick=self.axisStepTSR,
+                    range=[self.axisLowerLimitTSR,
+                           self.axisUpperLimitTSR]
+                )
+            )
+            fig.update_xaxes(tickangle=-45)
+
+
+            engineTypeCode = turbineModelInfo.get(Field_MillTypeCode, "")
+            if isinstance(engineTypeCode, pd.Series):
+                engineTypeCode = engineTypeCode.iloc[0]
+
+            engineTypeName = turbineModelInfo.get(Field_MachineTypeCode, "")
+            if isinstance(engineTypeName, pd.Series):
+                engineTypeName = engineTypeName.iloc[0]
+            # 构建最终的JSON对象
+            json_output = {
+                "analysisTypeCode": "风电机组叶尖速比和风速分析",
+                "typecode": turbineModelInfo[Field_MillTypeCode],
+                "engineCode": engineTypeCode,
+                "engineTypeName": engineTypeName,
+                "title": f'机组:{format(name[0])}',
+                "xaixs": "功率(kW)",
+                "yaixs": "叶尖速比",
+                "data": turbine_data_list_each
+
+            }
+            # 将JSON对象保存到文件
+            output_json_path_each = os.path.join(outputAnalysisDir, f"{name[0]}.json")
+            with open(output_json_path_each, 'w', encoding='utf-8') as f:
+                import json
+                json.dump(json_output, f, ensure_ascii=False, indent=4)
+
+            # 如果需要返回DataFrame,可以包含文件路径
+            result_rows.append({
+                Field_Return_TypeAnalyst: self.typeAnalyst(),
+                Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+                Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+                Field_CodeOfTurbine: name[1],
+                Field_Return_FilePath: output_json_path_each,
+                Field_Return_IsSaveDatabase: True
+            })
+
+            # 保存图像
+            # pngFileName = f"{name[0]}.png"
+            # pngFilePath = os.path.join(outputAnalysisDir, pngFileName)
+            # fig.write_image(pngFilePath, scale=3)
+
+            # 保存HTML
+            # htmlFileName = f"{name[0]}.html"
+            # htmlFilePath = os.path.join(outputAnalysisDir, htmlFileName)
+            # fig.write_html(htmlFilePath)
+
+            # result_rows.append({
+            #     Field_Return_TypeAnalyst: self.typeAnalyst(),
+            #     Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+            #     Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+            #     Field_CodeOfTurbine: name[1],
+            #     Field_Return_FilePath: pngFilePath,
+            #     Field_Return_IsSaveDatabase: False
+            # })
+
+            # result_rows.append({
+            #    Field_Return_TypeAnalyst: self.typeAnalyst(),
+            #    Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
+            #    Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
+            #    Field_CodeOfTurbine: name[1],
+            #    Field_Return_FilePath: htmlFilePath,
+            #    Field_Return_IsSaveDatabase: True
+            # })
+
+        result_df = pd.DataFrame(result_rows)
+        return result_df

+ 450 - 0
dataAnalysisBusiness/demo/SCADA_10min_category_0.py

@@ -0,0 +1,450 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr  8 15:01:43 2024
+
+@author: LDDN
+"""
+import math
+import pandas as pd  
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.pyplot import MultipleLocator#设定固定刻度
+
+
+scada_10min = pd.read_csv(r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72\82.csv',encoding="utf-8")  #.value是将单元格
+turbine_info = pd.read_csv(r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72\info.csv')  #.value是将单元格
+PRated = turbine_info.loc[:,["额定功率"]] #2000
+PRated = PRated.values
+VCutOut = turbine_info.loc[:,["切出风速"]]  #25
+VCutOut = VCutOut.values
+VCutIn = turbine_info.loc[:,["切入风速"]]  #3
+VCutIn = VCutIn.values
+VRated = turbine_info.loc[:,["额定风速"]] #10
+VRated = VRated.values
+
+time_stamp = scada_10min.loc[:,['时间']] #dataframe
+active_power = scada_10min.loc[:,['变频器电网侧有功功率']]
+wind_speed = scada_10min.loc[:,['风速']]
+LM = pd.concat([time_stamp,active_power,wind_speed],axis=1)  #dataframe
+
+
+Labeled_March809 = LM
+APower = Labeled_March809["变频器电网侧有功功率"]  #series读入有功功率
+WSpeed = Labeled_March809["风速"]  #读入风速
+maxP=np.max(APower)
+intervalP=25  #ceil(PRated*0.01)#功率分区间隔为额定功率的1%
+intervalwindspeed=0.25  #风速分区间隔0.25m/s
+
+#初始化
+PNum = 0  
+TopP = 0   
+# 根据条件计算PNum和TopP  
+if maxP >= PRated:  
+    PNum = math.floor(maxP / intervalP) + 1  
+    TopP = math.floor((maxP - PRated) / intervalP) + 1  
+else:  
+    PNum = math.floor(PRated / intervalP)  
+    TopP = 0   
+VNum = math.ceil(VCutOut / intervalwindspeed)  
+  
+SM1 = Labeled_March809.shape
+AA1 = SM1[0]  
+lab = [[0] for _ in range(AA1)]
+lab = pd.DataFrame(lab,columns=['lab'])
+Labeled_March809 = pd.concat([Labeled_March809,lab],axis=1)  #在tpv后加一列标签列
+Labeled_March809 = Labeled_March809.values
+SM = Labeled_March809.shape #(52561,4)
+AA = SM[0]  
+#存储功率大于0的运行数据
+#标识功率为0的点,标识-1
+DzMarch809_0 = np.zeros((AA, 3)) # 初始化数组来存储功率大于零的运行数据  
+nCounter1 = 0 
+Point_line = np.zeros(AA, dtype=int)  
+#考虑到很多功率小于10的数据存在,将<10的功率视为0
+for i in range(AA):
+    if (APower[i] > 10) & (WSpeed[i] > 0):
+        nCounter1 += 1   #共有nCounter1个功率大于0的正常数据
+        DzMarch809_0[nCounter1-1, 0] = WSpeed[i]  
+        DzMarch809_0[nCounter1-1, 1] = APower[i]  
+        Point_line[nCounter1-1] = i+1  # 记录nCounter1记下的数据在原始数据中的位置  
+    if APower[i] <= 10: 
+        Labeled_March809[i,SM[1]-1] = -1  # 功率为0标识为-1  array类型
+# 截取DzMarch809_0中实际存储的数据  其他全为0
+DzMarch809 = DzMarch809_0[:nCounter1, :]  
+#统计各网格落入的散点个数
+XBoxNumber = np.ones((PNum, VNum),dtype=int)  #(86 100)
+nWhichP = 0
+nWhichV = 0
+
+# 循环遍历DzMarch809中的有效数据  
+for i in range(nCounter1):  
+    
+    # 查找功率所在的区间  
+    for m in range(1, PNum + 1):  # 注意Python的range是左闭右开的,所以需要+1  
+        if (DzMarch809[i,1] > (m - 1) * intervalP) and (DzMarch809[i,1] <= m * intervalP):  
+            nWhichP = m  
+            break  
+      
+    # 查找风速所在的区间  
+    for n in range(1, VNum + 1):  # 同样需要+1  
+        if (DzMarch809[i, 0] > (n - 1)*intervalwindspeed) and (DzMarch809[i, 0] <= n*intervalwindspeed):  
+            nWhichV = n  
+            break  
+      
+    # 如果功率和风速都在有效区间内,增加对应网格的计数  
+    if (nWhichP > 0) and (nWhichV > 0):  
+        XBoxNumber[nWhichP - 1, nWhichV - 1] += 1  # 注意Python的索引是从0开始的,所以需要减1  
+# XBoxNumber现在包含了每个网格的计数[PNum行, VNum列]
+
+for m in range(1,PNum+1):
+    for n in range(1,VNum+1):
+        XBoxNumber[m-1,n-1] = XBoxNumber[m-1,n-1] - 1
+
+#在功率方向将网格内散点绝对个数转换为相对百分比,备用
+PBoxPercent = np.zeros((PNum, VNum),dtype = float)  #(86 100) #计算后会出现浮点型,所以不能定义int类型
+PBinSum = np.zeros((PNum,1),dtype=int)
+for i in range(1,PNum+1):
+    for m in range(1,VNum+1):
+        PBinSum[i-1] = PBinSum[i-1] + XBoxNumber[i-1,m-1] 
+    for m in range(1,VNum+1):
+        if PBinSum[i-1]>0:
+            PBoxPercent[i-1,m-1] = (XBoxNumber[i-1,m-1] / PBinSum[i-1])*100
+#在风速方向将网格内散点绝对个数转换为相对百分比,备用          
+VBoxPercent = np.zeros((PNum, VNum))  #(86 100) #计算后会出现浮点型,所以不能定义int类型
+VBinSum = np.zeros((VNum,1),dtype=int)
+for i in range(1,VNum+1):
+    for m in range(1,PNum+1):
+        VBinSum[i-1] = VBinSum[i-1] + XBoxNumber[m-1,i-1] 
+    for m in range(1,PNum+1):
+        if VBinSum[i-1]>0:
+            VBoxPercent[m-1,i-1] = (XBoxNumber[m-1,i-1] / VBinSum[i-1])*100
+# VBoxPercent PBoxPercent 左上-右下
+# 将数据颠倒一下  左下-右上         第一行换为倒数第一行 方便可视化
+InvXBoxNumber = np.zeros((PNum,VNum),dtype = int)
+InvPBoxPercent = np.zeros((PNum,VNum),dtype = float)
+InvVBoxPercent = np.zeros((PNum,VNum),dtype = float)
+for m in range(1,PNum+1):
+    for n in range(1,VNum+1):
+        InvXBoxNumber[m-1,n-1] = XBoxNumber[PNum-(m-1)-1,n-1]
+        InvPBoxPercent[m-1,n-1] = PBoxPercent[PNum-(m-1)-1,n-1]
+        InvVBoxPercent[m-1,n-1] = VBoxPercent[PNum-(m-1)-1,n-1]
+
+#以水平功率带方向为准,分析每个水平功率带中,功率主带中心,即找百分比最大的网格位置。
+PBoxMaxIndex = np.zeros((PNum,1),dtype = int)  #水平功率带最大网格位置索引
+PBoxMaxP = np.zeros((PNum,1),dtype = float)       #水平功率带最大网格百分比
+for m in range(1,PNum+1):
+    PBoxMaxIndex[m-1] = np.argmax(PBoxPercent[m-1, :])   #argmax返回最大值的索引
+    PBoxMaxP[m-1] = np.max(PBoxPercent[m-1, :])
+#以垂直风速方向为准,分析每个垂直风速带中,功率主带中心,即找百分比最大的网格位置。
+VBoxMaxIndex = np.zeros((VNum,1),dtype = int)  
+VBoxMaxV = np.zeros((VNum,1),dtype = float)       
+for m in range(1,VNum+1):
+    VBoxMaxIndex[m-1] = np.argmax(VBoxPercent[:, m-1])   
+    VBoxMaxV[m-1] = np.max(VBoxPercent[:, m-1])
+
+#切入风速特殊处理,如果切入风速过于偏右,向左拉回
+if PBoxMaxIndex[0]>14:                     #第一个值对应的是风速最小处 即切入风速
+    PBoxMaxIndex[0] = 9 
+#以水平功率带方向为基准,进行分析
+DotDense = np.zeros(PNum)   #每一水平功率带的功率主带包含的网格数
+DotDenseLeftRight = np.zeros((PNum,2))  #存储每一水平功率带的功率主带以最大网格为中心,向左,向右扩展的网格数
+DotValve = 90  #从中心向左右对称扩展网格的散点百分比和的阈值。
+PDotDenseSum = 0
+for i in range(PNum - TopP):  # 从最下层水平功率带开始,向上分析到特定的功率带  
+    PDotDenseSum = PBoxMaxP[i]  # 以中心最大水平功率带为基准,向左向右对称扩展网格,累加各网格散点百分比  
+    iSpreadRight = 1  
+    iSpreadLeft = 1  
+      
+    while PDotDenseSum < DotValve:  
+        if (PBoxMaxIndex[i] + iSpreadRight) < VNum-1-1:  
+            PDotDenseSum += PBoxPercent[i, PBoxMaxIndex[i] + iSpreadRight]  # 向右侧扩展  
+            iSpreadRight += 1  
+        else:
+            break  
+          
+        if (PBoxMaxIndex[i] - iSpreadLeft) > 0:  
+            PDotDenseSum += PBoxPercent[i, PBoxMaxIndex[i] - iSpreadLeft]  # 向左侧扩展  
+            iSpreadLeft += 1  
+        else:  
+            break  
+    iSpreadRight = iSpreadRight-1
+    iSpreadLeft = iSpreadLeft-1
+    #向左右扩展完毕
+    DotDenseLeftRight[i, 0] = iSpreadLeft  # 左  
+    DotDenseLeftRight[i, 1] = iSpreadRight  # 右  
+    DotDense[i] = iSpreadLeft + iSpreadRight + 1  # 记录向左向右扩展的个数及每个功率仓内网格的个数  
+# 此时DotDense和DotDenseLeftRight数组已经包含了所需信息    
+#各行功率主带右侧宽度的中位数最具有代表性(因为先右后左)
+DotDenseWidthLeft = np.zeros((PNum-TopP))
+for i in range(PNum-TopP):
+    DotDenseWidthLeft[i] = DotDenseLeftRight[i,1]  #DotDenseLeftRight[i,1]:向右延伸个数
+MainBandRight = np.median(DotDenseWidthLeft) #计算中位数
+
+# 初始化变量  
+PowerLimit = np.zeros(PNum, dtype=int)  # 各水平功率带是否为限功率标识,1:是;0:不是  
+WidthAverage = 0  # 功率主带右侧平均宽度  
+WidthAverage_L = 0  # 功率主带左侧平均宽度  
+WidthVar = 0  # 功率主带方差(此变量在提供的代码中并未使用)  
+PowerLimitValve = 6  # 限功率主带判别阈值  
+N_Pcount = 20  # 阈值  
+  
+nCounterLimit = 0  # 限功率的个数  
+nCounter = 0  # 正常水平功率带的个数  
+  
+# 循环遍历水平功率带,从第1个到第PNum-TopP个  
+for i in range(PNum - TopP):  
+    # 如果向右扩展网格数大于阈值,且该水平功率带点总数大于20,则标记为限功率带  
+    if (DotDenseLeftRight[i, 1] > PowerLimitValve) and (PBinSum[i] > N_Pcount):  
+        PowerLimit[i] = 1  
+        nCounterLimit += 1  #限功率的个数
+      
+    # 如果向右扩展网格数小于等于阈值,则累加右侧宽度(左侧宽度在代码中似乎有误)  
+    if DotDenseLeftRight[i, 1] <= PowerLimitValve:  
+        WidthAverage += DotDenseLeftRight[i, 1]  # 统计正常水平功率带右侧宽度
+        WidthAverage_L += DotDenseLeftRight[i,1]   #统计正常水平功率带左侧宽度
+        nCounter += 1  
+# 计算平均宽度  
+WidthAverage /= nCounter if nCounter > 0 else 1  # 避免除以0的情况  
+WidthAverage_L /= nCounter if nCounter > 0 else 1   
+
+#计算正常(即非限功率)水平功率带的功率主带宽度的方差,以此来反映从下到上宽度是否一致
+WidthVar = 0  # 功率主带宽度的方差   
+for i in range(PNum - TopP):  
+    # 如果向右扩展网格数小于等于阈值,则计算当前宽度与平均宽度的差值平方  
+    if DotDenseLeftRight[i, 1] <= PowerLimitValve:  
+        WidthVar += (DotDenseLeftRight[i, 1] - WidthAverage) ** 2  
+# 计算方差(注意:除以nCounter-1是为了得到样本方差)  
+WidthVar = np.sqrt(WidthVar / (nCounter - 1) if nCounter > 1 else 0)  # 避免除以0的情况
+
+#各水平功率带,功率主带的风速范围,右侧扩展网格数*2*0.25
+PowerBandWidth = WidthAverage*intervalwindspeed+WidthAverage_L*intervalwindspeed
+
+# 对限负荷水平功率带的最大网格进行修正  
+for i in range(1, PNum - TopP+1):  
+    if (PowerLimit[i] == 1) and (abs(PBoxMaxIndex[i] - PBoxMaxIndex[i - 1]) > 5):  
+        PBoxMaxIndex[i] = PBoxMaxIndex[i - 1] + 1  
+  
+# 输出各层功率主带的左右边界网格索引  
+DotDenseInverse = np.flipud(DotDenseLeftRight)  # 上下翻转数组以得到反向顺序  
+  
+# 计算功率主带的左右边界  
+CurveWidthR = np.ceil(WidthAverage) + 2  # 功率主带的右边界 + 2  
+CurveWidthL = np.ceil(WidthAverage_L) + 2  # 功率主带的左边界 + 2  
+  
+# 网格是否为限功率网格的标识数组  
+BBoxLimit = np.zeros((PNum, VNum), dtype=int)  
+# 标记限功率网格  
+for i in range(2, PNum - TopP):  
+    if PowerLimit[i] == 1:
+        BBoxLimit[i, int(PBoxMaxIndex[i] + CurveWidthR + 1):VNum] = 1
+
+# 初始化数据异常需要剔除的网格标识数组  
+BBoxRemove = np.zeros((PNum, VNum), dtype=int)  
+# 标记需要剔除的网格  
+for m in range(PNum - TopP): 
+    for n in range(int(PBoxMaxIndex[m]) + int(CurveWidthR), VNum):  # 注意Python中的索引从0开始,因此需要减去1  
+        BBoxRemove[m, n] = 1 
+    # 功率主带左侧的超发网格,从最大索引向左直到第一个网格  
+    
+    for n in range(int(PBoxMaxIndex[m]) - int(CurveWidthL)+1, 0, -1):  # 使用range的步长参数来实现从右向左的迭代  
+        BBoxRemove[m, n-1] = 2  # 注意Python中的索引从0开始,因此需要减去1
+
+# 初始化变量  
+CurveTop = np.zeros((2, 1), dtype=int)  
+CurveTopValve = 1  # 网格的百分比阈值  
+BTopFind = 0  
+mm = 0  
+#确定功率主带的左上拐点,即额定风速位置的网格索引
+CurveTop = np.zeros((2, 1), dtype=int)  
+CurveTopValve = 1  # 网格的百分比阈值  
+BTopFind = 0  
+mm = 0   
+for m in range(PNum - TopP, 0, -1):  # 注意Python的range是左闭右开区间,所以这里从PNum-TopP开始到1(不包括0)  
+    for n in range(int(np.floor(int(VCutIn) / intervalwindspeed)), VNum - 1):  # 使用floor函数来向下取整  
+        if (VBoxPercent[m, n - 1] < VBoxPercent[m, n]) and (VBoxPercent[m, n] <= VBoxPercent[m, n + 1]) and (XBoxNumber[m, n] >= 3):   
+            CurveTop[0] = m  
+            CurveTop[1] = n  #[第80个,第40个]
+            BTopFind = 1  
+            mm = m  # mm是拐点所在功率仓,对应其index
+            break  # 找到后退出内层循环  
+    if BTopFind == 1:  
+        break  # 找到后退出外层循环
+        
+IsolateValve = 3  #功率主带右侧孤立点占比功率仓阈值 3%
+# 遍历功率仓和网格  
+for m in range(PNum - TopP):    
+    for n in range(int(PBoxMaxIndex[m]) + int(CurveWidthR), VNum):  
+        # 检查PBoxPercent是否小于阈值,如果是,则标记BBoxRemove为1  
+        if PBoxPercent[m, n] < IsolateValve:   
+            BBoxRemove[m, n] = 1
+#功率主带顶部宽度
+CurveWidthT = np.floor((maxP - PRated) / intervalP) + 1  
+# 标记额定功率以上的超发点(PNum-PTop之间)  
+for m in range(PNum - TopP, PNum):   
+    for n in range(VNum):  
+        BBoxRemove[m, n] = 3
+  
+# 标记功率主带拐点左侧的欠发网格  
+for m in range(mm-1, PNum - TopP): 
+    for n in range(int(CurveTop[1]) - 2):
+        BBoxRemove[m, n] = 2    # BBoxRemove数组现在包含了根据条件标记的超发点和欠发网格的信息
+
+#以网格的标识,决定该网格内数据的标识。
+# DzMarch809Sel数组现在包含了每个数据点的标识
+DzMarch809Sel = np.zeros(nCounter1, dtype=int)  # 初始化标识数组   
+nWhichP = 0  
+nWhichV = 0  
+nBadA = 0   
+for i in range(nCounter1):  
+    for m in range( PNum ):   
+        if (DzMarch809[i, 1] > m * intervalP) and (DzMarch809[i, 1] <= (m+1) * intervalP):  
+            nWhichP = m  #m记录的是index
+            break  
+    for n in range( VNum ):  # 注意Python的range是左闭右开区间,所以这里到VNum+1  
+        if DzMarch809[i, 0] > ((n+1) * intervalwindspeed - intervalwindspeed/2) and DzMarch809[i, 0] <= ((n+1) * intervalwindspeed + intervalwindspeed / 2):  
+            nWhichV = n  #index
+            break  
+    if nWhichP >= 0 and nWhichV >= 0:  
+        if BBoxRemove[nWhichP, nWhichV] == 1:   
+            DzMarch809Sel[i] = 1  
+            nBadA += 1  
+        elif BBoxRemove[nWhichP, nWhichV] == 2:  
+            DzMarch809Sel[i] = 2  
+        elif BBoxRemove[nWhichP , nWhichV] == 3:  
+            DzMarch809Sel[i] = 0  # 额定风速以上的超发功率点认为是正常点,不再标识  
+# DzMarch809Sel数组现在包含了每个数据点的标识
+
+##############################滑动窗口方法
+# 存储限负荷数据  
+PVLimit = np.zeros((nCounter1, 3))  #存储限负荷数据  %第3列用于存储限电的点所在的行数
+nLimitTotal = 0  
+nWindowLength = 6   #滑动窗口长度设置为6
+LimitWindow = np.zeros(nWindowLength)  #滑动窗口空列表
+UpLimit = 0    #上限
+LowLimit = 0   #下限
+PowerStd = 30  # 功率波动方差  
+nWindowNum = np.floor(nCounter1/nWindowLength) #6587
+PowerLimitUp = PRated - 100  
+PowerLimitLow = 100  
+
+# 循环遍历每个窗口  
+for i in range(int(nWindowNum)):  
+    start_idx = i * nWindowLength  
+    end_idx = start_idx + nWindowLength  
+    LimitWindow = DzMarch809[start_idx:end_idx, 1]  # 提取当前窗口的数据  
+      
+    # 检查窗口内所有数据是否在功率范围内  
+    bAllInAreas = np.all(LimitWindow >= PowerLimitLow) and np.all(LimitWindow <= PowerLimitUp)  
+    if not bAllInAreas:  
+        continue  
+      
+    # 计算方差上下限  
+    UpLimit = LimitWindow[0] + PowerStd  
+    LowLimit = LimitWindow[0] - PowerStd  
+      
+    # 检查窗口内数据是否在方差范围内  
+    bAllInUpLow = np.all(LimitWindow >= LowLimit) and np.all(LimitWindow <= UpLimit)  
+    if bAllInUpLow:  
+        # 标识窗口内的数据为限负荷数据  
+        DzMarch809Sel[start_idx:end_idx] = 4  
+          
+        # 存储限负荷数据  
+        for j in range(nWindowLength):  
+            PVLimit[nLimitTotal, :2] = DzMarch809[start_idx + j, :2]  
+            PVLimit[nLimitTotal, 2] = Point_line[start_idx + j]  # 对数据进行标识  
+            nLimitTotal += 1  
+# PVLimit现在包含了限负荷数据,nLimitTotal是限负荷数据的总数
+
+
+#将功率滑动窗口主带平滑化
+# 初始化锯齿平滑的计数器  
+nSmooth = 0  
+# 遍历除了最后 TopP+1 个元素之外的所有 PBoxMaxIndex 元素  
+for i in range(PNum - TopP - 1):  
+    PVLeftDown = np.zeros(2)  
+    PVRightUp = np.zeros(2)  
+    # 检查当前与下一个 PBoxMaxIndex 之间的距离是否大于等于1  
+    if PBoxMaxIndex[i + 1] - PBoxMaxIndex[i] >= 1:  
+        # 计算左下和右上顶点的坐标  
+        PVLeftDown[0] = (PBoxMaxIndex[i]+1 + CurveWidthR) * 0.25 - 0.125  
+        PVLeftDown[1] = (i) * 25  
+        PVRightUp[0] = (PBoxMaxIndex[i+1]+1 + CurveWidthR) * 0.25 - 0.125  
+        PVRightUp[1] = (i+1) * 25  
+          
+        # 遍历 DzMarch809 数组  
+        for m in range(nCounter1):  
+            # 检查当前点是否在锯齿区域内  
+            if (DzMarch809[m, 0] > PVLeftDown[0]) and (DzMarch809[m, 0] < PVRightUp[0]) and (DzMarch809[m, 1] > PVLeftDown[1]) and (DzMarch809[m, 1] < PVRightUp[1]):
+                # 检查斜率是否大于对角连线  
+                if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (PVRightUp[1] - PVLeftDown[1]) / (PVRightUp[0] - PVLeftDown[0]):
+                    # 如果在锯齿左上三角形中,则选中并增加锯齿平滑计数器  
+                    DzMarch809Sel[m] = 0  
+                    nSmooth += 1  
+# DzMarch809Sel 数组现在包含了锯齿平滑的选择结果,nSmooth 是选中的点数
+
+
+###################################存储数据
+# 存储好点  
+nCounterPV = 0  # 初始化计数器  
+PVDot = np.zeros((nCounter1, 3))  # 初始化存储好点的数组  nCounter1是p>0的数
+for i in range(nCounter1):  
+    if DzMarch809Sel[i] == 0:  
+        nCounterPV += 1  
+        PVDot[nCounterPV-1, :2] = DzMarch809[i, :2]  
+        PVDot[nCounterPV-1, 2] = Point_line[i]  # 好点 Point_line记录nCounter1在原始数据中的位置 
+nCounterVP = nCounterPV  
+ 
+# 对所有数据中的好点进行标注    
+for i in range(nCounterVP):  
+    Labeled_March809[int(PVDot[i, 2] - 1), (SM[1]-1)] = 1  # 注意Python的索引从0开始,并且需要转换为整数索引  
+ 
+# 存储坏点  
+nCounterBad = 0  # 初始化计数器  
+PVBad = np.zeros((nCounter1, 3))  # 初始化存储坏点的数组  
+for i in range(nCounter1):  
+    if DzMarch809Sel[i] in [1, 2, 3]:  
+        nCounterBad += 1  
+        PVBad[nCounterBad-1, :2] = DzMarch809[i, :2]  
+        PVBad[nCounterBad-1, 2] = Point_line[i]  
+    
+# 对所有数据中的坏点进行标注  
+for i in range(nCounterBad):  
+    Labeled_March809[int(PVBad[i, 2] - 1),(SM[1]-1)] = 5  # 坏点标识  
+
+# 对所有数据中的限电点进行标注   
+for i in range(nLimitTotal):  
+    Labeled_March809[int(PVLimit[i, 2] - 1),(SM[1]-1)] = 4  # 限电点标识  
+# 对所有的数据点进行标注  
+# Labeled_March809是array,提取所第四列的值保存为dataframe
+A = Labeled_March809[:,3]
+A=pd.DataFrame(A,columns=['lab'])
+
+
+mergedTable = pd.concat([scada_10min,A],axis=1)#合并dataframe
+D = mergedTable[mergedTable['lab'] == 1]#选择为1的行
+
+ws = D["风速"].values  #array
+ap = D["变频器电网侧有功功率"]
+
+# fig=plt.figure(figsize=(10,6),dpi=500)  #figsize是图形大小,dpi像素
+fig=plt.figure()  #figsize是图形大小,dpi像素
+plt.scatter(ws,ap,s=1,c='black',marker='.') #'.'比'o'要更小
+
+# plt.scatter(x2,y2,s=10,c='b',marker='.',label='5.8-6.5建模噪声点')
+
+x_major_locator=MultipleLocator(5)
+y_major_locator=MultipleLocator(500)
+ax=plt.gca()
+ax.xaxis.set_major_locator(x_major_locator)
+ax.yaxis.set_major_locator(y_major_locator)
+plt.xlim((0,30))
+plt.ylim((0,2200))
+plt.tick_params(labelsize=8)
+
+# plt.grid(c='dimgray',alpha=0.2)
+
+plt.xlabel("V/(m$·$s$^{-1}$)",fontsize=8)
+plt.ylabel("P/kW",fontsize=8)
+
+# plt.savefig(r'D:\赵雅丽\研究生学习资料\学习资料\劣化度健康度\spyder\大论文\图\风速-功率.jpg',bbox_inches='tight')
+plt.show()

+ 507 - 0
dataAnalysisBusiness/demo/SCADA_10min_category_1.py

@@ -0,0 +1,507 @@
+import os
+import re
+import math
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.pyplot import MultipleLocator#设定固定刻度
+
def scada_10min_category():
    """
    Scan a fixed directory for per-turbine SCADA CSV files (purely numeric
    file names), label every row via data_label(), and scatter-plot the
    wind-speed / active-power points that were labelled good (lab == 1).
    """
    turbine_number=24  # NOTE(review): appears unused in this function — confirm before removing
    
    fpath = 'D:/赵雅丽/实习/算法/min_scada_LuoTuoGou/72/'
    # Regex matching file names that are purely digits with a .csv extension
    pattern = re.compile(r'^\d+\.csv$')
    
    # List every file and directory under the given path
    files_in_dir = os.listdir(fpath)
    for file in files_in_dir:  
        # Keep only the per-turbine CSV files (numeric names)
        if pattern.match(file):  
            # Build the file's full path
            fname = os.path.join(fpath, file)
            # Read the CSV, keeping original column names and without skipping rows
            scada_10min = pd.read_csv(fname)
        
            # Select the timestamp, active-power and wind-speed columns
            time_stamp = scada_10min.loc[:,['时间']] #dataframe
            active_power = scada_10min.loc[:,['变频器电网侧有功功率']]
            wind_speed = scada_10min.loc[:,['风速']]
            LM = pd.concat([time_stamp,active_power,wind_speed],axis=1)  #dataframe
            # lm=LM.values #array
    
            xx = data_label(LM,fpath)  # per-row labels as a DataFrame ('lab' column)
            mergedTable = pd.concat([scada_10min,xx],axis=1)  # merge labels back onto the raw data
            D = mergedTable[mergedTable['lab'] == 1]  # keep only the rows labelled good (1)
            ws = D["风速"]#series
            ap = D["变频器电网侧有功功率"]
            ## plot the retained points
            # fig = plt.figure(figsize=(10,6),dpi=500)  # figsize is the figure size, dpi the resolution
            plt.scatter(ws,ap,s=8,c='black',marker='.',label='好点')
            # x_major_locator=MultipleLocator(5)
            # y_major_locator=MultipleLocator(500)
            # ax=plt.gca()
            # ax.xaxis.set_major_locator(x_major_locator)
            # ax.yaxis.set_major_locator(y_major_locator)
            # plt.xlim((0,30))
            # plt.ylim((0,2200))
            # plt.tick_params(labelsize=20)
            # # plt.grid(c='dimgray',alpha=0.2)
            # plt.xlabel("V/(m$·$s$^{-1}$)",fontsize=20)
            # plt.ylabel("P/kW",fontsize=20)

            # # plt.savefig(r'D:\赵雅丽\研究生学习资料\学习资料\劣化度健康度\spyder\大论文\图\风速-功率.jpg',bbox_inches='tight')
            # plt.show()
+        
+        
+        
+def data_label(x1,x2):   # LM:T P V  path:文件获取路径
+    fpath2 = x2
+    fname2 = os.path.join(fpath2, "info.csv") #读取数据文件2(额定风速额定功率等)
+    # 参数na_filter=False仅阻止了pandas自动检测这些缺失值,并不能忽略  
+    # 但请注意,pandas没有直接的'omitrow'选项,如果需要忽略包含缺失值的行,需要在后续处理中处理
+    turbine_info = pd.read_csv(fname2, na_filter=False)
+    # 删除包含任何缺失值的行  
+    turbine_info = turbine_info.dropna() 
+    
+    PRated = turbine_info.loc[:,["额定功率"]].values #array
+    VCutOut = turbine_info.loc[:,["切出风速"]].values  
+    VCutIn = turbine_info.loc[:,["切入风速"]].values  
+    VRated = turbine_info.loc[:,["额定风速"]].values
+    
+    #网格法确定风速风向分区数量,功率方向分区数量
+    Labeled_March809 = x1
+    APower = Labeled_March809["变频器电网侧有功功率"]  #series读入有功功率
+    WSpeed = Labeled_March809["风速"]  #读入风速
+    maxP=np.max(APower)
+    intervalP=25  #ceil(PRated*0.01)#功率分区间隔为额定功率的1%
+    intervalwindspeed=0.25  #风速分区间隔0.25m/s
+    #初始化
+    PNum = 0  
+    TopP = 0   
+    # 根据条件计算PNum和TopP  
+    if maxP >= PRated:  
+        PNum = math.floor(maxP / intervalP) + 1  
+        TopP = math.floor((maxP - PRated) / intervalP) + 1  
+    else:  
+        PNum = math.floor(PRated / intervalP)  
+        TopP = 0   
+    VNum = math.ceil(VCutOut / intervalwindspeed)  
+    SM1 = Labeled_March809.shape  
+    AA1 = SM1[0]  #运行数据的条数
+    lab = [[0] for _ in range(AA1)]  #创建全0空列表
+    lab = pd.DataFrame(lab,columns=['lab'])
+    Labeled_March809 = pd.concat([Labeled_March809,lab],axis=1).values  #在tpv后加一列标签列,并转为array以支持元组索引
+    SM = Labeled_March809.shape #(52561,4)
+    AA = SM[0]  
+    #存储功率大于0的运行数据
+    #标识功率为0的点,标识-1
+    DzMarch809_0 = np.zeros((AA, 3))  #array(52561,3)
+    nCounter1 = 0
+    Point_line = np.zeros(AA, dtype=int)
+    #考虑到很多功率小于10的数据存在,将<10的功率视为0
+    for i in range(AA):
+        if (APower[i] > 10) & (WSpeed[i] > 0):
+            nCounter1 += 1   #共有nCounter1个功率大于0的正常数据
+            DzMarch809_0[nCounter1-1, 0] = WSpeed[i]  
+            DzMarch809_0[nCounter1-1, 1] = APower[i]  
+            Point_line[nCounter1-1] = i+1  # 记录nCounter1记下的数据在原始数据中的位置  
+        if APower[i] <= 10: 
+            Labeled_March809[i,SM[1]-1] = -1  # 功率为0标识为-1  array类型
+    # 截取DzMarch809_0中实际存储的数据  其他全为0
+    DzMarch809 = DzMarch809_0[:nCounter1, :]  
+    #统计各网格落入的散点个数
+    XBoxNumber = np.ones((PNum, VNum),dtype=int)  #(86 100)
+    nWhichP = 0
+    nWhichV = 0
+
+    # 循环遍历DzMarch809中的有效数据  
+    for i in range(nCounter1):  
+        
+        # 查找功率所在的区间  
+        for m in range(1, PNum + 1):  # 注意Python的range是左闭右开的,所以需要+1  
+            if (DzMarch809[i,1] > (m - 1) * intervalP) and (DzMarch809[i,1] <= m * intervalP):  
+                nWhichP = m  
+                break  
+          
+        # 查找风速所在的区间  
+        for n in range(1, VNum + 1):  # 同样需要+1  
+            if (DzMarch809[i, 0] > (n - 1)*intervalwindspeed) and (DzMarch809[i, 0] <= n*intervalwindspeed):  
+                nWhichV = n  
+                break  
+          
+        # 如果功率和风速都在有效区间内,增加对应网格的计数  
+        if (nWhichP > 0) and (nWhichV > 0):  
+            XBoxNumber[nWhichP - 1, nWhichV - 1] += 1  # 注意Python的索引是从0开始的,所以需要减1  
+    # XBoxNumber现在包含了每个网格的计数[PNum行, VNum列]
+
+    for m in range(1,PNum+1):
+        for n in range(1,VNum+1):
+            XBoxNumber[m-1,n-1] = XBoxNumber[m-1,n-1] - 1
+
+    #在功率方向将网格内散点绝对个数转换为相对百分比,备用
+    PBoxPercent = np.zeros((PNum, VNum))  #(86 100) #计算后会出现浮点型,所以不能定义int类型
+    PBinSum = np.zeros((PNum,1),dtype=int)
+    for i in range(1,PNum+1):
+        for m in range(1,VNum+1):
+            PBinSum[i-1] = PBinSum[i-1] + XBoxNumber[i-1,m-1] 
+        for m in range(1,VNum+1):
+            if PBinSum[i-1]>0:
+                PBoxPercent[i-1,m-1] = (XBoxNumber[i-1,m-1] / PBinSum[i-1])*100
+    #在风速方向将网格内散点绝对个数转换为相对百分比,备用          
+    VBoxPercent = np.zeros((PNum, VNum))  #(86 100) #计算后会出现浮点型,所以不能定义int类型
+    VBinSum = np.zeros((VNum,1),dtype=int)
+    for i in range(1,VNum+1):
+        for m in range(1,PNum+1):
+            VBinSum[i-1] = VBinSum[i-1] + XBoxNumber[m-1,i-1] 
+        for m in range(1,PNum+1):
+            if VBinSum[i-1]>0:
+                VBoxPercent[m-1,i-1] = (XBoxNumber[m-1,i-1] / VBinSum[i-1])*100
+    # VBoxPercent PBoxPercent 左上-右下
+    # 将数据颠倒一下  左下-右上         第一行换为倒数第一行 方便可视化
+    InvXBoxNumber = np.zeros((PNum,VNum),dtype = int)
+    InvPBoxPercent = np.zeros((PNum,VNum),dtype = float)
+    InvVBoxPercent = np.zeros((PNum,VNum),dtype = float)
+    for m in range(1,PNum+1):
+        for n in range(1,VNum+1):
+            InvXBoxNumber[m-1,n-1] = XBoxNumber[PNum-(m-1)-1,n-1]
+            InvPBoxPercent[m-1,n-1] = PBoxPercent[PNum-(m-1)-1,n-1]
+            InvVBoxPercent[m-1,n-1] = VBoxPercent[PNum-(m-1)-1,n-1]
+    
+    #以水平功率带方向为准,分析每个水平功率带中,功率主带中心,即找百分比最大的网格位置。
+    PBoxMaxIndex = np.zeros((PNum,1),dtype = int)  #水平功率带最大网格位置索引
+    PBoxMaxP = np.zeros((PNum,1),dtype = float)       #水平功率带最大网格百分比
+    for m in range(1,PNum+1):
+        PBoxMaxIndex[m-1] = np.argmax(PBoxPercent[m-1, :])   #argmax返回最大值的索引
+        PBoxMaxP[m-1] = np.max(PBoxPercent[m-1, :])
+    #以垂直风速方向为准,分析每个垂直风速带中,功率主带中心,即找百分比最大的网格位置。
+    VBoxMaxIndex = np.zeros((VNum,1),dtype = int)  
+    VBoxMaxV = np.zeros((VNum,1),dtype = float)       
+    for m in range(1,VNum+1):
+        VBoxMaxIndex[m-1] = np.argmax(VBoxPercent[:, m-1])   
+        VBoxMaxV[m-1] = np.max(VBoxPercent[:, m-1])
+    
+    #切入风速特殊处理,如果切入风速过于偏右,向左拉回
+    if PBoxMaxIndex[0]>14:                     #第一个值对应的是风速最小处 即切入风速
+        PBoxMaxIndex[0] = 9 
+    #以水平功率带方向为基准,进行分析
+    DotDense = np.zeros(PNum)   #每一水平功率带的功率主带包含的网格数
+    DotDenseLeftRight = np.zeros((PNum,2))  #存储每一水平功率带的功率主带以最大网格为中心,向左,向右扩展的网格数
+    DotValve = 90  #从中心向左右对称扩展网格的散点百分比和的阈值。
+    PDotDenseSum = 0
+    for i in range(PNum - TopP):  # 从最下层水平功率带开始,向上分析到特定的功率带  
+        PDotDenseSum = PBoxMaxP[i]  # 以中心最大水平功率带为基准,向左向右对称扩展网格,累加各网格散点百分比  
+        iSpreadRight = 1  
+        iSpreadLeft = 1  
+          
+        while PDotDenseSum < DotValve:  
+            if (PBoxMaxIndex[i] + iSpreadRight) < VNum-1-1:  
+                PDotDenseSum += PBoxPercent[i, PBoxMaxIndex[i] + iSpreadRight]  # 向右侧扩展  
+                iSpreadRight += 1  
+            else:
+                break  
+              
+            if (PBoxMaxIndex[i] - iSpreadLeft) > 0:  
+                PDotDenseSum += PBoxPercent[i, PBoxMaxIndex[i] - iSpreadLeft]  # 向左侧扩展  
+                iSpreadLeft += 1  
+            else:  
+                break  
+        iSpreadRight = iSpreadRight-1
+        iSpreadLeft = iSpreadLeft-1
+        #向左右扩展完毕
+        DotDenseLeftRight[i, 0] = iSpreadLeft  # 左  
+        DotDenseLeftRight[i, 1] = iSpreadRight  # 右  
+        DotDense[i] = iSpreadLeft + iSpreadRight + 1  # 记录向左向右扩展的个数及每个功率仓内网格的个数  
+    # 此时DotDense和DotDenseLeftRight数组已经包含了所需信息    
+    #各行功率主带右侧宽度的中位数最具有代表性(因为先右后左)
+    DotDenseWidthLeft = np.zeros((PNum-TopP))
+    for i in range(PNum-TopP):
+        DotDenseWidthLeft[i] = DotDenseLeftRight[i,1]  #DotDenseLeftRight[i,1]:向右延伸个数
+    MainBandRight = np.median(DotDenseWidthLeft) #计算中位数
+    
+    # 初始化变量  
+    PowerLimit = np.zeros(PNum, dtype=int)  # 各水平功率带是否为限功率标识,1:是;0:不是  
+    WidthAverage = 0  # 功率主带右侧平均宽度  
+    WidthAverage_L = 0  # 功率主带左侧平均宽度  
+    WidthVar = 0  # 功率主带方差(此变量在提供的代码中并未使用)  
+    PowerLimitValve = 6  # 限功率主带判别阈值  
+    N_Pcount = 20  # 阈值  
+      
+    nCounterLimit = 0  # 限功率的个数  
+    nCounter = 0  # 正常水平功率带的个数  
+      
+    # 循环遍历水平功率带,从第1个到第PNum-TopP个  
+    for i in range(PNum - TopP):  
+        # 如果向右扩展网格数大于阈值,且该水平功率带点总数大于20,则标记为限功率带  
+        if (DotDenseLeftRight[i, 1] > PowerLimitValve) and (PBinSum[i] > N_Pcount):  
+            PowerLimit[i] = 1  
+            nCounterLimit += 1  #限功率的个数
+          
+        # 如果向右扩展网格数小于等于阈值,则累加右侧宽度(左侧宽度在代码中似乎有误)  
+        if DotDenseLeftRight[i, 1] <= PowerLimitValve:  
+            WidthAverage += DotDenseLeftRight[i, 1]  # 统计正常水平功率带右侧宽度
+            WidthAverage_L += DotDenseLeftRight[i,1]   #统计正常水平功率带左侧宽度
+            nCounter += 1  
+    # 计算平均宽度  
+    WidthAverage /= nCounter if nCounter > 0 else 1  # 避免除以0的情况  
+    WidthAverage_L /= nCounter if nCounter > 0 else 1   
+
+    #计算正常(即非限功率)水平功率带的功率主带宽度的方差,以此来反映从下到上宽度是否一致
+    WidthVar = 0  # 功率主带宽度的方差   
+    for i in range(PNum - TopP):  
+        # 如果向右扩展网格数小于等于阈值,则计算当前宽度与平均宽度的差值平方  
+        if DotDenseLeftRight[i, 1] <= PowerLimitValve:  
+            WidthVar += (DotDenseLeftRight[i, 1] - WidthAverage) ** 2  
+    # 计算方差(注意:除以nCounter-1是为了得到样本方差)  
+    WidthVar = np.sqrt(WidthVar / (nCounter - 1) if nCounter > 1 else 0)  # 避免除以0的情况
+
+    #各水平功率带,功率主带的风速范围,右侧扩展网格数*2*0.25
+    PowerBandWidth = WidthAverage*intervalwindspeed+WidthAverage_L*intervalwindspeed
+
+    # 对限负荷水平功率带的最大网格进行修正  
+    for i in range(1, PNum - TopP+1):  
+        if (PowerLimit[i] == 1) and (abs(PBoxMaxIndex[i] - PBoxMaxIndex[i - 1]) > 5):  
+            PBoxMaxIndex[i] = PBoxMaxIndex[i - 1] + 1  
+      
+    # 输出各层功率主带的左右边界网格索引  
+    DotDenseInverse = np.flipud(DotDenseLeftRight)  # 上下翻转数组以得到反向顺序  
+      
+    # 计算功率主带的左右边界  
+    CurveWidthR = np.ceil(WidthAverage) + 2  # 功率主带的右边界 + 2  
+    CurveWidthL = np.ceil(WidthAverage_L) + 2  # 功率主带的左边界 + 2  
+      
+    # 网格是否为限功率网格的标识数组  
+    BBoxLimit = np.zeros((PNum, VNum), dtype=int)  
+    # 标记限功率网格  
+    for i in range(2, PNum - TopP):  
+        if PowerLimit[i] == 1:
+            BBoxLimit[i, int(PBoxMaxIndex[i] + CurveWidthR + 1):VNum] = 1
+
+    # 初始化数据异常需要剔除的网格标识数组  
+    BBoxRemove = np.zeros((PNum, VNum), dtype=int)  
+    # 标记需要剔除的网格  
+    for m in range(PNum - TopP): 
+        for n in range(int(PBoxMaxIndex[m]) + int(CurveWidthR), VNum):  # 注意Python中的索引从0开始,因此需要减去1  
+            BBoxRemove[m, n] = 1 
+        # 功率主带左侧的超发网格,从最大索引向左直到第一个网格  
+        
+        for n in range(int(PBoxMaxIndex[m]) - int(CurveWidthL)+1, 0, -1):  # 使用range的步长参数来实现从右向左的迭代  
+            BBoxRemove[m, n-1] = 2  # 注意Python中的索引从0开始,因此需要减去1
+
+    # 初始化变量  
+    CurveTop = np.zeros((2, 1), dtype=int)  
+    CurveTopValve = 1  # 网格的百分比阈值  
+    BTopFind = 0  
+    mm = 0  
+    #确定功率主带的左上拐点,即额定风速位置的网格索引
+    CurveTop = np.zeros((2, 1), dtype=int)  
+    CurveTopValve = 1  # 网格的百分比阈值  
+    BTopFind = 0  
+    mm = 0   
+    for m in range(PNum - TopP, 0, -1):  # 注意Python的range是左闭右开区间,所以这里从PNum-TopP开始到1(不包括0)  
+        for n in range(int(np.floor(int(VCutIn) / intervalwindspeed)), VNum - 1):  # 使用floor函数来向下取整  
+            if (VBoxPercent[m, n - 1] < VBoxPercent[m, n]) and (VBoxPercent[m, n] <= VBoxPercent[m, n + 1]) and (XBoxNumber[m, n] >= 3):   
+                CurveTop[0] = m  
+                CurveTop[1] = n  #[第80个,第40个]
+                BTopFind = 1  
+                mm = m  # mm是拐点所在功率仓,对应其index
+                break  # 找到后退出内层循环  
+        if BTopFind == 1:  
+            break  # 找到后退出外层循环
+            
+    IsolateValve = 3  #功率主带右侧孤立点占比功率仓阈值 3%
+    # 遍历功率仓和网格  
+    for m in range(PNum - TopP):    
+        for n in range(int(PBoxMaxIndex[m]) + int(CurveWidthR), VNum):  
+            # 检查PBoxPercent是否小于阈值,如果是,则标记BBoxRemove为1  
+            if PBoxPercent[m, n] < IsolateValve:   
+                BBoxRemove[m, n] = 1
+    #功率主带顶部宽度
+    CurveWidthT = np.floor((maxP - PRated) / intervalP) + 1  
+    # 标记额定功率以上的超发点(PNum-PTop之间)  
+    for m in range(PNum - TopP, PNum):   
+        for n in range(VNum):  
+            BBoxRemove[m, n] = 3
+      
+    # 标记功率主带拐点左侧的欠发网格  
+    for m in range(mm-1, PNum - TopP): 
+        for n in range(int(CurveTop[1]) - 2):
+            BBoxRemove[m, n] = 2    # BBoxRemove数组现在包含了根据条件标记的超发点和欠发网格的信息
+
+    #以网格的标识,决定该网格内数据的标识。
+    # DzMarch809Sel数组现在包含了每个数据点的标识
+    DzMarch809Sel = np.zeros(nCounter1, dtype=int)  # 初始化标识数组   
+    nWhichP = 0  
+    nWhichV = 0  
+    nBadA = 0   
+    for i in range(nCounter1):  
+        for m in range( PNum ):   
+            if (DzMarch809[i, 1] > m * intervalP) and (DzMarch809[i, 1] <= (m+1) * intervalP):  
+                nWhichP = m  #m记录的是index
+                break  
+        for n in range( VNum ):  # 注意Python的range是左闭右开区间,所以这里到VNum+1  
+            if DzMarch809[i, 0] > ((n+1) * intervalwindspeed - intervalwindspeed/2) and DzMarch809[i, 0] <= ((n+1) * intervalwindspeed + intervalwindspeed / 2):  
+                nWhichV = n  #index
+                break  
+        if nWhichP >= 0 and nWhichV >= 0:  
+            if BBoxRemove[nWhichP, nWhichV] == 1:   
+                DzMarch809Sel[i] = 1  
+                nBadA += 1  
+            elif BBoxRemove[nWhichP, nWhichV] == 2:  
+                DzMarch809Sel[i] = 2  
+            elif BBoxRemove[nWhichP , nWhichV] == 3:  
+                DzMarch809Sel[i] = 0  # 额定风速以上的超发功率点认为是正常点,不再标识  
+    # DzMarch809Sel数组现在包含了每个数据点的标识
+
+    ##############################滑动窗口方法
+    # 存储限负荷数据  
+    PVLimit = np.zeros((nCounter1, 3))  #存储限负荷数据  %第3列用于存储限电的点所在的行数
+    nLimitTotal = 0  
+    nWindowLength = 6   #滑动窗口长度设置为6
+    LimitWindow = np.zeros(nWindowLength)  #滑动窗口空列表
+    UpLimit = 0    #上限
+    LowLimit = 0   #下限
+    PowerStd = 30  # 功率波动方差  
+    nWindowNum = np.floor(nCounter1/nWindowLength) #6587
+    PowerLimitUp = PRated - 100  
+    PowerLimitLow = 100  
+
+    # 循环遍历每个窗口  
+    for i in range(int(nWindowNum)):  
+        start_idx = i * nWindowLength  
+        end_idx = start_idx + nWindowLength  
+        LimitWindow = DzMarch809[start_idx:end_idx, 1]  # 提取当前窗口的数据  
+          
+        # 检查窗口内所有数据是否在功率范围内  
+        bAllInAreas = np.all(LimitWindow >= PowerLimitLow) and np.all(LimitWindow <= PowerLimitUp)  
+        if not bAllInAreas:  
+            continue  
+          
+        # 计算方差上下限  
+        UpLimit = LimitWindow[0] + PowerStd  
+        LowLimit = LimitWindow[0] - PowerStd  
+          
+        # 检查窗口内数据是否在方差范围内  
+        bAllInUpLow = np.all(LimitWindow >= LowLimit) and np.all(LimitWindow <= UpLimit)  
+        if bAllInUpLow:  
+            # 标识窗口内的数据为限负荷数据  
+            DzMarch809Sel[start_idx:end_idx] = 4  
+              
+            # 存储限负荷数据  
+            for j in range(nWindowLength):  
+                PVLimit[nLimitTotal, :2] = DzMarch809[start_idx + j, :2]  
+                PVLimit[nLimitTotal, 2] = Point_line[start_idx + j]  # 对数据进行标识  
+                nLimitTotal += 1  
+    # PVLimit现在包含了限负荷数据,nLimitTotal是限负荷数据的总数
+    
+    
+    #将功率滑动窗口主带平滑化
+    # 初始化锯齿平滑的计数器  
+    nSmooth = 0  
+    # 遍历除了最后 TopP+1 个元素之外的所有 PBoxMaxIndex 元素  
+    for i in range(PNum - TopP - 1):  
+        PVLeftDown = np.zeros(2)  
+        PVRightUp = np.zeros(2)  
+        # 检查当前与下一个 PBoxMaxIndex 之间的距离是否大于等于1  
+        if PBoxMaxIndex[i + 1] - PBoxMaxIndex[i] >= 1:  
+            # 计算左下和右上顶点的坐标  
+            PVLeftDown[0] = (PBoxMaxIndex[i]+1 + CurveWidthR) * 0.25 - 0.125  
+            PVLeftDown[1] = (i) * 25  
+            PVRightUp[0] = (PBoxMaxIndex[i+1]+1 + CurveWidthR) * 0.25 - 0.125  
+            PVRightUp[1] = (i+1) * 25  
+              
+            # 遍历 DzMarch809 数组  
+            for m in range(nCounter1):  
+                # 检查当前点是否在锯齿区域内  
+                if (DzMarch809[m, 0] > PVLeftDown[0]) and (DzMarch809[m, 0] < PVRightUp[0]) and (DzMarch809[m, 1] > PVLeftDown[1]) and (DzMarch809[m, 1] < PVRightUp[1]):
+                    # 检查斜率是否大于对角连线  
+                    if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (PVRightUp[1] - PVLeftDown[1]) / (PVRightUp[0] - PVLeftDown[0]):
+                        # 如果在锯齿左上三角形中,则选中并增加锯齿平滑计数器  
+                        DzMarch809Sel[m] = 0  
+                        nSmooth += 1  
+    # DzMarch809Sel 数组现在包含了锯齿平滑的选择结果,nSmooth 是选中的点数
+    ###################################存储数据
+    # 存储好点  
+    nCounterPV = 0  # 初始化计数器  
+    PVDot = np.zeros((nCounter1, 3))  # 初始化存储好点的数组  nCounter1是p>0的数
+    for i in range(nCounter1):  
+        if DzMarch809Sel[i] == 0:  
+            nCounterPV += 1  
+            PVDot[nCounterPV-1, :2] = DzMarch809[i, :2]  
+            PVDot[nCounterPV-1, 2] = Point_line[i]  # 好点 Point_line记录nCounter1在原始数据中的位置 
+    nCounterVP = nCounterPV  
+     
+    # 对所有数据中的好点进行标注    
+    for i in range(nCounterVP):  
+        Labeled_March809[int(PVDot[i, 2] - 1), (SM[1]-1)] = 1  # 注意Python的索引从0开始,并且需要转换为整数索引  
+     
+    # 存储坏点  
+    nCounterBad = 0  # 初始化计数器  
+    PVBad = np.zeros((nCounter1, 3))  # 初始化存储坏点的数组  
+    for i in range(nCounter1):  
+        if DzMarch809Sel[i] in [1, 2, 3]:  
+            nCounterBad += 1  
+            PVBad[nCounterBad-1, :2] = DzMarch809[i, :2]  
+            PVBad[nCounterBad-1, 2] = Point_line[i]  
+        
+    # 对所有数据中的坏点进行标注  
+    for i in range(nCounterBad):  
+        Labeled_March809[int(PVBad[i, 2] - 1),(SM[1]-1)] = 5  # 坏点标识  
+
+    # 对所有数据中的限电点进行标注   
+    for i in range(nLimitTotal):  
+        Labeled_March809[int(PVLimit[i, 2] - 1),(SM[1]-1)] = 4  # 限电点标识  
+
+    # 对所有的数据点进行标注  
+    # Labeled_March809是array,提取所第四列的值保存为dataframe
+    A = Labeled_March809[:,3]
+    A=pd.DataFrame(A,columns=['lab'])
+    return A
+
+
+# scada_10min_category()
+ 
+
+
+
+    
+    
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    
+
+
+

+ 193 - 0
dataAnalysisBusiness/demo/SCADA_10min_category_2.py

@@ -0,0 +1,193 @@
+import os
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.pyplot import MultipleLocator
+import math
+
+
# Bin widths used throughout the analysis.
intervalPower = 25  # power-bin width, kW
intervalWindspeed = 0.25  # wind-speed-bin width, m/s

# Column names in the turbine-info CSV (Chinese headers).
fieldRatedPower="额定功率"  # rated power
fieldRatedWindSpeed="额定风速"  # rated wind speed
fieldWindSpeedCutIn="切入风速"  # cut-in wind speed
fieldWindSpeedCutOut="切出风速"  # cut-out wind speed

# Column names in the SCADA CSV, plus the label column this script appends.
fieldTime="时间"  # timestamp
fieldWindSpeed="风速"  # wind speed
fieldActivePower="变频器电网侧有功功率"  # converter grid-side active power
fieldLabel="lab"  # label column written by this script
+
+# 1. 数据加载和预处理函数
def loadData(filePathSCADA:str, filePathTurbineInfo:str):
    """Read the per-turbine SCADA CSV (UTF-8) and the turbine-info CSV."""
    scadaFrame = pd.read_csv(filePathSCADA, encoding="utf-8")
    infoFrame = pd.read_csv(filePathTurbineInfo)
    return scadaFrame, infoFrame
+
def extractTurbineParameters(turbineInfo:pd.DataFrame):
    """
    Pull the turbine nameplate parameters out of the info DataFrame.

    Parameters:
        turbineInfo: turbine-info DataFrame.

    Returns:
        (ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed), each as
        the 2-D ndarray produced by ``.values`` on a one-column selection.
    """
    ratedPower, windSpeedCutIn, windSpeedCutOut, ratedWindSpeed = (
        turbineInfo.loc[:, [column]].values
        for column in (
            fieldRatedPower,
            fieldWindSpeedCutIn,
            fieldWindSpeedCutOut,
            fieldRatedWindSpeed,
        )
    )
    return ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed
+
def preprocessData(dataFrameOfSCADA:pd.DataFrame):
    """
    Reduce the raw SCADA frame to time / active power / wind speed plus a label.

    Parameters:
        dataFrameOfSCADA: full SCADA DataFrame for one turbine.

    Returns:
        DataFrame with the three columns (in that order) and an integer label
        column named ``fieldLabel`` initialised to 0.
    """
    # Consistency fix: use the module-level field-name constants instead of
    # repeating the literal Chinese column names, so a renamed column only
    # needs changing in one place.
    dataFramePartOfSCADA = dataFrameOfSCADA.loc[
        :, [fieldTime, fieldActivePower, fieldWindSpeed]
    ].copy()  # copy() so the label assignment below cannot hit a view

    dataFramePartOfSCADA[fieldLabel] = 0
    dataFramePartOfSCADA[fieldLabel] = dataFramePartOfSCADA[fieldLabel].astype(int)

    return dataFramePartOfSCADA
+
+# 2. 数据标签分配和分箱计算
def calculateIntervals(activePowerMax, ratedPower, windSpeedCutOut):
    """
    Work out how many 25 kW power bins and 0.25 m/s wind-speed bins are needed.

    Parameters:
        activePowerMax: maximum observed active power for this turbine.
        ratedPower: nameplate rated power.
        windSpeedCutOut: cut-out wind speed.

    Returns:
        (binNumOfPower, binNumOfWindSpeed): bin counts for power / wind speed.
    """
    if activePowerMax >= ratedPower:
        # Observed maximum exceeds rating: one extra bin to hold the overshoot.
        binNumOfPower = math.floor(activePowerMax / intervalPower) + 1
    else:
        binNumOfPower = math.floor(ratedPower / intervalPower)
    binNumOfWindSpeed = math.ceil(windSpeedCutOut / intervalWindspeed)
    return binNumOfPower, binNumOfWindSpeed
+
def labelData(dataFramePartOfSCADA:pd.DataFrame, conditions):
    """
    Assign coarse labels driven by power thresholds.

    Parameters:
        dataFramePartOfSCADA: frame holding the active-power column.
        conditions: dict of condition name -> threshold.  Supported names:
            'power_below' (rows at or below threshold get label -1) and
            'power_above' (rows at or above threshold get label 1).

    Returns:
        The same frame, mutated in place, with an integer 'label' column
        (0 for rows matching no condition).
    """
    dataFramePartOfSCADA['label'] = 0
    power = dataFramePartOfSCADA[fieldActivePower]
    for name, limit in conditions.items():
        if name == 'power_below':
            dataFramePartOfSCADA.loc[power <= limit, 'label'] = -1
        elif name == 'power_above':
            dataFramePartOfSCADA.loc[power >= limit, 'label'] = 1
    return dataFramePartOfSCADA
+
def computeBins(data, intervals):
    """
    Bin each requested column and count how many rows land in each bin.

    Parameters:
        data (DataFrame): rows to bin.
        intervals (dict): column name -> bin width for that column.

    Returns:
        DataFrame: one column per requested column, holding per-bin counts
        (indexed by the pd.cut intervals).
    """
    binsResults = {}
    for column, interval in intervals.items():
        minValue = data[column].min()
        maxValue = data[column].max()
        bins = np.arange(minValue, maxValue + interval, interval)
        binnedData = pd.cut(data[column], bins, include_lowest=True)
        # Fix: the top-level pd.value_counts is deprecated (removed in pandas
        # 2.x); call the Series method instead.  sort=False keeps the bins in
        # interval order rather than count order.
        binCounts = binnedData.value_counts(sort=False)
        binsResults[column] = binCounts

    return pd.DataFrame(binsResults)
+
+# 3. 应用标签函数
def applyLabels(data, labels):
    """
    Attach externally supplied labels as the 'label' column.

    Parameters:
        data (DataFrame): frame to label; mutated in place.
        labels (Series or array): labels aligned with ``data``'s index
            (or matching its length).

    Returns:
        DataFrame: the same frame, now carrying the 'label' column.
    """
    data['label'] = labels
    return data
+
+# 4. 数据可视化
def plot_data(ws:list, ap:list):
    """
    Scatter-plot active power (y, kW) against wind speed (x, m/s).

    Blocks until the interactive window is closed (plt.show()).
    """
    fig = plt.figure()
    plt.scatter(ws, ap, s=1, c='black', marker='.')
    ax = plt.gca()
    # Major ticks every 5 m/s on x and every 500 kW on y.
    ax.xaxis.set_major_locator(MultipleLocator(5))
    ax.yaxis.set_major_locator(MultipleLocator(500))
    # Hard-coded limits; NOTE(review): the 2200 kW ceiling assumes a ~2 MW
    # turbine -- confirm for other machine classes.
    plt.xlim((0, 30))
    plt.ylim((0, 2200))
    plt.tick_params(labelsize=8)
    plt.xlabel("V/(m$·$s$^{-1}$)", fontsize=8)
    plt.ylabel("P/kW", fontsize=8)
    plt.show()
+
+# 5. Main Execution
def main():
    """Label one turbine's 10-minute SCADA data, bin it, save and plot it."""
    # Turbine id selects both the input CSV and the output file name.
    turbine=82
    filePathSCADA = r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72\{}.csv'.format(turbine)
    filePathTurbineInfo = r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72\info.csv'
    outputFilePathOfSCADA=r"E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72\labeled\labeled_{}.csv".format(turbine)

    dataFrameOfSCADA, turbineInfo = loadData(filePathSCADA, filePathTurbineInfo)
    ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed = extractTurbineParameters(turbineInfo)
    dataFramePartOfSCADA = preprocessData(dataFrameOfSCADA)

    powerMax=dataFramePartOfSCADA[fieldActivePower].max()
    # NOTE(review): binNumOfPower / binNumOfWindSpeed are computed but never
    # used below -- confirm whether binning by these counts was intended.
    binNumOfPower, binNumOfWindSpeed=calculateIntervals(powerMax,ratedPower,windSpeedCutOut)

    # Label rows by power thresholds (<= 10 kW -> -1, >= rated power -> 1).
    conditions = {'power_below': 10, 'power_above': ratedPower[0][0]}
    labeledData = labelData(dataFramePartOfSCADA, conditions)

    # Bin counts for power (100 kW bins) and wind speed (1 m/s bins).
    intervals = {fieldActivePower: 100, fieldWindSpeed: 1}
    binnedData = computeBins(labeledData, intervals)

    # Apply external labels.  NOTE(review): this random placeholder overwrites
    # the threshold labels assigned above -- presumably stand-in code; verify.
    externalLabels = np.random.choice([0, 1], size=len(labeledData))  # random example
    labeledData = applyLabels(labeledData, externalLabels)

    labeledData.to_csv(outputFilePathOfSCADA)

    plot_data(labeledData[fieldWindSpeed], labeledData[fieldActivePower])

if __name__ == '__main__':
    main()

+ 632 - 0
dataAnalysisBusiness/demo/SCADA_10min_category_3.py

@@ -0,0 +1,632 @@
+import os
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.pyplot import MultipleLocator
+import math
+import pdb
+# pdb.set_trace()  # 设置断点
+
# Bin widths used throughout the grid-filter algorithm.
intervalPower = 25  # power-bin width, kW
intervalWindspeed = 0.25  # wind-speed-bin width, m/s

# Column names in the turbine-info CSV (Chinese headers).
fieldRatedPower="额定功率"  # rated power
fieldRatedWindSpeed="额定风速"  # rated wind speed
fieldWindSpeedCutIn="切入风速"  # cut-in wind speed
fieldWindSpeedCutOut="切出风速"  # cut-out wind speed

# Column names in the SCADA CSV, plus the label column appended later.
fieldTime="时间"  # timestamp
fieldWindSpeed="风速"  # wind speed
fieldActivePower="变频器电网侧有功功率"  # converter grid-side active power
fieldLabel="lab"  # label column name
+
+# 1. 数据加载和预处理函数
def loadData(filePathSCADA:str, filePathTurbineInfo:str):
    """Load the per-turbine SCADA CSV (UTF-8) and the farm info CSV."""
    frames = (
        pd.read_csv(filePathSCADA, encoding="utf-8"),
        pd.read_csv(filePathTurbineInfo),
    )
    return frames
+
def extractTurbineParameters(turbineInfo:pd.DataFrame):
    """
    Extract rated power, cut-out / cut-in wind speed and rated wind speed.

    Parameters:
        turbineInfo: turbine-info DataFrame.

    Returns:
        Each value as the 2-D ndarray produced by ``DataFrame.values`` on a
        single-column selection, in the order
        (ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed).
    """
    def nameplate(name):
        # One nameplate field as a (rows, 1) ndarray.
        return turbineInfo.loc[:, [name]].values

    return (
        nameplate(fieldRatedPower),
        nameplate(fieldWindSpeedCutOut),
        nameplate(fieldWindSpeedCutIn),
        nameplate(fieldRatedWindSpeed),
    )
+
def preprocessData(dataFrameOfSCADA:pd.DataFrame):
    """
    Reduce the raw SCADA frame to time / active power / wind speed.

    Parameters:
        dataFrameOfSCADA: full SCADA DataFrame for one turbine.

    Returns:
        New DataFrame holding just those three columns, in that order.
    """
    selected = [
        dataFrameOfSCADA.loc[:, [name]]
        for name in ('时间', '变频器电网侧有功功率', '风速')
    ]
    return pd.concat(selected, axis=1)
+
+    
+# 2. 数据标签分配和分箱计算
def calculateIntervals(activePowerMax, ratedPower, windSpeedCutOut):
    """
    Number of 25 kW power bins and 0.25 m/s wind-speed bins for this turbine.

    Parameters:
        activePowerMax: maximum observed active power.
        ratedPower: nameplate rated power.
        windSpeedCutOut: cut-out wind speed.

    Returns:
        (binNumOfPower, binNumOfWindSpeed)
    """
    reachedRated = activePowerMax >= ratedPower
    # Bin on the observed maximum (plus one spare bin) once the rating has
    # been reached, otherwise on the rating itself.
    powerBasis = activePowerMax if reachedRated else ratedPower
    binNumOfPower = math.floor(powerBasis / intervalPower) + (1 if reachedRated else 0)
    binNumOfWindSpeed = math.ceil(windSpeedCutOut / intervalWindspeed)
    return binNumOfPower, binNumOfWindSpeed
+
def calculateTopP(activePowerMax,ratedPower):
    """
    Count the 25 kW power bins that sit above rated power.

    Parameters:
        activePowerMax: maximum observed active power.
        ratedPower: nameplate rated power.

    Returns:
        0 when the observed maximum never reaches rated power, otherwise the
        number of bins between rated power and the observed maximum.
    """
    if activePowerMax < ratedPower:
        return 0
    return math.floor((activePowerMax - ratedPower) / intervalPower) + 1
+
def chooseData(dataFramePartOfSCADA:pd.DataFrame, dataFrameOfSCADA):
    """
    Select usable samples (power > 10 kW and wind speed > 0) and pre-label the rest.

    Parameters:
        dataFramePartOfSCADA (DataFrame): time / active-power / wind-speed frame.
        dataFrameOfSCADA: raw SCADA frame (source of the power and wind-speed
            series; assumed to share row order and length with the part frame
            -- TODO confirm against the caller).

    Returns:
        DzMarch809: (nCounter1, 3) array of [wind speed, active power, 0].
        nCounter1: number of selected samples.
        dataFramePartOfSCADA: the input converted to an ndarray with an
            appended label column, set to -1 where power <= 10 kW.
        Point_line: 1-based raw-data row number of each selected sample.
        SM: shape tuple (rows, columns) of that ndarray.
    """
    # Append an all-zero 'lab' column, then drop down to a plain ndarray.
    SM1 = dataFramePartOfSCADA.shape #(52561,3)
    AA1 = SM1[0]
    lab = [[0] for _ in range(AA1)]
    lab = pd.DataFrame(lab,columns=['lab'])
    dataFramePartOfSCADA = pd.concat([dataFramePartOfSCADA,lab],axis=1)  # append the label column
    dataFramePartOfSCADA = dataFramePartOfSCADA.values
    SM = dataFramePartOfSCADA.shape #(52561,4)
    AA = SM[0]
    nCounter1 = 0
    # Preallocated at full length; trimmed to nCounter1 rows at the end.
    DzMarch809_0 = np.zeros((AA, 3))
    Point_line = np.zeros(AA, dtype=int)
    APower = dataFrameOfSCADA[fieldActivePower]
    WSpeed = dataFrameOfSCADA[fieldWindSpeed]

    for i in range(AA):
        # Producing sample: keep it and remember its 1-based source row.
        if (APower[i] > 10) & (WSpeed[i] > 0):
            nCounter1 += 1
            DzMarch809_0[nCounter1-1, 0] = WSpeed[i]
            DzMarch809_0[nCounter1-1, 1] = APower[i]
            Point_line[nCounter1-1] = i+1
        # Idle rows get label -1 immediately (last column is the label).
        if APower[i] <= 10:
            dataFramePartOfSCADA[i,SM[1]-1] = -1
    DzMarch809 = DzMarch809_0[:nCounter1, :]
    return DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM
+
def gridCount(binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809):
    """
    Count, per (power bin, wind-speed bin) grid cell, the selected data points.

    Parameters:
        binNumOfWindSpeed: number of 0.25 m/s wind-speed bins.
        binNumOfPower: number of 25 kW power bins.
        nCounter1: number of selected data points.
        DzMarch809: (nCounter1, 3) array of [wind speed, active power, label].

    Returns:
        XBoxNumber: (binNumOfPower, binNumOfWindSpeed) int array of counts.
    """
    # Start from zeros; the original initialised with ones and subtracted one
    # from every cell afterwards, which is equivalent but convoluted.
    XBoxNumber = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)
    for i in range(nCounter1):
        # Bug fix: reset the bin indices for every point.  Previously a point
        # falling outside every bin silently reused the previous point's
        # indices and was counted in the wrong cell.
        nWhichP = 0
        nWhichV = 0
        # Power bin m covers ((m-1)*25, m*25].
        for m in range(1, binNumOfPower + 1):
            if (m - 1) * intervalPower < DzMarch809[i, 1] <= m * intervalPower:
                nWhichP = m
                break
        # Wind-speed bin n covers ((n-1)*0.25, n*0.25].
        for n in range(1, binNumOfWindSpeed + 1):
            if (n - 1) * intervalWindspeed < DzMarch809[i, 0] <= n * intervalWindspeed:
                nWhichV = n
                break
        if nWhichP > 0 and nWhichV > 0:
            XBoxNumber[nWhichP - 1, nWhichV - 1] += 1

    return XBoxNumber
+
def percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed,axis):
    """
    Per-band cell percentages, banding either horizontally or vertically.

    Parameters:
        XBoxNumber: per-cell point counts.
        binNumOfPower: number of power bins.
        binNumOfWindSpeed: number of wind-speed bins.
        axis: 'power' for horizontal power bands, anything else for vertical
            wind-speed columns.

    Returns:
        BoxPercent: (binNumOfPower, binNumOfWindSpeed) float array -- each
            cell's share (in %) of its band's total.
        BinSum: (bands, 1) int array of per-band totals.
    """
    byPower = (axis == 'power')
    outerCount = binNumOfPower if byPower else binNumOfWindSpeed
    innerCount = binNumOfWindSpeed if byPower else binNumOfPower
    BoxPercent = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=float)
    BinSum = np.zeros((outerCount, 1), dtype=int)
    for outer in range(outerCount):
        total = 0
        for inner in range(innerCount):
            total += XBoxNumber[outer, inner] if byPower else XBoxNumber[inner, outer]
        BinSum[outer] = total
        if total > 0:
            for inner in range(innerCount):
                if byPower:
                    BoxPercent[outer, inner] = (XBoxNumber[outer, inner] / total) * 100
                else:
                    BoxPercent[inner, outer] = (XBoxNumber[inner, outer] / total) * 100

    return BoxPercent,BinSum
+
def maxBoxPercentage(BoxPercent, binNumOfPower, binNumOfWindSpeed, axis):
    """
    Locate, per band, the grid cell holding the largest percentage.

    Parameters:
        BoxPercent: per-cell percentage array.
        binNumOfPower: number of power bins.
        binNumOfWindSpeed: number of wind-speed bins.
        axis: 'power' for rows (power bands), anything else for columns.

    Returns:
        BoxMaxIndex: (bands, 1) int array -- argmax along the other axis
            (ties resolve to the first occurrence, as np.argmax does).
        BoxMax: (bands, 1) float array -- the corresponding maximum value.
    """
    alongPower = (axis == 'power')
    bandCount = binNumOfPower if alongPower else binNumOfWindSpeed
    BoxMaxIndex = np.zeros((bandCount, 1), dtype=int)
    BoxMax = np.zeros((bandCount, 1), dtype=float)
    for band in range(bandCount):
        lane = BoxPercent[band, :] if alongPower else BoxPercent[:, band]
        BoxMaxIndex[band] = np.argmax(lane)
        BoxMax[band] = np.max(lane)

    return BoxMaxIndex, BoxMax
+
def extendBoxPercent(m, BoxMax,TopP,BoxMaxIndex,BoxPercent,binNumOfPower,binNumOfWindSpeed):
    """
    Spread symmetrically outward from each power band's densest cell until the
    accumulated percentage reaches the threshold ``m``.

    Parameters:
        m: target cumulative percentage (e.g. 90).
        BoxMax: per-band maximum cell percentage.
        TopP: number of power bins above rated power (those bands are skipped).
        BoxMaxIndex: per-band index of the densest cell.
        BoxPercent: per-cell percentage of the band's points.
        binNumOfPower: number of power bins.
        binNumOfWindSpeed: number of wind-speed bins.

    Returns:
        DotDenseLeftRight: (binNumOfPower, 2) array of cells spread to the
        (left, right) per band.  (DotDense, the total band width, is computed
        but not returned -- the docstring previously claimed otherwise.)
    """
    DotDense = np.zeros(binNumOfPower)
    DotDenseLeftRight = np.zeros((binNumOfPower,2))
    DotValve = m
    PDotDenseSum = 0
    for i in range(binNumOfPower - TopP):
        # Running percentage starts at the densest cell itself.
        PDotDenseSum = BoxMax[i]
        iSpreadRight = 1
        iSpreadLeft = 1
        # Alternate right then left, one cell at a time, until the threshold
        # is met.  NOTE(review): the loop stops as soon as EITHER side hits
        # its edge, and the right-edge test stops two cells short
        # (binNumOfWindSpeed-1-1) -- confirm both against the reference
        # implementation.
        while PDotDenseSum < DotValve:
            if (BoxMaxIndex[i] + iSpreadRight) < binNumOfWindSpeed-1-1:
                PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] + iSpreadRight]
                iSpreadRight += 1
            else:
                break
            if (BoxMaxIndex[i] - iSpreadLeft) > 0:
                PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] - iSpreadLeft]
                iSpreadLeft += 1
            else:
                break
        # Both counters overshoot by one before the loop exits.
        iSpreadRight = iSpreadRight-1
        iSpreadLeft = iSpreadLeft-1

        DotDenseLeftRight[i, 0] = iSpreadLeft
        DotDenseLeftRight[i, 1] = iSpreadRight
        DotDense[i] = iSpreadLeft + iSpreadRight + 1

    return DotDenseLeftRight
+
def calculatePWidth(binNumOfPower,TopP,DotDenseLeftRight,PBinSum):
    """
    Average width of the power main band plus per-band limited-power flags.

    Parameters:
        binNumOfPower: number of power bins.
        TopP: number of power bins above rated power (those bands are skipped).
        DotDenseLeftRight: per-band (left, right) spread in grid cells.
        PBinSum: per-band total point count.

    Returns:
        WidthAverage: mean right-hand spread over the non-limited bands.
        WidthAverage_L: currently identical to WidthAverage -- both sums read
            column 1 (the right spread).  NOTE(review): the left average was
            presumably meant to use column 0; confirm against the reference
            algorithm before changing.
        PowerLimit: int array, 1 marks a band considered power-limited (wide
            right spread and more than N_Pcount points).
    """

    PowerLimit = np.zeros(binNumOfPower, dtype=int)
    WidthAverage = 0
    WidthAverage_L = 0
    nCounter = 0
    PowerLimitValve = 6    # right-spread threshold (cells) for "limited"
    N_Pcount = 20  # minimum points before a band can count as limited
    for i in range(binNumOfPower - TopP):
        if (DotDenseLeftRight[i, 1] > PowerLimitValve) and (PBinSum[i] > N_Pcount):
            PowerLimit[i] = 1

        # Only non-limited bands contribute to the width averages.
        if DotDenseLeftRight[i, 1] <= PowerLimitValve:
            WidthAverage += DotDenseLeftRight[i, 1]
            WidthAverage_L += DotDenseLeftRight[i,1]
            nCounter += 1
    # Guard against every band being limited (nCounter == 0).
    WidthAverage /= nCounter if nCounter > 0 else 1
    WidthAverage_L /= nCounter if nCounter > 0 else 1

    return WidthAverage, WidthAverage_L,PowerLimit
+
def amendMaxBox(binNumOfPower,TopP,PowerLimit,BoxMaxIndex):
    """
    Repair the max-percentage cell of power bands flagged as power-limited.

    A limited band whose densest cell jumps more than 5 wind-speed cells away
    from the band below it is pulled back to one cell right of that band.

    Parameters:
        binNumOfPower: number of power bins.
        TopP: number of power bins above rated power.
        PowerLimit: per-band limited-power flags (1 = limited).
        BoxMaxIndex: per-band densest-cell index; modified in place.

    Returns:
        BoxMaxIndex: the corrected index array (same object).
    """
    # Bug fix: the loop previously ran to binNumOfPower - TopP inclusive,
    # which steps one band beyond the normal-power range and raises
    # IndexError when TopP == 0 (PowerLimit has exactly binNumOfPower
    # entries).  Every sibling routine iterates range(binNumOfPower - TopP).
    for i in range(1, binNumOfPower - TopP):
        if (PowerLimit[i] == 1) and (abs(BoxMaxIndex[i] - BoxMaxIndex[i - 1]) > 5):
            BoxMaxIndex[i] = BoxMaxIndex[i - 1] + 1

    return BoxMaxIndex
+
def markBoxLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,BoxMaxIndex):
    '''
    Flag the grid cells to discard on either side of the power main band.

    Cells right of the band outline get code 1, cells left of it get code 2;
    the TopP bands above rated power are left untouched here.

    Parameters:
        binNumOfPower: number of power bins.
        binNumOfWindSpeed: number of wind-speed bins.
        TopP: number of power bins above rated power.
        CurveWidthR: right half-width of the main band (cells).
        CurveWidthL: left half-width of the main band (cells).
        BoxMaxIndex: per-band densest-cell index (already amended).

    Returns:
        BBoxRemove: (binNumOfPower, binNumOfWindSpeed) int removal mask.
    '''
    BBoxRemove = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)
    for band in range(binNumOfPower - TopP):
        center = int(BoxMaxIndex[band])
        # Everything from the band's right edge outwards.
        for col in range(center + int(CurveWidthR), binNumOfWindSpeed):
            BBoxRemove[band, col] = 1
        # Everything strictly left of the band's left edge.
        for col in range(center - int(CurveWidthL) + 1):
            BBoxRemove[band, col] = 2
    return BBoxRemove
+
def markBoxPLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,BoxPercent,BoxMaxIndex,mm,BBoxRemove,nn):
    '''
    Flag power-limited grid cells and extend the removal mask.

    Removal codes written into BBoxRemove (which is MUTATED in place):
        1: sparse cells right of the main band (under-production),
        2: cells left of the main-band knee column,
        3: every cell of the bands above rated power.

    Parameters:
        binNumOfPower: number of power bins.
        binNumOfWindSpeed: number of wind-speed bins.
        TopP: number of power bins above rated power.
        CurveWidthR: right half-width of the main band (cells).
        PowerLimit: per-band limited-power flags (1 = limited).
        BoxPercent: per-cell percentage of the band's points.
        BoxMaxIndex: per-band densest-cell index (already amended).
        mm: power-bin index of the main-band knee (1-based row from caller).
        BBoxRemove: removal mask from markBoxLimit -- modified as a side effect.
        nn: wind-speed-bin (column) index of the knee.

    Returns:
        BBoxLimit: (binNumOfPower, binNumOfWindSpeed) int array, 1 where a
        limited band extends right of the main band.
    '''
    BBoxLimit = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)
    # Limited bands: mark everything right of the main band.
    # NOTE(review): the range starts at 2, skipping the two lowest bands --
    # confirm intentional.
    for i in range(2, binNumOfPower - TopP):
        if PowerLimit[i] == 1:
            BBoxLimit[i, int(BoxMaxIndex[i] + CurveWidthR + 1):binNumOfWindSpeed] = 1
    # Cells right of the band holding less than IsolateValve percent of their
    # band's points are isolated outliers -> removal code 1.
    IsolateValve = 3
    for m in range(binNumOfPower - TopP):
        for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed):
            if BoxPercent[m, n] < IsolateValve:
                BBoxRemove[m, n] = 1

    # Bands above rated power: remove every cell (code 3).
    for m in range(binNumOfPower - TopP, binNumOfPower):
        for n in range(binNumOfWindSpeed):
            BBoxRemove[m, n] = 3

    # Under-production cells left of the knee column (code 2).
    for m in range(mm-1, binNumOfPower - TopP):
        for n in range(int(nn) - 2):
            BBoxRemove[m, n] = 2

    return BBoxLimit
+    
def markData(binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1):
    '''
    Map the per-cell removal mask back onto the individual data points.

    Parameters:
        binNumOfPower: number of power bins.
        binNumOfWindSpeed: number of wind-speed bins.
        DzMarch809: (nCounter1, 3) array of [wind speed, active power, label].
        BBoxRemove: removal mask from markBoxLimit / markBoxPLimit.
        nCounter1: number of selected data points.

    Returns:
        DzMarch809Sel: per-point code (1 = removed right of band,
        2 = removed left of band, 0 otherwise).

    NOTE(review): nWhichP/nWhichV are not reset per point, so a point falling
    outside every bin silently reuses the previous point's cell, and the
    `>= 0` guard is always true (both start at 0).  Cells coded 3 map back to
    0 (kept) -- confirm all three behaviours are intended.
    '''
    DzMarch809Sel = np.zeros(nCounter1, dtype=int)
    nWhichP = 0  
    nWhichV = 0  
    for i in range(nCounter1):
        # Power bin m covers (m*25, (m+1)*25]; m is a 0-based index here.
        for m in range( binNumOfPower ):
            if ((DzMarch809[i,1])> m * intervalPower) and ((DzMarch809[i,1]) <= (m+1) * intervalPower):
                nWhichP = m  #m records the index
                break
        # Wind-speed bin n is centred on (n+1)*0.25 with half-width 0.125.
        for n in range( binNumOfWindSpeed ):
            if DzMarch809[i,0] > ((n+1) * intervalWindspeed - intervalWindspeed/2) and DzMarch809[i,0] <= ((n+1) * intervalWindspeed + intervalWindspeed / 2):
                nWhichV = n
                break
        if nWhichP >= 0 and nWhichV >= 0:
            if BBoxRemove[nWhichP, nWhichV] == 1:
                DzMarch809Sel[i] = 1
            elif BBoxRemove[nWhichP, nWhichV] == 2:
                DzMarch809Sel[i] = 2
            elif BBoxRemove[nWhichP , nWhichV] == 3:
                DzMarch809Sel[i] = 0

    return DzMarch809Sel
+    
+
def windowFilter(nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line):
    '''
    Sliding-window pass that flags curtailed (limited-power) stretches.

    A window of 6 consecutive points whose powers all sit inside
    [100, ratedPower-100] and within +/-30 kW of the window's first point is
    treated as curtailed: those points get code 4 in DzMarch809Sel (mutated
    in place) and are copied into PVLimit.

    Parameters:
        nCounter1: number of selected data points.
        ratedPower: nameplate rated power.
        DzMarch809: (nCounter1, >=2) array of [wind speed, active power, ...].
        DzMarch809Sel: per-point codes; modified in place.
        Point_line: 1-based source-row number per point.

    Returns:
        PVLimit: (nCounter1, 3) array of [wind speed, power, source row] for
            the curtailed points (only the first nLimitTotal rows are filled).
        nLimitTotal: number of curtailed points.
    '''
    windowLen = 6
    powerStd = 30
    upperBound = ratedPower - 100
    lowerBound = 100
    PVLimit = np.zeros((nCounter1, 3))
    nLimitTotal = 0
    # Walk complete windows only; a trailing partial window is ignored.
    for start in range(0, (nCounter1 // windowLen) * windowLen, windowLen):
        window = DzMarch809[start:start + windowLen, 1]

        if not (np.all(window >= lowerBound) and np.all(window <= upperBound)):
            continue

        anchor = window[0]
        if np.all(window >= anchor - powerStd) and np.all(window <= anchor + powerStd):
            DzMarch809Sel[start:start + windowLen] = 4
            for offset in range(windowLen):
                PVLimit[nLimitTotal, :2] = DzMarch809[start + offset, :2]
                PVLimit[nLimitTotal, 2] = Point_line[start + offset]
                nLimitTotal += 1
    return PVLimit,nLimitTotal
+
def store_points(DzMarch809, DzMarch809Sel,Point_line, nCounter1):
    """
    Split the selected points into good and bad sets.

    Parameters:
        DzMarch809: (nCounter1, >=2) array of [wind speed, active power, ...].
        DzMarch809Sel: per-point code (0 good; 1/2/3 bad; others ignored).
        Point_line: 1-based source-row number per point.
        nCounter1: number of selected data points.

    Returns:
        PVDot: (nCounter1, 3) array of good points [speed, power, source row].
        nCounterPV: number of good points (rows filled in PVDot).
        PVBad: same layout for bad points.
        nCounterBad: number of bad points.
    """
    PVDot = np.zeros((nCounter1, 3))
    PVBad = np.zeros((nCounter1, 3))
    nCounterPV = 0
    nCounterBad = 0
    badCodes = (1, 2, 3)
    for idx in range(nCounter1):
        code = DzMarch809Sel[idx]
        if code == 0:
            PVDot[nCounterPV, :2] = DzMarch809[idx, :2]
            PVDot[nCounterPV, 2] = Point_line[idx]
            nCounterPV += 1
        elif code in badCodes:
            PVBad[nCounterBad, :2] = DzMarch809[idx, :2]
            PVBad[nCounterBad, 2] = Point_line[idx]
            nCounterBad += 1

    return PVDot, nCounterPV,PVBad,nCounterBad
+
def markAllData(nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit):
    """
    Write the good / bad / curtailed codes into the label column.

    Each PV* array carries the 1-based source row in its third column; the
    label lives in the last column (index SM[1]-1) of the data array.
    Codes: 1 = good, 5 = bad, 4 = curtailed (applied in that order, so a row
    appearing in several sets keeps the last code written).

    Parameters:
        nCounterPV / nCounterBad / nLimitTotal: counts of filled rows in
            PVDot / PVBad / PVLimit respectively.
        dataFramePartOfSCADA: ndarray holding the label column; mutated.
        PVDot / PVBad / PVLimit: [speed, power, source row] arrays.
        SM: shape tuple of the data array.

    Returns:
        dataFramePartOfSCADA: the mutated array.
    """
    labelCol = SM[1] - 1
    for i in range(nCounterPV):
        dataFramePartOfSCADA[int(PVDot[i, 2] - 1), labelCol] = 1
    for i in range(nCounterBad):
        dataFramePartOfSCADA[int(PVBad[i, 2] - 1), labelCol] = 5
    for i in range(nLimitTotal):
        dataFramePartOfSCADA[int(PVLimit[i, 2] - 1), labelCol] = 4

    return dataFramePartOfSCADA
+# 4. 数据可视化
def plot_data(ws:list, ap:list):
    """
    Scatter wind speed (x, m/s) against active power (y, kW).

    Blocks until the interactive window is closed (plt.show()).
    """
    fig = plt.figure()
    plt.scatter(ws, ap, s=1, c='black', marker='.')
    ax = plt.gca()
    # Major ticks every 5 m/s (x) and every 500 kW (y).
    ax.xaxis.set_major_locator(MultipleLocator(5))
    ax.yaxis.set_major_locator(MultipleLocator(500))
    # Hard-coded limits; NOTE(review): 2200 kW assumes a ~2 MW turbine.
    plt.xlim((0, 30))
    plt.ylim((0, 2200))
    plt.tick_params(labelsize=8)
    plt.xlabel("V/(m$·$s$^{-1}$)", fontsize=8)
    plt.ylabel("P/kW", fontsize=8)
    plt.show()
+
+# 5. Main Execution
def main():
    """Grid-filter labelling of one turbine's SCADA data, then save and plot."""
    # Turbine id and base path select input and output files.
    turbine=85
    basePath=r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72'
    filePathSCADA = r'{}\{}.csv'.format(basePath,turbine)
    filePathTurbineInfo = r'{}\info.csv'.format(basePath)
    outputFilePathOfSCADA=r"{}\labeled\labeled_{}.csv".format(basePath,turbine)

    dataFrameOfSCADA, turbineInfo = loadData(filePathSCADA, filePathTurbineInfo)
    ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed = extractTurbineParameters(turbineInfo)
    dataFramePartOfSCADA = preprocessData(dataFrameOfSCADA)
    powerMax=dataFramePartOfSCADA[fieldActivePower].max()

    binNumOfPower, binNumOfWindSpeed = calculateIntervals(powerMax,ratedPower,windSpeedCutOut)
    TopP = calculateTopP(powerMax,ratedPower)
    # Select usable points; idle rows are pre-labelled -1.
    DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM = chooseData(dataFramePartOfSCADA, dataFrameOfSCADA)
    XBoxNumber = gridCount(binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809)
    # Cell percentages per horizontal power band and per vertical speed column.
    PBoxPercent,PBinSum = percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'power')
    VBoxPercent,VBinSum = percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'speed')

    PBoxMaxIndex, PBoxMaxP = maxBoxPercentage(PBoxPercent, binNumOfPower, binNumOfWindSpeed, 'power')
    VBoxMaxIndex, VBoxMaxV = maxBoxPercentage(VBoxPercent, binNumOfPower, binNumOfWindSpeed, 'speed')
    # NOTE(review): magic clamp of the first band's peak (>14 -> 9) -- confirm.
    if PBoxMaxIndex[0] > 14: PBoxMaxIndex[0] = 9
    # Spread each band outward until it covers 90% of its points.
    DotDenseLeftRight = extendBoxPercent(90, PBoxMaxP,TopP,PBoxMaxIndex,PBoxPercent,binNumOfPower,binNumOfWindSpeed)
    WidthAverage, WidthAverage_L,PowerLimit = calculatePWidth(binNumOfPower,TopP,DotDenseLeftRight,PBinSum)
    PBoxMaxIndex = amendMaxBox(binNumOfPower,TopP,PowerLimit,PBoxMaxIndex)
    # Right/left half-width of the power main band, in grid cells.
    CurveWidthR = np.ceil(WidthAverage) + 2
    CurveWidthL = np.ceil(WidthAverage_L) + 2
    # Locate the main band's upper-left knee (grid cell near rated wind speed).
    CurveTop = np.zeros((2, 1), dtype=int)
    BTopFind = 0
    # NOTE(review): if no knee is ever found, mm/nn stay undefined and the
    # markBoxPLimit call below raises NameError -- confirm a fallback exists.
    for m in range(binNumOfPower - TopP, 0, -1):
        for n in range(int(np.floor(int(windSpeedCutIn) / intervalWindspeed)), binNumOfWindSpeed - 1):
            if (VBoxPercent[m, n - 1] < VBoxPercent[m, n]) and (VBoxPercent[m, n] <= VBoxPercent[m, n + 1]) and (XBoxNumber[m, n] >= 3):
                CurveTop[0] = m
                CurveTop[1] = n
                BTopFind = 1
                mm = m
                nn = n
                break
        if BTopFind == 1:
            break
    # Grid-level masks -> per-point codes -> window-based curtailment pass.
    BBoxRemove = markBoxLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,PBoxMaxIndex)
    BBoxLimit = markBoxPLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,PBoxPercent,PBoxMaxIndex,mm,BBoxRemove,nn)
    DzMarch809Sel = markData(binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1)
    PVLimit,nLimitTotal = windowFilter(nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line)
    # Smooth the saw-tooth right edge of the main band: points inside the
    # upper-left triangle between consecutive band peaks become good again.
    nSmooth = 0
    for i in range(binNumOfPower - TopP - 1):
        PVLeftDown = np.zeros(2)
        PVRightUp = np.zeros(2)
        if PBoxMaxIndex[i + 1] - PBoxMaxIndex[i] >= 1:
            # Lower-left / upper-right corners of the saw-tooth rectangle
            # (0.25 m/s cells, 25 kW bands; -0.125 centres the cell edge).
            PVLeftDown[0] = (PBoxMaxIndex[i]+1 + CurveWidthR) * 0.25 - 0.125
            PVLeftDown[1] = (i) * 25
            PVRightUp[0] = (PBoxMaxIndex[i+1]+1 + CurveWidthR) * 0.25 - 0.125
            PVRightUp[1] = (i+1) * 25

            for m in range(nCounter1):
                # Point inside the saw-tooth rectangle?
                if (DzMarch809[m, 0] > PVLeftDown[0]) and (DzMarch809[m, 0] < PVRightUp[0]) and (DzMarch809[m, 1] > PVLeftDown[1]) and (DzMarch809[m, 1] < PVRightUp[1]):
                    # Above the diagonal -> inside the upper-left triangle.
                    if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (PVRightUp[1] - PVLeftDown[1]) / (PVRightUp[0] - PVLeftDown[0]):
                        DzMarch809Sel[m] = 0
                        nSmooth += 1
    # Split into good/bad points and write every code into the label column.
    PVDot, nCounterPV,PVBad,nCounterBad = store_points(DzMarch809, DzMarch809Sel,Point_line, nCounter1)
    dataFramePartOfSCADA = markAllData(nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit)
    # Column 3 holds the labels; re-attach them to the raw frame and save.
    A = dataFramePartOfSCADA[:,3]
    A=pd.DataFrame(A,columns=['lab'])

    labeledData = pd.concat([dataFrameOfSCADA,A],axis=1)
    D = labeledData[labeledData['lab'].isin([-1,0,1,2,3,4,5])]  # rows carrying any label code
    labeledData.to_csv(outputFilePathOfSCADA,encoding='utf-8')
    plot_data(D[fieldWindSpeed], D[fieldActivePower])


if __name__ == '__main__':
    main()

+ 62 - 0
dataAnalysisBusiness/demo/scatter3D_plotly.py

@@ -0,0 +1,62 @@
import pandas as pd
import plotly.graph_objects as go

# Demo: 3-D scatter of wind speed / active power / month, one colour per month.

# Sample data.
data = {
    '机组名': ['机组A', '机组B', '机组C', '机组D'],
    '时间': ['2024-01-09 09:13:29', '2024-01-10 10:14:30', '2024-02-09 08:13:29', '2024-02-10 09:14:30'],
    '年月': ['2024-01', '2024-01', '2024-02', '2024-02'],
    '风速': [5.0, 6.0, 4.5, 5.5],
    '有功功率': [1000, 1200, 900, 1100]
}

df = pd.DataFrame(data)

# Sort by wind speed so the trace runs in x order.
df_sorted = df.sort_values(by='风速')

# Unique months drive both the colour mapping and the z-axis ticks.
unique_months = df_sorted['年月'].unique()

# One colour per unique month (extend this list if more months appear).
colors = ['red', 'blue', 'green', 'purple']

color_map = dict(zip(unique_months, colors))

trace = go.Scatter3d(
    x=df_sorted['风速'],
    y=df_sorted['有功功率'],
    # Bug fix: z must carry the month values themselves.  It previously held
    # the colour-name strings, which contradicted the categorical month axis
    # (tickvals/ticktext=unique_months) configured in the layout below.
    z=df_sorted['年月'],
    mode='markers',
    marker=dict(
        color=[color_map[month] for month in df_sorted['年月']],
        size=10,
        line=dict(color='rgba(255, 255, 255, 0.8)', width=0.5),
        opacity=0.8
    )
)

fig = go.Figure(data=[trace])

# Layout: categorical month axis on z, ordered ascending.
fig.update_layout(
    title='按风速升序排列的3D散点图:风速、有功功率与年月',
    margin=dict(l=0, r=0, b=0, t=0),
    scene=dict(
        xaxis=dict(title='风速'),
        yaxis=dict(title='有功功率'),
        zaxis=dict(
            title='年月',
            tickmode='array',
            tickvals=unique_months,
            ticktext=unique_months,
            categoryorder='category ascending'
        )
    )
)

# Opens the interactive figure.
fig.show()

+ 50 - 0
dataAnalysisBusiness/demo/scatter3D_plotly_make_subplots.py

@@ -0,0 +1,50 @@
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Demo: 3-D scatter (wind speed / month / active power) built point-by-point
# inside a make_subplots scene.  Replace the inline sample data with
# pd.read_csv('your_file.csv') for real input.

# Sample data.
data = {
    '机组名': ['机组A', '机组B', '机组C', '机组D'],
    '时间': ['2024-01-09 09:13:29', '2024-01-10 10:14:30', '2024-02-09 08:13:29', '2024-02-10 09:14:30'],
    '年月': ['2024-01', '2024-01', '2024-02', '2024-02'],
    '风速': [5.0, 6.0, 4.5, 5.5],
    '有功功率': [1000, 1200, 900, 1100]
}

df = pd.DataFrame(data)

# One colour per unique month.
# NOTE(review): consecutive months differ by only 1 in the red channel here,
# so the generated colours are nearly indistinguishable -- consider a
# qualitative palette instead.
unique_months = df['年月'].unique()
colors = [f'rgb({i}, {150 - i}, 50)' for i in range(len(unique_months))]
color_map = dict(zip(unique_months, colors))

# A 1x1 subplot grid whose single cell is a 3-D scene.
fig = make_subplots(rows=1, cols=1, specs=[[{"type": "scatter3d"}]])

# One single-point trace per row (simple, but slow for large frames).
for index, row in df.iterrows():
    x = row['风速']
    y = row['年月']
    z = row['有功功率']
    color = color_map[y]

    # Add the point to the scene.
    fig.add_trace(go.Scatter3d(x=[x], y=[y], z=[z], mode='markers', marker=dict(color=color)), row=1, col=1)

# Categorical y axis (months), ordered ascending.
fig.update_layout(
    title='3D散点图:风速、年月与有功功率',
    margin=dict(l=0, r=0, b=0, t=0),
    scene=dict(
        xaxis=dict(title='风速'),
        yaxis=dict(title='年月', tickmode='array', tickvals=unique_months, ticktext=unique_months, categoryorder='category ascending'),
        zaxis=dict(title='有功功率')
    )
)

# Opens the interactive figure.
fig.show()

+ 19 - 0
dataAnalysisBusiness/demo/test.py

@@ -0,0 +1,19 @@
+import plotly.express as px
+import pandas as pd
+
+# 创建一个示例数据框架,包含单月数据
+data_single_month = {
+    '时间': ['2023-03', '2023-03', '2023-03', '2023-03', '2023-03'],
+    '发电机转速': [1000, 1500, 2000, 2500, 3000],
+    '功率': [120, 180, 160, 210, 230]
+}
+
+df_single = pd.DataFrame(data_single_month)
+
+# 绘制3D散点图
+fig_single = px.scatter_3d(df_single, x='发电机转速', y='时间', z='功率', 
+                           title='3D 散点图 - 单月数据',
+                           labels={'发电机转速': '转速', '时间': '月份', '功率': '输出功率'})
+
+# 显示图形
+fig_single.show()

+ 113 - 0
dataAnalysisBusiness/demo/testDataProcess.py

@@ -0,0 +1,113 @@
+import os  
+import pandas as pd  
+import numpy as np
+import matplotlib.pyplot as plt  
+  
+def process_scada_data(fpath, turbine_number, fn_start, fn_end, status_normal):  
+    """  
+    处理SCADA数据的函数。  
+      
+    参数:  
+        fpath (str): 文件存放位置的路径。  
+        turbine_number (int): 风机数量(尽管此参数在此函数中未使用,但可以保留以匹配MATLAB代码)。  
+        fn_start (int): 开始处理的文件编号。  
+        fn_end (int): 结束处理的文件编号(不包含)。  
+        status_normal (int): 风机正常并网状态的状态字(尽管此参数在此函数中未使用,但可以保留以匹配MATLAB代码)。  
+    """  
+    # 循环处理每个文件  
+    for fn in range(fn_start, fn_end):  
+        fname = os.path.join(fpath, f"{fn}.csv")  
+          
+        # 读取CSV文件  
+        scada_10min = pd.read_csv(fname)  
+          
+        # 提取所需列  
+        time_stamp = scada_10min["时间"]  
+        active_power = scada_10min["变频器电网侧有功功率"]  
+        wind_speed = scada_10min["风速"]  
+          
+        # 创建包含所需列的DataFrame  
+        LM = pd.DataFrame({  
+            "时间戳": time_stamp,  
+            "有功功率": active_power,  
+            "风速": wind_speed  
+        })  
+          
+        # 调用数据标签处理函数(需要您根据MATLAB实现来编写此函数)  
+        xx = data_label(LM,fpath)  
+          
+        # 合并标签数据到原始DataFrame  
+        merged_df = pd.concat([scada_10min, xx], axis=1)  
+          
+        # 筛选出标签为1的行  
+        D = merged_df[merged_df["lab"] == 1]  
+          
+        # 绘制散点图  
+        plt.scatter(D["风速"], D["变频器电网侧有功功率"], s=50, fillstyle='full')  
+        plt.title(f"风机 {fn} 散点图")  
+        plt.xlabel("风速")  
+        plt.ylabel("变频器电网侧有功功率")  
+        plt.show()  
+          
+        # 创建保存结果的目录(如果不存在)  
+        labeled_dir = os.path.join(fpath, "labeled")  
+        os.makedirs(labeled_dir, exist_ok=True)  
+          
+        # 将处理后的数据保存到CSV文件  
+        labeled_fname = os.path.join(labeled_dir, f"{fn}_10s_n.csv")  
+        merged_df.to_csv(labeled_fname, index=False)  
+  
+# 假设data_label函数已经实现,这里只是一个示例的占位符  
+def data_label(df:pd.DataFrame,fpath):  
+    # 在这里实现您的数据标签处理逻辑  
+    # 返回带有新标签的Series或DataFrame  
+     # 读取风机参数数据
+    fname2 = fpath + "info.csv"
+    turbine_info = pd.read_csv(fname2, keep_default_na=False)
+    PRated = turbine_info["额定功率"].values[0]
+    VCutOut = turbine_info["切出风速"].values[0]
+    VCutIn = turbine_info["切入风速"].values[0]
+    VRated = turbine_info["额定风速"].values[0]
+    
+    # 读入有功功率和风速数据
+    Labeled_March809 = df
+    APower = Labeled_March809["active_power"]
+    WSpeed = Labeled_March809["wind_speed"]
+
+    # 初始化计算用的变量
+    maxP = APower.max()
+    intervalP = 25  # 功率分区间隔为25
+    intervalwindspeed = 0.25  # 风速分区间隔为0.25m/s
+    
+    # 根据最大功率和额定功率,计算功率和风速的区间数
+    PNum = (maxP // intervalP) + 1 if maxP >= PRated else (PRated // intervalP)
+    TopP = ((maxP - PRated) // intervalP) + 1 if maxP >= PRated else 0
+    VNum = np.ceil(VCutOut / intervalwindspeed).astype(int)
+
+    # 初始化标签列
+    Labeled_March809['label'] = 0
+
+    # 数据预处理:标记功率小于等于10的点
+    Labeled_March809.loc[APower <= 10, 'label'] = -1
+
+    # 下面是逻辑处理的示例,涉及到循环、条件判断和数据标记
+    # 示例:标记风速和功率在特定范围内的点
+    for i, row in Labeled_March809.iterrows():
+        if row['active_power'] > 10 and row['wind_speed'] > 0:
+            # 这里可以根据需要添加更多的处理逻辑
+            pass
+
+    # 以下是更高级的数据处理示例,这部分代码需要您根据实际逻辑继续开发
+    # 示例:根据风速和功率的分布对数据进行进一步的标记
+    # 请注意,这里需要你根据上面 MATLAB 代码的具体逻辑来实现相应的Python代码
+
+    return Labeled_March809
+  
+# 设置文件路径和其他参数  
+fpath = "E:\\BaiduNetdiskDownload\\test\\min_scada_LuoTuoGou\\72\\"  
+# 注意:turbine_number 在此函数中未使用,但保持以匹配MATLAB代码  
+turbine_number = 24  
+status_normal = 8  
+  
+# 调用函数处理文件,假设从编号82的文件开始,只处理这一个文件  
+process_scada_data(fpath, turbine_number, 82, 83, status_normal)

+ 12 - 0
dataAnalysisBusiness/demo/testPandas.py

@@ -0,0 +1,12 @@
+import pandas as pd
+import numpy as np
+
+df=pd.read_csv(r"E:/BaiduNetdiskDownload/DTSXJK_WJWFC_Q1_W001_2023-10-01_last_1seconds.csv",header=1)
+
+print(df.head())
+
+
+df["WNAC_WdDir"]=df["WNAC_WdDir"].astype("Float32")
+df["弧度"]=df["WNAC_WdDir"]/360*2*np.pi
+
+print(df.head())

+ 0 - 0
dataContract/algorithmContract/__init__.py


+ 169 - 0
dataContract/algorithmContract/confBusiness.py

@@ -0,0 +1,169 @@
+from enum import Enum
+
+# 全局变量
+charset_unify = 'utf-8'
+CSVSuffix = '.csv'
+Const_Output_Total='total'
+Const_TimeGranularity_Second="second"
+Const_TimeGranularity_Minute="minute"
+
+### SCADA ###
+Field_DeviceCode="wind_turbine_number" #风机编号
+Field_ActiverPower="active_power"       #有功功率
+Field_GeneratorTorque = "actual_torque" #实际扭矩
+Field_GeneratorSpeed = "generator_speed" #发电机转速
+Field_RotorSpeed = "rotor_speed"       #风轮转速
+Field_WindSpeed = "wind_velocity"     #风速
+Field_WindDirection="true_wind_direction"  # 绝对风向
+Field_AngleIncluded = "yaw_error1"  # 对风角度
+Field_PitchAngel1="pitch_angle_blade_1"  # 桨距角1
+Field_PitchAngel2="pitch_angle_blade_2"  # 桨距角2
+Field_PitchAngel3="pitch_angle_blade_3"  # 桨距角3
+Field_Time="time_stamp"  # .strftime('%Y-%m-%d %H:%M:%S')
+Field_EnvTemp="outside_cabin_temperature"  # 环境温度
+Field_NacTemp="cabin_temperature"  # 机舱内温度
+Field_NacPos="cabin_position"  # 机舱位置
+Field_GeneratorDE="generatordrive_end_bearing_temperature"  # 发电机驱动端轴承温度/发电机低速轴温度
+Field_GeneratorNDE="generatornon_drive_end_bearing_temperature"  # 发电机非驱动端轴承温度/发电机高速轴温度
+Field_MainBearTemp="main_bearing_temperature"  # 主轴承轴承温度
+Field_GbHsBearTemp="gearbox_high_speed_shaft_bearing_temperature"  # 齿轮箱高速轴轴承温度
+Field_GbMsBearTemp="gearboxmedium_speed_shaftbearing_temperature"  # 齿轮箱中速轴轴承温度
+Field_GbLsBearTemp="gearbox_low_speed_shaft_bearing_temperature"  # 齿轮箱低速轴轴承温度
+Field_GenWiTemp1="generator_winding1_temperature"  # 发电机绕组1温度
+Field_GenWiTemp2="generator_winding2_temperature"  # 发电机绕组2温度
+Field_GenWiTemp3="generator_winding3_temperature"  # 发电机绕组3温度
+Field_GbOilTemp="gearbox_oil_temperature"  # 齿轮箱油温
+Field_PCA="power_curve_available"  # 功率曲线可用
+Field_APSet="set_value_of_active_power"  # 有功功率设定值
+Field_NacFbVib="front_back_vibration_of_the_cabin"  # 机舱前后振动
+Field_NacLrVib="side_to_side_vibration_of_the_cabin"  # 机舱左右振动
+Field_StatusOfTurbine="wind_turbine_status"  # 风机状态1
+
+# 故障/告警数据
+Field_DeviceCode="wind_turbine_number" #风机编号
+Field_DeviceName="wind_turbine_name"#风机名称
+Field_FaultTime="time_diff"#故障时长
+Field_FaultDetail="fault_detail"#故障类型
+Field_BeginTime="begin_time"#故障开始时间
+Field_EndTime="end_time"#故障结束时间
+
+
+### 二次计算 ###
+Field_Cp = "cp"
+Field_CpMedian = "cp_median"
+Field_CpMax = "cp_max"
+Field_CpMin = "cp_min"
+Field_TSR = "tsr"
+Field_TSRModified = "tsr_modified"
+Field_TSRMax = "tsr_max"
+Field_TSRMin = "tsr_min"
+Field_TSRMedian = "tsr_median"
+Field_Year="year"
+Field_Month="month"
+Field_UnixYearMonth="monthIntTime"
+Field_YearMonth = "year-month"
+Field_YearMonthDay = "year-month-day"
+Field_PowerFloor= "power_floor"
+Field_Power="power"
+Field_WindSpeedFloor= "wind_speed_floor"
+Field_YawError="yaw_error1"
+Field_LableFlag="lab"
+
+### 风场信息表 	wind_field ###
+Field_PowerFarmCode="field_code"
+Field_PowerFarmName="field_name"
+Field_ProvinceID="province_id"
+Field_ProvinceName="province_name"
+Field_CityID="city_id"
+Field_CityName="city_name"
+Field_CompanyCode="company_code"  # 企业编号
+Field_CreateBy="create_by"  # 创建人
+Field_CreateTime="create_time"  # 创建时间
+Field_AirDensity="density"  # 空气密度-合同功率曲线
+Field_NumberOfTurbine="engine_number"  # 风机数量
+Field_PowerContractURL="power_contract_url"  # 合同功率曲线地址
+Field_RatedPowerSUM="rated_capacity_number"  # 总额定容量-sum机组
+
+### 风机机组信息表 	wind_engine_group ###
+Field_NameOfTurbine="engine_name"
+Field_CodeOfTurbine="engine_code"
+Field_RatedPower="rated_capacity"  # 额定功率
+Field_RatedWindSpeed="rated_wind_speed" # 额定风速
+Field_Elevation="elevation_height"
+Field_HubHeight="hub_height"  # 轮毂高度
+Field_Latitude="latitude"  # wind_engine_mill 也有
+Field_Longitude="longitude"  # wind_engine_mill 也有
+Field_Sightcing="sightcing"  # 是否标杆风机
+
+### 风机机型信息表	wind_engine_mill ###
+Field_RotorDiameter="rotor_diameter" # 叶轮直径
+Field_Brand="brand"  # 品牌名称(风机)
+Field_Combination="combination"  # 组合字段
+Field_MotionType="curved_motion_type"  # 驱动方式
+Field_DelState="del_state"  # 删除方式
+Field_MachineTypeCode="machine_type_code"  # 机型型号
+Field_MillTypeCode="mill_type_code"  # 编号
+Field_ManufacturerCode="manufacturer_code"  # 厂商编号
+Field_ManufacturerName="manufacturer_name"  # 厂商名称
+Field_PowerCriterionURL="power_criterion_url"  # 标准功率曲线地址
+Field_TowerHeight="tower_height"
+Field_VaneLong="vane_long"  # 叶片长度
+Field_RSR="rotational_speed_ratio"  # 传动比-转速比
+Field_CutInWS="rated_cut_in_windspeed"  # 切入风速
+Field_CutOutWS="rated_cut_out_windspeed" # 切出风速
+
+### 测风塔信息 	anemometer_tower -> anemometer_tower_relation  ###
+Field_AnemometerCode="anemometer_code"
+Field_AnemometerName="anemometer_name"
+
+### 数据转换
+Field_TransferType="transfer_type"  # 转换类型 (枚举值:second、minute)
+Field_TimeGranularity="time_granularity"  # 时间粒度(秒)
+
+Field_State="state"
+Field_UpdateTime="update_time"
+Field_UpdateBy="update_by"
+
+Field_Return_TypeAnalyst="typeAnalyst"
+Field_Return_BatchCode="batch_code"
+Field_Return_FilePath="localFilePath"
+Field_Return_IsSaveDatabase:bool=True
+
+class ErrorState(Enum):
+    NotErr=0
+    Err=1     # 异常
+
+class AnalysisState(Enum):
+    NotAnalyzed=-1 # 未分析
+    RequstQueue=10 # 请求队列中
+    Analyzing=20    # 分析中
+    Analyzed=30     # 已分析
+
+class CustomError(Exception):
+    ERROR_CODES = {
+        -1:"未知异常,请联系技术人员排查问题",
+        100: "未获得业务基础数据,或基础数据缺失",
+        101: "未获得业务数据,或业务数据与基础数据不匹配",
+        102: "缺少风电机组运行数据",
+        103: "算法模型未输出结果",
+        104: "缺失场站基础信息",
+        105: "缺失机组基础信息",
+        106: "缺失场站的数据批次信息",
+        107: "场站存在未配置机型信息(额定风速、切入风速、切出风速)的机组",
+        108: "缺失机组的合同功率曲线信息",
+        109: "场站存在未配置额定功率的机组",
+        110: "场站未配置空气密度",
+        111: "场站所属风电机组存在未配置叶轮直径",
+        112: "SCADA数据不包含风速或其全无值",
+        113: "SCADA数据不包含有功功率或其全无值",
+        114: "机组未配置机型信息",
+        115: "机组未配置基础信息",
+    }
+
+    def __init__(self, code,msg=""):
+        self.code = code
+        self.message =f"{msg} {self.ERROR_CODES.get(code, CustomError.ERROR_CODES.get(-1))}"
+        super().__init__(self.message)
+
+    def __str__(self):
+        return f"[Error Code: {self.code}] {self.message}"

+ 14 - 0
dataContract/algorithmContract/configAnalysis.py

@@ -0,0 +1,14 @@
+class ConfigAnalysis:
+    def __init__(self, package: str, className: str, methodName: str,scada:str):
+        self.package = package
+        self.className = className
+        self.methodName = methodName
+        self.scada=scada
+
+    def to_dict(self):
+        return {
+            "package": self.package,
+            "className": self.className,
+            "methodName": self.methodName,
+            "scada":self.scada
+        }

+ 3 - 0
dataContract/algorithmContract/const.py

@@ -0,0 +1,3 @@
+
+DATABASE_BusinessFoundationDb="businessFoundationDb"
+DATABASE_businessDb="businessDb"

+ 199 - 0
dataContract/algorithmContract/contract.py

@@ -0,0 +1,199 @@
+import traceback
+import json
+from algorithmContract.dataContractType import DataContractType
+from algorithmContract.customDataContract import DataContract
+from algorithmContract.dataSource import DataSource
+from algorithmContract.dataFilter import DataFilter
+from algorithmContract.customFilter import CustomFilter
+from algorithmContract.configAnalysis import ConfigAnalysis
+from algorithmContract.graphSet import GraphSet
+
+
+class Contract:
+    def __init__(self, contractType: DataContractType, dataContract: DataContract):
+        self.contractType = contractType
+        self.dataContract = dataContract
+
+    def to_dict(self):
+        return {
+            "dataContractType": self.contractType.to_dict(),
+            "dataContract": self.dataContract.to_dict()
+        }
+
+
+def LoadAnalysisInput(jsonString: str):
+    # Load the JSON string into a Python dictionary
+    try:
+        jsonData = json.loads(jsonString)
+
+        jsonDataType = jsonData["dataContractType"]
+        contractType = DataContractType(
+            jsonDataType["type"], jsonDataType["version"])
+
+        jsonDataContract = jsonData["dataContract"]
+
+        autoOrManual = jsonDataContract["autoOrManual"]
+        # Initialize the entity classes using the JSON data
+        # data_source = DataSource(jsonDataContract["dataSource"]["scada"])
+
+        custom_filters = {}
+        for key, value in jsonDataContract["dataFilter"]["customFilter"].items():
+            custom_filter = CustomFilter(value.get("min"), value.get("max"))
+            custom_filters[key] = custom_filter
+            
+        if (not jsonDataContract["dataFilter"]["turbines"] is None) and (not isinstance(jsonDataContract["dataFilter"]["turbines"],list)):
+              raise Exception("输入参数中机组数据类型不合法.")  
+
+        data_filter = DataFilter(
+            jsonDataContract["dataFilter"]["powerFarmID"],
+            jsonDataContract["dataFilter"]["turbines"],
+            jsonDataContract["dataFilter"]["dataBatchNum"],
+            jsonDataContract["dataFilter"]["beginTime"],
+            jsonDataContract["dataFilter"]["endTime"],
+            jsonDataContract["dataFilter"]["excludingMonths"],
+            custom_filters
+        )
+
+        config_analysis = []
+        for item in jsonDataContract["configAnalysis"]:
+            analysis = ConfigAnalysis(
+                item["package"], item["className"], item["methodName"], item["scada"])
+            config_analysis.append(analysis)
+
+        graph_sets = {}
+        for key, value in jsonDataContract["graphSets"].items():
+            graph_set = GraphSet(value["step"], value["min"], value["max"])
+            graph_sets[key] = graph_set
+
+        data_contract = DataContract(autoOrManual,
+                                     data_filter, config_analysis, graph_sets)
+
+        contract = Contract(contractType, data_contract)
+    except Exception as e:
+        print("exception: {}".format(e))
+        traceback.print_exc()  # 打印异常的堆栈跟踪
+        raise e
+
+    return contract
+
+
+# JSON string representing the data contract
+jsonString = '''
+{
+	"dataContractType": {
+		"type": "analysisExecuteOrder",
+		"version": "1.2.0"
+	},
+	"dataContract": {
+		"autoOrManual":"automatic",
+		"dataSource": {
+			"scada": "second"
+		},
+		"dataFilter": {
+			"powerFarmID": "010-00001",
+			"turbines": [
+				"010-00001-0001",
+				"010-00001-0002"
+			],
+			"dataBatchNum": "B2024042211-0",
+			"beginTime": "2023-01-01 00:00:00",
+			"endTime": "2023-12-31 23:59:59",
+			"excludingMonths": [
+				"2023-12",
+				"2023-09"
+			],
+			"customFilter": {
+				"valueWindSpeed": {
+					"min": 3.0,
+					"max": 25.0
+				},
+				"valuePitchAngle": {
+					"min": 2,
+					"max": null
+				},
+				"valueActivePower": {
+					"min": 10,
+					"max": 2500
+				},
+				"valueGeneratorSpeed": {
+					"min": 10,
+					"max": 2500
+				}
+			}
+		},
+		"configAnalysis": [
+			{
+				"package": "algorithm.powerCurveAnalyst",
+				"className": "PowerCurveAnalyst",
+				"methodName": "executeAnalysis"
+			},
+			{
+				"package": "algorithm.powerScatter2DAnalyst",
+				"className": "PowerScatter2DAnayst",
+				"methodName": "executeAnalysis"
+			},
+			{
+				"package": "algorithm.powerScatterAnalyst",
+				"className": "PowerScatterAnalyst",
+				"methodName": "executeAnalysis"
+			},
+			{
+				"package": "algorithm.windSpeedFrequencyAnalyst",
+				"className": "WindSpeedFrequencyAnalyst",
+				"methodName": "executeAnalysis"
+			},
+			{
+				"package": "algorithm.generatorSpeedPowerAnalyst",
+				"className": "GeneratorSpeedPowerAnalyst",
+				"methodName": "executeAnalysis"
+			}
+		],
+		"graphSets": {
+			"generatorSpeed": {
+				"step": 200,
+				"min": 1000,
+				"max": 2000
+			},
+			"generatorTorque": {
+				"step": 2000,
+				"min": 0,
+				"max": 12000
+			},
+			"cp": {
+				"step": 0.5,
+				"min": 0,
+				"max": 2
+			},
+			"tsr": {
+				"step": 5,
+				"min": 0,
+				"max": 30
+			},
+			"pitchAngle": {
+				"step": 1,
+				"min": -1,
+				"max": 20
+			},
+			"activePower": {
+				"step": 250,
+				"min": 0,
+				"max": 2000
+			}
+		}
+	}
+}
+'''
+
+
+def load():
+    return LoadAnalysisInput(jsonString)
+
+
+def Analysis(contract: Contract):
+    jsonString = ""
+    try:
+        contractDict = contract.to_dict()  # 先转换为字典
+        jsonString = json.dumps(contractDict)
+    except:
+        traceback.print_exc()
+    return jsonString

+ 35 - 0
dataContract/algorithmContract/customDataContract.py

@@ -0,0 +1,35 @@
+import types
+from algorithmContract.dataSource import DataSource
+from algorithmContract.dataFilter import DataFilter
+from algorithmContract.customFilter import CustomFilter
+from algorithmContract.configAnalysis import ConfigAnalysis
+from algorithmContract.graphSet import GraphSet
+
+
+class DataContract:
+    def __init__(self,autoOrManual:str , dataFilter: DataFilter, configAnalysis: list[ConfigAnalysis], graphSets: dict[str,GraphSet]):
+        self.autoOrManual=autoOrManual
+        # self.dataSource = dataSource
+        self.dataFilter = dataFilter
+        self.configAnalysis = configAnalysis
+        self.graphSets = graphSets
+
+    def _graph_sets_to_dict(self, graph_sets):
+        result = {}
+        for key, value in graph_sets.items():
+            if isinstance(value, GraphSet):
+                result[key] = value.to_dict()
+            elif isinstance(value, dict):
+                result[key] = self._graph_sets_to_dict(value)
+            else:
+                raise TypeError(f"Unsupported type for graph set value: {type(value)}")
+        return result
+
+    def to_dict(self):
+        return {
+            "autoOrManual":self.autoOrManual,
+            # "dataSource": self.dataSource.to_dict(),
+            "dataFilter": self.dataFilter.to_dict(),            
+            "configAnalysis": [analysis.to_dict() for analysis in self.configAnalysis],  # 修改这一行
+            "graphSets": {key: graph.to_dict() for key, graph in self.graphSets.items()}  # 假设 graphSets 是一个字典 # self._graph_sets_to_dict(self.graphSets) # 
+        }

+ 7 - 0
dataContract/algorithmContract/customFilter.py

@@ -0,0 +1,7 @@
+class CustomFilter:
+    def __init__(self, min_val:float, max_val:float):
+        self.min = min_val
+        self.max = max_val
+
+    def to_dict(self):
+        return {"min": self.min,"max":self.max}

+ 7 - 0
dataContract/algorithmContract/dataContractType.py

@@ -0,0 +1,7 @@
+class DataContractType:
+    def __init__(self, type:str, version:str):
+        self.type = type
+        self.version = version
+    
+    def to_dict(self):
+        return {"type": self.type,"version":self.version}

+ 23 - 0
dataContract/algorithmContract/dataFilter.py

@@ -0,0 +1,23 @@
+from algorithmContract.customFilter import CustomFilter
+
+
+class DataFilter:
+    def __init__(self, powerFarmID: str, turbines: list, dataBatchNum: str, beginTime: str, endTime: str, excludingMonths: list, customFilter: dict[str,CustomFilter]):
+        self.powerFarmID = powerFarmID
+        self.turbines = turbines
+        self.dataBatchNum = dataBatchNum
+        self.beginTime = beginTime
+        self.endTime = endTime
+        self.excludingMonths = excludingMonths
+        self.customFilter = customFilter
+
+    def to_dict(self):
+        return {
+            "powerFarmID": self.powerFarmID,
+            "turbines": self.turbines,
+            "dataBatchNum": self.dataBatchNum,
+            "beginTime": self.beginTime,
+            "endTime": self.endTime,
+            "excludingMonths": self.excludingMonths,
+            "customFilter": {key: filter.to_dict() for key, filter in self.customFilter.items()}
+        }

+ 6 - 0
dataContract/algorithmContract/dataSource.py

@@ -0,0 +1,6 @@
+class DataSource:
+    def __init__(self, scada:str):
+        self.scada = scada
+        
+    def to_dict(self):
+        return {"scada": self.scada}

+ 8 - 0
dataContract/algorithmContract/graphSet.py

@@ -0,0 +1,8 @@
+class GraphSet:
+    def __init__(self, step:float, min_val:float, max_val:float):
+        self.step = step
+        self.min = min_val
+        self.max = max_val
+
+    def to_dict(self):
+        return {"step": self.step,"min":self.min,"max":self.max}

+ 107 - 0
dataContract/algorithmContract/testDataContract.py

@@ -0,0 +1,107 @@
+import json
+from algorithmContract.contract import LoadAnalysisInput,Analysis
+
+
+
+# JSON string representing the data contract
+jsonString = '''
+{
+	"dataContractType": {
+		"type": "analysisExecuteOrder",
+		"version": "1.2.0"
+	},
+	"dataContract": {
+		"autoOrManual": "automatic",
+		"dataFilter": {
+			"powerFarmID": "WOF01000002",
+			"turbines": null,
+			"dataBatchNum": "zhaoyuan_20240528",
+			"beginTime": "2023-01-01 00:00:00",
+			"endTime": "2024-12-31 23:59:59",
+			"excludingMonths": [
+				"2023-12",
+				"2023-09"
+			],
+			"customFilter": {
+				"valueWindSpeed": {
+					"min": 3.0,
+					"max": 25.0
+				},
+				"valuePitchAngle": {
+					"min": 2,
+					"max": null
+				},
+				"valueActivePower": {
+					"min": 10,
+					"max": 2500
+				},
+				"valueGeneratorSpeed": {
+					"min": 10,
+					"max": 2500
+				}
+			}
+		},
+		"configAnalysis": [
+			{
+				"package": "algorithm.temperatureLargeComponentsAnalyst",
+				"className": "TemperatureLargeComponentsAnalyst",
+				"methodName": "executeAnalysis",
+				"scada": "minute"
+			}
+		],
+		"graphSets": {
+			"directDrive": {
+				"generatorSpeed": {
+					"step": 5,
+					"min": 0,
+					"max": 30
+				},
+				"generatorTorque": {
+					"step": 10000,
+					"min": 0,
+					"max": 100000
+				}
+			},
+			"indirectDrive": {
+				"generatorSpeed": {
+					"step": 200,
+					"min": 1000,
+					"max": 2000
+				},
+				"generatorTorque": {
+					"step": 2000,
+					"min": 0,
+					"max": 12000
+				}
+			},
+			"tsr": {
+				"step": 5,
+				"min": 0,
+				"max": 30
+			},
+			"pitchAngle": {
+				"step": 1,
+				"min": -1,
+				"max": 20
+			},
+			"activePower": {
+				"step": 250,
+				"min": 0,
+				"max": 2000
+			}
+		}
+	}
+}
+'''
+
+
+data=LoadAnalysisInput(jsonString)
+
+print(data.dataContract.graphSets["directDrive"]["generatorSpeed"].step)
+print(data.dataContract.graphSets["directDrive"]["generatorTorque"].step)
+
+string=Analysis(data)
+print(string)
+
+
+

+ 1 - 1
repositoryZN/utils/minioUtil/data/upload/path-to-your-local-file2.txt

@@ -1 +1 @@
-path-to-your-local-file2.txt
+path-to-your-local-file2.txt

+ 0 - 1
wtoaamapi/apps/viewDemo/viewUser.py

@@ -10,7 +10,6 @@ tags = ['user']
  
 class User(ViewSet):
     @swagger_auto_schema(
-        tags=['Demo'],
         operation_description="apiview post description override",
         request_body=openapi.Schema(
             type=openapi.TYPE_OBJECT,