import os import re import pandas as pd import numpy as np import matplotlib.pyplot as plt from matplotlib.pyplot import MultipleLocator import math import pdb from algorithmContract.confBusiness import * #将这个包里的全部加载 intervalPower = 25 intervalWindspeed = 0.25 class DataMarker: #选取时间、风速、功率数据 def preprocessData(self,dataFrame:pd.DataFrame,confData:ConfBusiness): timeStamp = dataFrame[confData.field_turbine_time] activePower = dataFrame[confData.field_power] windSpeed = dataFrame[confData.field_wind_speed] dataFramePartOfSCADA = pd.concat([timeStamp,activePower,windSpeed], axis=1) return dataFramePartOfSCADA #计算分仓数目 def calculateIntervals(self,activePowerMax, ratedPower, windSpeedCutOut): binNumOfPower = math.floor((activePowerMax) / intervalPower) + 1 if (activePowerMax) >= ratedPower else math.floor(ratedPower / intervalPower) binNumOfWindSpeed = math.ceil(windSpeedCutOut / intervalWindspeed) return binNumOfPower, binNumOfWindSpeed def calculateTopP(self,activePowerMax,ratedPower): TopP = 0 if activePowerMax >= ratedPower: TopP = math.floor((activePowerMax - ratedPower) / intervalPower) + 1 else: TopP = 0 return TopP def chooseData(self,dataFramePartOfSCADA,dataFrame:pd.DataFrame,confData:ConfBusiness): # 初始化标签列 SM1 = dataFramePartOfSCADA.shape AA1 = SM1[0] lab = [[0] for _ in range(AA1)] lab = pd.DataFrame(lab,columns=['lab']) dataFramePartOfSCADA = pd.concat([dataFramePartOfSCADA,lab],axis=1) #在tpv后加一列标签列 dataFramePartOfSCADA = dataFramePartOfSCADA.values SM = dataFramePartOfSCADA.shape #(52561,4) AA = SM[0] nCounter1 = 0 DzMarch809_0 = np.zeros((AA, 3)) Point_line = np.zeros(AA, dtype=int) APower = dataFrame[confData.field_power].values WSpeed = dataFrame[confData.field_wind_speed].values for i in range(AA): if (APower[i] > 10.0) & (WSpeed[i] > 0.0): nCounter1 += 1 DzMarch809_0[nCounter1-1, 0] = WSpeed[i] DzMarch809_0[nCounter1-1, 1] = APower[i] Point_line[nCounter1-1] = i+1 if APower[i] <= 10: dataFramePartOfSCADA[i,SM[1]-1] = -1 DzMarch809 = DzMarch809_0[:nCounter1, :] return DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM def gridCount(self,binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809): # 遍历有效数据 XBoxNumber = np.ones((binNumOfPower, binNumOfWindSpeed),dtype=int) for i in range(nCounter1): for m in range(1, binNumOfPower + 1): if (DzMarch809[i,1] > (m - 1) * intervalPower) and (DzMarch809[i,1] <= m * intervalPower): nWhichP = m break for n in range(1, binNumOfWindSpeed + 1): if (DzMarch809[i, 0] > (n - 1) * intervalWindspeed) and (DzMarch809[i, 0] <= n * intervalWindspeed): nWhichV = n break if (nWhichP > 0) and (nWhichV > 0): XBoxNumber[nWhichP - 1][nWhichV - 1] += 1 for m in range(1,binNumOfPower+1): for n in range(1,binNumOfWindSpeed+1): XBoxNumber[m-1,n-1] = XBoxNumber[m-1,n-1] - 1 return XBoxNumber def percentageDots(self,XBoxNumber, binNumOfPower, binNumOfWindSpeed,axis): BoxPercent = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=float) BinSum = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed, 1), dtype=int) for i in range(1,1+(binNumOfPower if axis == 'power' else binNumOfWindSpeed)): for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1): BinSum[i-1] = BinSum[i-1] + (XBoxNumber[i-1,m-1] if axis == 'power' else XBoxNumber[m-1,i-1]) for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1): if BinSum[i-1]>0: if axis == 'power': BoxPercent[i-1,m-1] = (XBoxNumber[i-1,m-1] / BinSum[i-1])*100 else: BoxPercent[m-1,i-1] = (XBoxNumber[m-1,i-1] / BinSum[i-1])*100 return BoxPercent,BinSum def maxBoxPercentage(self,BoxPercent, binNumOfPower, binNumOfWindSpeed, axis): BoxMaxIndex = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = int) BoxMax = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = float) for m in range(1,(binNumOfPower if axis == 'power' else binNumOfWindSpeed)+1): BoxMaxIndex[m-1] = (np.argmax(BoxPercent[m-1, :])) if axis == 'power' else (np.argmax(BoxPercent[:, m-1])) BoxMax[m-1] = (np.max(BoxPercent[m-1, :]))if axis == 'power' else (np.max(BoxPercent[:, m-1])) return BoxMaxIndex, BoxMax def extendBoxPercent(self,m, BoxMax,TopP,BoxMaxIndex,BoxPercent,binNumOfPower,binNumOfWindSpeed): DotDense = np.zeros(binNumOfPower) DotDenseLeftRight = np.zeros((binNumOfPower,2)) DotValve = m PDotDenseSum = 0 for i in range(binNumOfPower - TopP): PDotDenseSum = BoxMax[i] iSpreadRight = 1 iSpreadLeft = 1 while PDotDenseSum < DotValve: if (BoxMaxIndex[i] + iSpreadRight) < binNumOfWindSpeed-1-1: PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] + iSpreadRight] iSpreadRight += 1 else: break if (BoxMaxIndex[i] - iSpreadLeft) > 0: PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] - iSpreadLeft] iSpreadLeft += 1 else: break iSpreadRight = iSpreadRight-1 iSpreadLeft = iSpreadLeft-1 DotDenseLeftRight[i, 0] = iSpreadLeft DotDenseLeftRight[i, 1] = iSpreadRight DotDense[i] = iSpreadLeft + iSpreadRight + 1 return DotDenseLeftRight def calculatePWidth(self,binNumOfPower,TopP,DotDenseLeftRight,PBinSum): PowerLimit = np.zeros(binNumOfPower, dtype=int) WidthAverage = 0 WidthAverage_L = 0 nCounter = 0 PowerLimitValve = 6 N_Pcount = 20 for i in range(binNumOfPower - TopP): if (DotDenseLeftRight[i, 1] > PowerLimitValve) and (PBinSum[i] > N_Pcount): PowerLimit[i] = 1 if DotDenseLeftRight[i, 1] <= PowerLimitValve: WidthAverage += DotDenseLeftRight[i, 1] WidthAverage_L += DotDenseLeftRight[i,1] nCounter += 1 WidthAverage /= nCounter if nCounter > 0 else 1 WidthAverage_L /= nCounter if nCounter > 0 else 1 return WidthAverage, WidthAverage_L,PowerLimit def amendMaxBox(self,binNumOfPower,TopP,PowerLimit,BoxMaxIndex): for i in range(1, binNumOfPower - TopP+1): if (PowerLimit[i] == 1) and (abs(BoxMaxIndex[i] - BoxMaxIndex[i - 1]) > 5): BoxMaxIndex[i] = BoxMaxIndex[i - 1] + 1 return BoxMaxIndex def markBoxLimit(self,binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,BoxMaxIndex): BBoxRemove = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int) for m in range(binNumOfPower - TopP): for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed): BBoxRemove[m, n] = 1 for n in range(int(BoxMaxIndex[m]) - int(CurveWidthL)+1, 0, -1): BBoxRemove[m, n-1] = 2 return BBoxRemove def markBoxPLimit(self,binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,BoxPercent,BoxMaxIndex,mm_value:int,BBoxRemove,nn_value:int): BBoxLimit = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int) for i in range(2, binNumOfPower - TopP): if PowerLimit[i] == 1: BBoxLimit[i, int(BoxMaxIndex[i] + CurveWidthR + 1):binNumOfWindSpeed] = 1 IsolateValve = 3 for m in range(binNumOfPower - TopP): for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed): if BoxPercent[m, n] < IsolateValve: BBoxRemove[m, n] = 1 for m in range(binNumOfPower - TopP, binNumOfPower): for n in range(binNumOfWindSpeed): BBoxRemove[m, n] = 3 # 标记功率主带拐点左侧的欠发网格 for m in range(mm_value - 1, binNumOfPower - TopP): for n in range(int(nn_value) - 2): BBoxRemove[m, n] = 2 return BBoxLimit def markData(self,binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1): DzMarch809Sel = np.zeros(nCounter1, dtype=int) nWhichP = 0 nWhichV = 0 for i in range(nCounter1): for m in range( binNumOfPower ): if ((DzMarch809[i,1])> m * intervalPower) and ((DzMarch809[i,1]) <= (m+1) * intervalPower): nWhichP = m #m记录的是index break for n in range( binNumOfWindSpeed ): if DzMarch809[i,0] > ((n+1) * intervalWindspeed - intervalWindspeed/2) and DzMarch809[i,0] <= ((n+1) * intervalWindspeed + intervalWindspeed / 2): nWhichV = n break if nWhichP >= 0 and nWhichV >= 0: if BBoxRemove[nWhichP, nWhichV] == 1: DzMarch809Sel[i] = 1 elif BBoxRemove[nWhichP, nWhichV] == 2: DzMarch809Sel[i] = 2 elif BBoxRemove[nWhichP , nWhichV] == 3: DzMarch809Sel[i] = 0 return DzMarch809Sel def windowFilter(self,nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line): PVLimit = np.zeros((nCounter1, 3)) nLimitTotal = 0 nWindowLength = 6 LimitWindow = np.zeros(nWindowLength) UpLimit = 0 LowLimit = 0 PowerStd = 30 nWindowNum = np.floor(nCounter1/nWindowLength) PowerLimitUp = ratedPower - 100 PowerLimitLow = 100 # 循环遍历每个窗口 for i in range(int(nWindowNum)): start_idx = i * nWindowLength end_idx = start_idx + nWindowLength LimitWindow = DzMarch809[start_idx:end_idx, 1] bAllInAreas = np.all(LimitWindow >= PowerLimitLow) and np.all(LimitWindow <= PowerLimitUp) if not bAllInAreas: continue UpLimit = LimitWindow[0] + PowerStd LowLimit = LimitWindow[0] - PowerStd bAllInUpLow = np.all(LimitWindow >= LowLimit) and np.all(LimitWindow <= UpLimit) if bAllInUpLow: DzMarch809Sel[start_idx:end_idx] = 4 for j in range(nWindowLength): PVLimit[nLimitTotal, :2] = DzMarch809[start_idx + j, :2] PVLimit[nLimitTotal, 2] = Point_line[start_idx + j] # 对数据进行标识 nLimitTotal += 1 return PVLimit,nLimitTotal def store_points(self,DzMarch809, DzMarch809Sel,Point_line, nCounter1): PVDot = np.zeros((nCounter1, 3)) PVBad = np.zeros((nCounter1, 3)) nCounterPV = 0 nCounterBad = 0 for i in range(nCounter1): if DzMarch809Sel[i] == 0: nCounterPV += 1 PVDot[nCounterPV-1, :2] = DzMarch809[i, :2] PVDot[nCounterPV-1, 2] = Point_line[i] elif DzMarch809Sel[i] in [1, 2, 3]: nCounterBad += 1 PVBad[nCounterBad-1, :2] = DzMarch809[i, :2] PVBad[nCounterBad-1, 2] = Point_line[i] return PVDot, nCounterPV,PVBad,nCounterBad def markAllData(self,nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit): for i in range(nCounterPV): dataFramePartOfSCADA[int(PVDot[i, 2] - 1), (SM[1]-1)] = 1 #坏点 for i in range(nCounterBad): dataFramePartOfSCADA[int(PVBad[i, 2] - 1),(SM[1]-1)] = 5 # 坏点标识 # 对所有数据中的限电点进行标注 for i in range(nLimitTotal): dataFramePartOfSCADA[int(PVLimit[i, 2] - 1),(SM[1]-1)] = 4 # 限电点标识 return dataFramePartOfSCADA # 4. 数据可视化 def plotData(self,turbineName:str,ws:list, ap:list): fig = plt.figure() plt.scatter(ws, ap, s=1, c='black', marker='.') ax = plt.gca() ax.xaxis.set_major_locator(MultipleLocator(5)) ax.yaxis.set_major_locator(MultipleLocator(500)) plt.title(turbineName) plt.xlim((0, 30)) plt.ylim((0, 2200)) plt.tick_params(labelsize=8) plt.xlabel("V/(m$·$s$^{-1}$)", fontsize=8) plt.ylabel("P/kW", fontsize=8) plt.show() def main(self,confData: ConfBusiness,dataFrame:pd.DataFrame): dataFramePartOfSCADA = self.preprocessData(dataFrame, confData) powerMax = dataFrame[confData.field_power].max() binNumOfPower, binNumOfWindSpeed = self.calculateIntervals(powerMax,confData.rated_power,confData.rated_cut_out_windspeed) TopP = self.calculateTopP(powerMax,confData.rated_power) # 根据功率阈值对数据进行标签分配 DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM = self.chooseData(dataFramePartOfSCADA,dataFrame,confData) XBoxNumber = self.gridCount(binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809) PBoxPercent,PBinSum = self.percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'power') VBoxPercent,VBinSum = self.percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'speed') PBoxMaxIndex, PBoxMaxP = self.maxBoxPercentage(PBoxPercent, binNumOfPower, binNumOfWindSpeed, 'power') VBoxMaxIndex, VBoxMaxV = self.maxBoxPercentage(VBoxPercent, binNumOfPower, binNumOfWindSpeed, 'speed') if PBoxMaxIndex[0] > 14: PBoxMaxIndex[0] = 9 DotDenseLeftRight = self.extendBoxPercent(90, PBoxMaxP,TopP,PBoxMaxIndex,PBoxPercent,binNumOfPower,binNumOfWindSpeed) WidthAverage, WidthAverage_L,PowerLimit = self.calculatePWidth(binNumOfPower,TopP,DotDenseLeftRight,PBinSum) PBoxMaxIndex = self.amendMaxBox(binNumOfPower,TopP,PowerLimit,PBoxMaxIndex) # 计算功率主带的左右边界 CurveWidthR = np.ceil(WidthAverage) + 2 CurveWidthL = np.ceil(WidthAverage_L) + 2 #确定功率主带的左上拐点,即额定风速位置的网格索引 CurveTop = np.zeros((2, 1), dtype=int) BTopFind = 0 mm_value = None nn_value = None for m in range(binNumOfPower - TopP, 0, -1): for n in range(int(np.floor(int(confData.rated_cut_in_windspeed) / intervalWindspeed)), binNumOfWindSpeed - 1): if (VBoxPercent[m, n - 1] < VBoxPercent[m, n]) and (VBoxPercent[m, n] <= VBoxPercent[m, n + 1]) and (XBoxNumber[m, n] >= 3): CurveTop[0] = m CurveTop[1] = n #[第80个,第40个] BTopFind = 1 mm_value = m nn_value = n break if BTopFind == 1: break #标记网格 BBoxRemove = self.markBoxLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,PBoxMaxIndex) if mm_value is not None and nn_value is not None: BBoxLimit = self.markBoxPLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,PBoxPercent,PBoxMaxIndex,mm_value,BBoxRemove,nn_value) DzMarch809Sel = self.markData(binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1) PVLimit,nLimitTotal = self.windowFilter(nCounter1,confData.rated_power,DzMarch809,DzMarch809Sel,Point_line) #将功率滑动窗口主带平滑化 nSmooth = 0 for i in range(binNumOfPower - TopP - 1): PVLeftDown = np.zeros(2) PVRightUp = np.zeros(2) if PBoxMaxIndex[i + 1] - PBoxMaxIndex[i] >= 1: # 计算左下和右上顶点的坐标 PVLeftDown[0] = (PBoxMaxIndex[i]+1 + CurveWidthR) * 0.25 - 0.125 PVLeftDown[1] = (i) * 25 PVRightUp[0] = (PBoxMaxIndex[i+1]+1 + CurveWidthR) * 0.25 - 0.125 PVRightUp[1] = (i+1) * 25 for m in range(nCounter1): # 检查当前点是否在锯齿区域内 if (DzMarch809[m, 0] > PVLeftDown[0]) and (DzMarch809[m, 0] < PVRightUp[0]) and (DzMarch809[m, 1] > PVLeftDown[1]) and (DzMarch809[m, 1] < PVRightUp[1]): # 检查斜率是否大于对角连线 if ((DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0])) > ((PVRightUp[1] - PVLeftDown[1]) / (PVRightUp[0] - PVLeftDown[0])): # 如果在锯齿左上三角形中,则选中并增加锯齿平滑计数器 DzMarch809Sel[m] = 0 nSmooth += 1 # DzMarch809Sel 数组现在包含了锯齿平滑的选择结果,nSmooth 是选中的点数 PVDot, nCounterPV,PVBad,nCounterBad = self.store_points(DzMarch809, DzMarch809Sel,Point_line, nCounter1) #标注 dataFramePartOfSCADA = self.markAllData(nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit) A = dataFramePartOfSCADA[:,-1] A=pd.DataFrame(A,columns=['lab']) dataFrame = pd.concat([dataFrame,A],axis=1) """ 标识 说明 5 坏点 4 限功率点 1 好点 0 null -1 P<=10 """ print("lab unique :",dataFrame['lab'].unique()) # data=dataFrame[dataFrame['lab']==1] # self.plotData(data[Field_NameOfTurbine].iloc[0],data[confData.field_wind_speed],data[confData.field_power]) return dataFrame if __name__ == '__main__': main()