123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411 |
- import os
- import re
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- from matplotlib.pyplot import MultipleLocator
- import math
- import pdb
- from algorithmContract.confBusiness import * #将这个包里的全部加载
- intervalPower = 25
- intervalWindspeed = 0.25
- class DataMarker:
-
- #选取时间、风速、功率数据
- def preprocessData(self,dataFrame:pd.DataFrame,confData:ConfBusiness):
- timeStamp = dataFrame[confData.field_turbine_time]
- activePower = dataFrame[confData.field_power]
- windSpeed = dataFrame[confData.field_wind_speed]
- dataFramePartOfSCADA = pd.concat([timeStamp,activePower,windSpeed], axis=1)
- return dataFramePartOfSCADA
-
- #计算分仓数目
- def calculateIntervals(self,activePowerMax, ratedPower, windSpeedCutOut):
- binNumOfPower = math.floor((activePowerMax) / intervalPower) + 1 if (activePowerMax) >= ratedPower else math.floor(ratedPower / intervalPower)
- binNumOfWindSpeed = math.ceil(windSpeedCutOut / intervalWindspeed)
- return binNumOfPower, binNumOfWindSpeed
- def calculateTopP(self,activePowerMax,ratedPower):
-
- TopP = 0
- if activePowerMax >= ratedPower:
- TopP = math.floor((activePowerMax - ratedPower) / intervalPower) + 1
- else:
- TopP = 0
- return TopP
- def chooseData(self,dataFramePartOfSCADA,dataFrame:pd.DataFrame,confData:ConfBusiness):
-
- # 初始化标签列
- SM1 = dataFramePartOfSCADA.shape
- AA1 = SM1[0]
- lab = [[0] for _ in range(AA1)]
- lab = pd.DataFrame(lab,columns=['lab'])
- dataFramePartOfSCADA = pd.concat([dataFramePartOfSCADA,lab],axis=1) #在tpv后加一列标签列
- dataFramePartOfSCADA = dataFramePartOfSCADA.values
- SM = dataFramePartOfSCADA.shape #(52561,4)
- AA = SM[0]
- nCounter1 = 0
- DzMarch809_0 = np.zeros((AA, 3))
- Point_line = np.zeros(AA, dtype=int)
- APower = dataFrame[confData.field_power].values
- WSpeed = dataFrame[confData.field_wind_speed].values
- for i in range(AA):
- if (APower[i] > 10.0) & (WSpeed[i] > 0.0):
- nCounter1 += 1
- DzMarch809_0[nCounter1-1, 0] = WSpeed[i]
- DzMarch809_0[nCounter1-1, 1] = APower[i]
- Point_line[nCounter1-1] = i+1
- if APower[i] <= 10:
- dataFramePartOfSCADA[i,SM[1]-1] = -1
-
- DzMarch809 = DzMarch809_0[:nCounter1, :]
-
- return DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM
- def gridCount(self,binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809):
- # 遍历有效数据
- XBoxNumber = np.ones((binNumOfPower, binNumOfWindSpeed),dtype=int)
- for i in range(nCounter1):
- for m in range(1, binNumOfPower + 1):
- if (DzMarch809[i,1] > (m - 1) * intervalPower) and (DzMarch809[i,1] <= m * intervalPower):
- nWhichP = m
- break
- for n in range(1, binNumOfWindSpeed + 1):
- if (DzMarch809[i, 0] > (n - 1) * intervalWindspeed) and (DzMarch809[i, 0] <= n * intervalWindspeed):
- nWhichV = n
- break
- if (nWhichP > 0) and (nWhichV > 0):
- XBoxNumber[nWhichP - 1][nWhichV - 1] += 1
- for m in range(1,binNumOfPower+1):
- for n in range(1,binNumOfWindSpeed+1):
- XBoxNumber[m-1,n-1] = XBoxNumber[m-1,n-1] - 1
-
- return XBoxNumber
- def percentageDots(self,XBoxNumber, binNumOfPower, binNumOfWindSpeed,axis):
-
- BoxPercent = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=float)
- BinSum = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed, 1), dtype=int)
- for i in range(1,1+(binNumOfPower if axis == 'power' else binNumOfWindSpeed)):
- for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1):
- BinSum[i-1] = BinSum[i-1] + (XBoxNumber[i-1,m-1] if axis == 'power' else XBoxNumber[m-1,i-1])
- for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1):
- if BinSum[i-1]>0:
- if axis == 'power':
- BoxPercent[i-1,m-1] = (XBoxNumber[i-1,m-1] / BinSum[i-1])*100
- else:
- BoxPercent[m-1,i-1] = (XBoxNumber[m-1,i-1] / BinSum[i-1])*100
-
- return BoxPercent,BinSum
- def maxBoxPercentage(self,BoxPercent, binNumOfPower, binNumOfWindSpeed, axis):
-
- BoxMaxIndex = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = int)
- BoxMax = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = float)
- for m in range(1,(binNumOfPower if axis == 'power' else binNumOfWindSpeed)+1):
- BoxMaxIndex[m-1] = (np.argmax(BoxPercent[m-1, :])) if axis == 'power' else (np.argmax(BoxPercent[:, m-1]))
- BoxMax[m-1] = (np.max(BoxPercent[m-1, :]))if axis == 'power' else (np.max(BoxPercent[:, m-1]))
- return BoxMaxIndex, BoxMax
- def extendBoxPercent(self,m, BoxMax,TopP,BoxMaxIndex,BoxPercent,binNumOfPower,binNumOfWindSpeed):
-
- DotDense = np.zeros(binNumOfPower)
- DotDenseLeftRight = np.zeros((binNumOfPower,2))
- DotValve = m
- PDotDenseSum = 0
- for i in range(binNumOfPower - TopP):
- PDotDenseSum = BoxMax[i]
- iSpreadRight = 1
- iSpreadLeft = 1
- while PDotDenseSum < DotValve:
- if (BoxMaxIndex[i] + iSpreadRight) < binNumOfWindSpeed-1-1:
- PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] + iSpreadRight]
- iSpreadRight += 1
- else:
- break
- if (BoxMaxIndex[i] - iSpreadLeft) > 0:
- PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] - iSpreadLeft]
- iSpreadLeft += 1
- else:
- break
- iSpreadRight = iSpreadRight-1
- iSpreadLeft = iSpreadLeft-1
-
- DotDenseLeftRight[i, 0] = iSpreadLeft
- DotDenseLeftRight[i, 1] = iSpreadRight
- DotDense[i] = iSpreadLeft + iSpreadRight + 1
- return DotDenseLeftRight
- def calculatePWidth(self,binNumOfPower,TopP,DotDenseLeftRight,PBinSum):
-
- PowerLimit = np.zeros(binNumOfPower, dtype=int)
- WidthAverage = 0
- WidthAverage_L = 0
- nCounter = 0
- PowerLimitValve = 6
- N_Pcount = 20
- for i in range(binNumOfPower - TopP):
- if (DotDenseLeftRight[i, 1] > PowerLimitValve) and (PBinSum[i] > N_Pcount):
- PowerLimit[i] = 1
-
- if DotDenseLeftRight[i, 1] <= PowerLimitValve:
- WidthAverage += DotDenseLeftRight[i, 1]
- WidthAverage_L += DotDenseLeftRight[i,1]
- nCounter += 1
- WidthAverage /= nCounter if nCounter > 0 else 1
- WidthAverage_L /= nCounter if nCounter > 0 else 1
- return WidthAverage, WidthAverage_L,PowerLimit
- def amendMaxBox(self,binNumOfPower,TopP,PowerLimit,BoxMaxIndex):
-
- for i in range(1, binNumOfPower - TopP+1):
- if (PowerLimit[i] == 1) and (abs(BoxMaxIndex[i] - BoxMaxIndex[i - 1]) > 5):
- BoxMaxIndex[i] = BoxMaxIndex[i - 1] + 1
- return BoxMaxIndex
- def markBoxLimit(self,binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,BoxMaxIndex):
-
- BBoxRemove = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)
- for m in range(binNumOfPower - TopP):
- for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed):
- BBoxRemove[m, n] = 1
- for n in range(int(BoxMaxIndex[m]) - int(CurveWidthL)+1, 0, -1):
- BBoxRemove[m, n-1] = 2
- return BBoxRemove
- def markBoxPLimit(self,binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,BoxPercent,BoxMaxIndex,mm_value:int,BBoxRemove,nn_value:int):
-
- BBoxLimit = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)
- for i in range(2, binNumOfPower - TopP):
- if PowerLimit[i] == 1:
- BBoxLimit[i, int(BoxMaxIndex[i] + CurveWidthR + 1):binNumOfWindSpeed] = 1
- IsolateValve = 3
- for m in range(binNumOfPower - TopP):
- for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed):
- if BoxPercent[m, n] < IsolateValve:
- BBoxRemove[m, n] = 1
- for m in range(binNumOfPower - TopP, binNumOfPower):
- for n in range(binNumOfWindSpeed):
- BBoxRemove[m, n] = 3
-
- # 标记功率主带拐点左侧的欠发网格
- for m in range(mm_value - 1, binNumOfPower - TopP):
- for n in range(int(nn_value) - 2):
- BBoxRemove[m, n] = 2
-
- return BBoxLimit
-
- def markData(self,binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1):
-
- DzMarch809Sel = np.zeros(nCounter1, dtype=int)
- nWhichP = 0
- nWhichV = 0
- for i in range(nCounter1):
- for m in range( binNumOfPower ):
- if ((DzMarch809[i,1])> m * intervalPower) and ((DzMarch809[i,1]) <= (m+1) * intervalPower):
- nWhichP = m #m记录的是index
- break
- for n in range( binNumOfWindSpeed ):
- if DzMarch809[i,0] > ((n+1) * intervalWindspeed - intervalWindspeed/2) and DzMarch809[i,0] <= ((n+1) * intervalWindspeed + intervalWindspeed / 2):
- nWhichV = n
- break
- if nWhichP >= 0 and nWhichV >= 0:
- if BBoxRemove[nWhichP, nWhichV] == 1:
- DzMarch809Sel[i] = 1
- elif BBoxRemove[nWhichP, nWhichV] == 2:
- DzMarch809Sel[i] = 2
- elif BBoxRemove[nWhichP , nWhichV] == 3:
- DzMarch809Sel[i] = 0
- return DzMarch809Sel
-
- def windowFilter(self,nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line):
-
- PVLimit = np.zeros((nCounter1, 3))
- nLimitTotal = 0
- nWindowLength = 6
- LimitWindow = np.zeros(nWindowLength)
- UpLimit = 0
- LowLimit = 0
- PowerStd = 30
- nWindowNum = np.floor(nCounter1/nWindowLength)
- PowerLimitUp = ratedPower - 100
- PowerLimitLow = 100
- # 循环遍历每个窗口
- for i in range(int(nWindowNum)):
- start_idx = i * nWindowLength
- end_idx = start_idx + nWindowLength
- LimitWindow = DzMarch809[start_idx:end_idx, 1]
-
- bAllInAreas = np.all(LimitWindow >= PowerLimitLow) and np.all(LimitWindow <= PowerLimitUp)
- if not bAllInAreas:
- continue
-
- UpLimit = LimitWindow[0] + PowerStd
- LowLimit = LimitWindow[0] - PowerStd
-
- bAllInUpLow = np.all(LimitWindow >= LowLimit) and np.all(LimitWindow <= UpLimit)
- if bAllInUpLow:
- DzMarch809Sel[start_idx:end_idx] = 4
-
- for j in range(nWindowLength):
- PVLimit[nLimitTotal, :2] = DzMarch809[start_idx + j, :2]
- PVLimit[nLimitTotal, 2] = Point_line[start_idx + j] # 对数据进行标识
- nLimitTotal += 1
- return PVLimit,nLimitTotal
- def store_points(self,DzMarch809, DzMarch809Sel,Point_line, nCounter1):
-
- PVDot = np.zeros((nCounter1, 3))
- PVBad = np.zeros((nCounter1, 3))
- nCounterPV = 0
- nCounterBad = 0
- for i in range(nCounter1):
- if DzMarch809Sel[i] == 0:
- nCounterPV += 1
- PVDot[nCounterPV-1, :2] = DzMarch809[i, :2]
- PVDot[nCounterPV-1, 2] = Point_line[i]
- elif DzMarch809Sel[i] in [1, 2, 3]:
- nCounterBad += 1
- PVBad[nCounterBad-1, :2] = DzMarch809[i, :2]
- PVBad[nCounterBad-1, 2] = Point_line[i]
-
- return PVDot, nCounterPV,PVBad,nCounterBad
- def markAllData(self,nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit):
- for i in range(nCounterPV):
- dataFramePartOfSCADA[int(PVDot[i, 2] - 1), (SM[1]-1)] = 1
- #坏点
- for i in range(nCounterBad):
- dataFramePartOfSCADA[int(PVBad[i, 2] - 1),(SM[1]-1)] = 5 # 坏点标识
- # 对所有数据中的限电点进行标注
- for i in range(nLimitTotal):
- dataFramePartOfSCADA[int(PVLimit[i, 2] - 1),(SM[1]-1)] = 4 # 限电点标识
- return dataFramePartOfSCADA
-
- # 4. 数据可视化
- def plotData(self,turbineName:str,ws:list, ap:list):
- fig = plt.figure()
- plt.scatter(ws, ap, s=1, c='black', marker='.')
- ax = plt.gca()
- ax.xaxis.set_major_locator(MultipleLocator(5))
- ax.yaxis.set_major_locator(MultipleLocator(500))
- plt.title(turbineName)
- plt.xlim((0, 30))
- plt.ylim((0, 2200))
- plt.tick_params(labelsize=8)
- plt.xlabel("V/(m$·$s$^{-1}$)", fontsize=8)
- plt.ylabel("P/kW", fontsize=8)
- plt.show()
-
-
- def main(self,confData: ConfBusiness,dataFrame:pd.DataFrame):
- dataFramePartOfSCADA = self.preprocessData(dataFrame, confData)
- powerMax = dataFrame[confData.field_power].max()
- binNumOfPower, binNumOfWindSpeed = self.calculateIntervals(powerMax,confData.rated_power,confData.rated_cut_out_windspeed)
- TopP = self.calculateTopP(powerMax,confData.rated_power)
- # 根据功率阈值对数据进行标签分配
- DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM = self.chooseData(dataFramePartOfSCADA,dataFrame,confData)
- XBoxNumber = self.gridCount(binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809)
- PBoxPercent,PBinSum = self.percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'power')
- VBoxPercent,VBinSum = self.percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'speed')
- PBoxMaxIndex, PBoxMaxP = self.maxBoxPercentage(PBoxPercent, binNumOfPower, binNumOfWindSpeed, 'power')
- VBoxMaxIndex, VBoxMaxV = self.maxBoxPercentage(VBoxPercent, binNumOfPower, binNumOfWindSpeed, 'speed')
- if PBoxMaxIndex[0] > 14: PBoxMaxIndex[0] = 9
- DotDenseLeftRight = self.extendBoxPercent(90, PBoxMaxP,TopP,PBoxMaxIndex,PBoxPercent,binNumOfPower,binNumOfWindSpeed)
- WidthAverage, WidthAverage_L,PowerLimit = self.calculatePWidth(binNumOfPower,TopP,DotDenseLeftRight,PBinSum)
- PBoxMaxIndex = self.amendMaxBox(binNumOfPower,TopP,PowerLimit,PBoxMaxIndex)
- # 计算功率主带的左右边界
- CurveWidthR = np.ceil(WidthAverage) + 2
- CurveWidthL = np.ceil(WidthAverage_L) + 2
- #确定功率主带的左上拐点,即额定风速位置的网格索引
- CurveTop = np.zeros((2, 1), dtype=int)
- BTopFind = 0
- mm_value = None
- nn_value = None
- for m in range(binNumOfPower - TopP, 0, -1):
- for n in range(int(np.floor(int(confData.rated_cut_in_windspeed) / intervalWindspeed)), binNumOfWindSpeed - 1):
- if (VBoxPercent[m, n - 1] < VBoxPercent[m, n]) and (VBoxPercent[m, n] <= VBoxPercent[m, n + 1]) and (XBoxNumber[m, n] >= 3):
- CurveTop[0] = m
- CurveTop[1] = n #[第80个,第40个]
- BTopFind = 1
- mm_value = m
- nn_value = n
- break
- if BTopFind == 1:
- break
- #标记网格
- BBoxRemove = self.markBoxLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,PBoxMaxIndex)
- if mm_value is not None and nn_value is not None:
- BBoxLimit = self.markBoxPLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,PBoxPercent,PBoxMaxIndex,mm_value,BBoxRemove,nn_value)
- DzMarch809Sel = self.markData(binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1)
- PVLimit,nLimitTotal = self.windowFilter(nCounter1,confData.rated_power,DzMarch809,DzMarch809Sel,Point_line)
- #将功率滑动窗口主带平滑化
- nSmooth = 0
- for i in range(binNumOfPower - TopP - 1):
- PVLeftDown = np.zeros(2)
- PVRightUp = np.zeros(2)
- if PBoxMaxIndex[i + 1] - PBoxMaxIndex[i] >= 1:
- # 计算左下和右上顶点的坐标
- PVLeftDown[0] = (PBoxMaxIndex[i]+1 + CurveWidthR) * 0.25 - 0.125
- PVLeftDown[1] = (i) * 25
- PVRightUp[0] = (PBoxMaxIndex[i+1]+1 + CurveWidthR) * 0.25 - 0.125
- PVRightUp[1] = (i+1) * 25
-
- for m in range(nCounter1):
- # 检查当前点是否在锯齿区域内
- if (DzMarch809[m, 0] > PVLeftDown[0]) and (DzMarch809[m, 0] < PVRightUp[0]) and (DzMarch809[m, 1] > PVLeftDown[1]) and (DzMarch809[m, 1] < PVRightUp[1]):
- # 检查斜率是否大于对角连线
- if ((DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0])) > ((PVRightUp[1] - PVLeftDown[1]) / (PVRightUp[0] - PVLeftDown[0])):
- # 如果在锯齿左上三角形中,则选中并增加锯齿平滑计数器
- DzMarch809Sel[m] = 0
- nSmooth += 1
- # DzMarch809Sel 数组现在包含了锯齿平滑的选择结果,nSmooth 是选中的点数
- PVDot, nCounterPV,PVBad,nCounterBad = self.store_points(DzMarch809, DzMarch809Sel,Point_line, nCounter1)
- #标注
- dataFramePartOfSCADA = self.markAllData(nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit)
- A = dataFramePartOfSCADA[:,-1]
- A=pd.DataFrame(A,columns=['lab'])
- dataFrame = pd.concat([dataFrame,A],axis=1)
- """
- 标识 说明
- 5 坏点
- 4 限功率点
- 1 好点
- 0 null
- -1 P<=10
- """
- print("lab unique :",dataFrame['lab'].unique())
- # data=dataFrame[dataFrame['lab']==1]
- # self.plotData(data[Field_NameOfTurbine].iloc[0],data[confData.field_wind_speed],data[confData.field_power])
- return dataFrame
-
- if __name__ == '__main__':
- main()
|