|
|
@@ -0,0 +1,632 @@
|
|
|
+import os
|
|
|
+import pandas as pd
|
|
|
+import numpy as np
|
|
|
+import matplotlib.pyplot as plt
|
|
|
+from matplotlib.pyplot import MultipleLocator
|
|
|
+import math
|
|
|
+import pdb
|
|
|
+# pdb.set_trace() # 设置断点
|
|
|
+
|
|
|
+intervalPower = 25 # For example
|
|
|
+intervalWindspeed = 0.25 # For example
|
|
|
+
|
|
|
+fieldRatedPower="额定功率"
|
|
|
+fieldRatedWindSpeed="额定风速"
|
|
|
+fieldWindSpeedCutIn="切入风速"
|
|
|
+fieldWindSpeedCutOut="切出风速"
|
|
|
+
|
|
|
+fieldTime="时间"
|
|
|
+fieldWindSpeed="风速"
|
|
|
+fieldActivePower="变频器电网侧有功功率"
|
|
|
+fieldLabel="lab"
|
|
|
+
|
|
|
+# 1. 数据加载和预处理函数
|
|
|
+def loadData(filePathSCADA:str, filePathTurbineInfo:str):
|
|
|
+ dataFrameSCADA = pd.read_csv(filePathSCADA, encoding="utf-8")
|
|
|
+ dataFrameTurbineInfo = pd.read_csv(filePathTurbineInfo)
|
|
|
+ return dataFrameSCADA, dataFrameTurbineInfo
|
|
|
+
|
|
|
+def extractTurbineParameters(turbineInfo:pd.DataFrame):
|
|
|
+ """
|
|
|
+ 解析风电机组参数
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ turbineInfo 风电机组信息DataFrame
|
|
|
+
|
|
|
+ 返回:
|
|
|
+ PRated 额定功率(kw)
|
|
|
+ VCutOut 切出风速(m/s)
|
|
|
+ VCutIn 切入风速(m/s)
|
|
|
+ VRated 额定风速(m/s)
|
|
|
+ """
|
|
|
+ ratedPower = turbineInfo.loc[:, [fieldRatedPower]].values
|
|
|
+ windSpeedCutIn = turbineInfo.loc[:, [fieldWindSpeedCutIn]].values
|
|
|
+ windSpeedCutOut = turbineInfo.loc[:, [fieldWindSpeedCutOut]].values
|
|
|
+ ratedWindSpeed = turbineInfo.loc[:, [fieldRatedWindSpeed]].values
|
|
|
+
|
|
|
+ return ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed
|
|
|
+
|
|
|
+def preprocessData(dataFrameOfSCADA:pd.DataFrame):
|
|
|
+ """
|
|
|
+ 获取机组SCADA数据的 时间、有功功率、风速,构建新的DataFrame变量
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ dataFrameOfSCADA 机组SCADA数据
|
|
|
+
|
|
|
+ 返回:
|
|
|
+ 由机组SCADA数据的 时间、有功功率、风速,构建新的DataFrame变量
|
|
|
+
|
|
|
+ """
|
|
|
+ timeStamp = dataFrameOfSCADA.loc[:, ['时间']]
|
|
|
+ activePower = dataFrameOfSCADA.loc[:, ['变频器电网侧有功功率']]
|
|
|
+ windSpeed = dataFrameOfSCADA.loc[:, ['风速']]
|
|
|
+ dataFramePartOfSCADA = pd.concat([timeStamp,activePower,windSpeed], axis=1)
|
|
|
+
|
|
|
+ # dataFramePartOfSCADA[fieldLabel]=0
|
|
|
+ # dataFramePartOfSCADA[fieldLabel]=dataFramePartOfSCADA[fieldLabel].astype(int)
|
|
|
+
|
|
|
+ return dataFramePartOfSCADA
|
|
|
+
|
|
|
+
|
|
|
+# 2. 数据标签分配和分箱计算
|
|
|
+def calculateIntervals(activePowerMax, ratedPower, windSpeedCutOut):
|
|
|
+ """
|
|
|
+ 按有功功率(以25kw为间隔)、风速(以0.25m/s为间隔)分仓
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ max_power 当前机组的有功功率最大值
|
|
|
+ PRated 机组额定功率
|
|
|
+ wind_speed_cutout 切出风速
|
|
|
+
|
|
|
+ 返回:
|
|
|
+ interval_power 有功功率分仓间隔
|
|
|
+ interval_windspeed 风速分仓间隔
|
|
|
+ PNum 有功功率分仓数量
|
|
|
+ VNum 风速分仓数量
|
|
|
+ """
|
|
|
+ binNumOfPower = math.floor(activePowerMax / intervalPower) + 1 if activePowerMax >= ratedPower else math.floor(ratedPower / intervalPower)
|
|
|
+ binNumOfWindSpeed = math.ceil(windSpeedCutOut / intervalWindspeed)
|
|
|
+
|
|
|
+ return binNumOfPower, binNumOfWindSpeed
|
|
|
+
|
|
|
+def calculateTopP(activePowerMax,ratedPower):
|
|
|
+ """
|
|
|
+ 计算额定功率以上功率仓的个数
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ max_power 当前机组的有功功率最大值
|
|
|
+ PRated 机组额定功率
|
|
|
+
|
|
|
+ 返回:
|
|
|
+ TopP 额定功率以上功率仓的个数
|
|
|
+ """
|
|
|
+ TopP = 0
|
|
|
+ if activePowerMax >= ratedPower:
|
|
|
+ TopP = math.floor((activePowerMax - ratedPower) / intervalPower) + 1
|
|
|
+ else:
|
|
|
+ TopP = 0
|
|
|
+ return TopP
|
|
|
+
|
|
|
+def chooseData(dataFramePartOfSCADA:pd.DataFrame, dataFrameOfSCADA):
|
|
|
+ """
|
|
|
+ 根据特定条件对数据进行标签分配,例如功率和风速阈值。
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ dataFramePartOfSCADA (DataFrame): 包含时间和功率和风速数据的DataFrame。
|
|
|
+ dataFrameOfSCADA: 原始数据
|
|
|
+
|
|
|
+ 返回:
|
|
|
+ DzMarch809: array:V P lab: 38181。
|
|
|
+ nCounter1: 个数
|
|
|
+ dataFramePartOfSCADA:
|
|
|
+ """
|
|
|
+ # 初始化标签列
|
|
|
+ SM1 = dataFramePartOfSCADA.shape #(52561,3)
|
|
|
+ AA1 = SM1[0]
|
|
|
+ lab = [[0] for _ in range(AA1)]
|
|
|
+ lab = pd.DataFrame(lab,columns=['lab'])
|
|
|
+ dataFramePartOfSCADA = pd.concat([dataFramePartOfSCADA,lab],axis=1) #在tpv后加一列标签列
|
|
|
+ dataFramePartOfSCADA = dataFramePartOfSCADA.values
|
|
|
+ SM = dataFramePartOfSCADA.shape #(52561,4)
|
|
|
+ AA = SM[0]
|
|
|
+ nCounter1 = 0
|
|
|
+ DzMarch809_0 = np.zeros((AA, 3))
|
|
|
+ Point_line = np.zeros(AA, dtype=int)
|
|
|
+ APower = dataFrameOfSCADA[fieldActivePower]
|
|
|
+ WSpeed = dataFrameOfSCADA[fieldWindSpeed]
|
|
|
+
|
|
|
+ for i in range(AA):
|
|
|
+ if (APower[i] > 10) & (WSpeed[i] > 0):
|
|
|
+ nCounter1 += 1
|
|
|
+ DzMarch809_0[nCounter1-1, 0] = WSpeed[i]
|
|
|
+ DzMarch809_0[nCounter1-1, 1] = APower[i]
|
|
|
+ Point_line[nCounter1-1] = i+1
|
|
|
+ if APower[i] <= 10:
|
|
|
+ dataFramePartOfSCADA[i,SM[1]-1] = -1
|
|
|
+ DzMarch809 = DzMarch809_0[:nCounter1, :]
|
|
|
+ return DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM
|
|
|
+
|
|
|
+def gridCount(binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809):
|
|
|
+ """
|
|
|
+ 统计各网格中落入label!=-1的数据点个数
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ binNumOfWindSpeed: 风速分仓个数。
|
|
|
+ binNumOfPower: 功率分仓个数。
|
|
|
+ DataFrame: 带有新的'label'列的原始DataFrame。
|
|
|
+ nCounter1: 数据个数
|
|
|
+ DzMarch809
|
|
|
+ 返回:
|
|
|
+ XBoxNumber: 各网格中落入label!=-1的数据点个数的array。
|
|
|
+ """
|
|
|
+ # 遍历有效数据
|
|
|
+ XBoxNumber = np.ones((binNumOfPower, binNumOfWindSpeed),dtype=int)
|
|
|
+ for i in range(nCounter1):
|
|
|
+ for m in range(1, binNumOfPower + 1):
|
|
|
+ if (DzMarch809[i,1] > (m - 1) * intervalPower) and (DzMarch809[i,1] <= m * intervalPower):
|
|
|
+ nWhichP = m
|
|
|
+ break
|
|
|
+ for n in range(1, binNumOfWindSpeed + 1):
|
|
|
+ if (DzMarch809[i, 0] > (n - 1) * intervalWindspeed) and (DzMarch809[i, 0] <= n * intervalWindspeed):
|
|
|
+ nWhichV = n
|
|
|
+ break
|
|
|
+ if (nWhichP > 0) and (nWhichV > 0):
|
|
|
+ XBoxNumber[nWhichP - 1][nWhichV - 1] += 1
|
|
|
+ for m in range(1,binNumOfPower+1):
|
|
|
+ for n in range(1,binNumOfWindSpeed+1):
|
|
|
+ XBoxNumber[m-1,n-1] = XBoxNumber[m-1,n-1] - 1
|
|
|
+
|
|
|
+ return XBoxNumber
|
|
|
+
|
|
|
+def percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed,axis):
|
|
|
+ """
|
|
|
+ 计算分仓(水平/竖直)后每个网格占百分比
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ XBoxNumber: 各网格中落入label!=-1的数据点个数的array。
|
|
|
+ binNumOfPower: 功率分仓个数。
|
|
|
+ binNumOfWindSpeed: 风速分仓个数。
|
|
|
+ axis: "power"or"speed"分仓
|
|
|
+ 返回:
|
|
|
+ BoxPercent: 占比情况array。
|
|
|
+ """
|
|
|
+ BoxPercent = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=float)
|
|
|
+ BinSum = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed, 1), dtype=int)
|
|
|
+ for i in range(1,1+(binNumOfPower if axis == 'power' else binNumOfWindSpeed)):
|
|
|
+ for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1):
|
|
|
+ BinSum[i-1] = BinSum[i-1] + (XBoxNumber[i-1,m-1] if axis == 'power' else XBoxNumber[m-1,i-1])
|
|
|
+ for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1):
|
|
|
+ if BinSum[i-1]>0:
|
|
|
+ if axis == 'power':
|
|
|
+ BoxPercent[i-1,m-1] = (XBoxNumber[i-1,m-1] / BinSum[i-1])*100
|
|
|
+ else:
|
|
|
+ BoxPercent[m-1,i-1] = (XBoxNumber[m-1,i-1] / BinSum[i-1])*100
|
|
|
+
|
|
|
+ return BoxPercent,BinSum
|
|
|
+
|
|
|
+def maxBoxPercentage(BoxPercent, binNumOfPower, binNumOfWindSpeed, axis):
|
|
|
+ """
|
|
|
+ 计算分仓(水平/竖直)后占百分比最大的网格索引及值
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ BoxPercent: 占比情况array。
|
|
|
+ binNumOfPower: 功率分仓个数。
|
|
|
+ binNumOfWindSpeed: 风速分仓个数。
|
|
|
+ axis: "power"or"speed"分仓
|
|
|
+ 返回:
|
|
|
+ BoxMaxIndex: 占百分比最大的网格索引。
|
|
|
+ BoxMax: 占百分比最大的网格值
|
|
|
+ """
|
|
|
+ BoxMaxIndex = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = int)
|
|
|
+ BoxMax = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = float)
|
|
|
+ for m in range(1,(binNumOfPower if axis == 'power' else binNumOfWindSpeed)+1):
|
|
|
+ BoxMaxIndex[m-1] = (np.argmax(BoxPercent[m-1, :])) if axis == 'power' else (np.argmax(BoxPercent[:, m-1]))
|
|
|
+ BoxMax[m-1] = (np.max(BoxPercent[m-1, :]))if axis == 'power' else (np.max(BoxPercent[:, m-1]))
|
|
|
+
|
|
|
+ return BoxMaxIndex, BoxMax
|
|
|
+
|
|
|
+def extendBoxPercent(m, BoxMax,TopP,BoxMaxIndex,BoxPercent,binNumOfPower,binNumOfWindSpeed):
|
|
|
+ """
|
|
|
+ 以中心最大水平功率带为基准,向两侧对称扩展网格,使网格散点百分比总值达到阈值m
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ m: 设定总和百分比阈值。
|
|
|
+ BoxMax: 占百分比最大的网格值。
|
|
|
+ TopP: 额定功率以上功率仓个数。
|
|
|
+ BoxMaxIndex: 占百分比最大的网格索引。
|
|
|
+ BoxPercent: 占比情况array。
|
|
|
+ binNumOfPower: 功率分仓个数。
|
|
|
+ binNumOfWindSpeed: 风速分仓个数。
|
|
|
+ 返回:
|
|
|
+ DotDense: 每个功率仓内网格的个数。
|
|
|
+ DotDenseLeftRight: 向左向右拓展的网格个数
|
|
|
+ """
|
|
|
+ DotDense = np.zeros(binNumOfPower)
|
|
|
+ DotDenseLeftRight = np.zeros((binNumOfPower,2))
|
|
|
+ DotValve = m
|
|
|
+ PDotDenseSum = 0
|
|
|
+ for i in range(binNumOfPower - TopP):
|
|
|
+ PDotDenseSum = BoxMax[i]
|
|
|
+ iSpreadRight = 1
|
|
|
+ iSpreadLeft = 1
|
|
|
+ while PDotDenseSum < DotValve:
|
|
|
+ if (BoxMaxIndex[i] + iSpreadRight) < binNumOfWindSpeed-1-1:
|
|
|
+ PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] + iSpreadRight]
|
|
|
+ iSpreadRight += 1
|
|
|
+ else:
|
|
|
+ break
|
|
|
+ if (BoxMaxIndex[i] - iSpreadLeft) > 0:
|
|
|
+ PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] - iSpreadLeft]
|
|
|
+ iSpreadLeft += 1
|
|
|
+ else:
|
|
|
+ break
|
|
|
+ iSpreadRight = iSpreadRight-1
|
|
|
+ iSpreadLeft = iSpreadLeft-1
|
|
|
+
|
|
|
+ DotDenseLeftRight[i, 0] = iSpreadLeft
|
|
|
+ DotDenseLeftRight[i, 1] = iSpreadRight
|
|
|
+ DotDense[i] = iSpreadLeft + iSpreadRight + 1
|
|
|
+
|
|
|
+ return DotDenseLeftRight
|
|
|
+
|
|
|
+def calculatePWidth(binNumOfPower,TopP,DotDenseLeftRight,PBinSum):
|
|
|
+ """
|
|
|
+ 计算功率主带的平均宽度
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ binNumOfPower: 功率分仓个数。
|
|
|
+ TopP: 额定功率以上功率仓个数。
|
|
|
+ DotDenseLeftRight: 向左向右拓展的网格个数
|
|
|
+ PBinSum: 功率仓内数据点总和
|
|
|
+ 返回:
|
|
|
+ DotDense: 每个功率仓内网格的个数。
|
|
|
+ DotDenseLeftRight: 向左向右拓展的网格个数
|
|
|
+ PowerLimit: 各水平功率带是否为限功率标识,1:是;0:不是
|
|
|
+ """
|
|
|
+
|
|
|
+ PowerLimit = np.zeros(binNumOfPower, dtype=int)
|
|
|
+ WidthAverage = 0
|
|
|
+ WidthAverage_L = 0
|
|
|
+ nCounter = 0
|
|
|
+ PowerLimitValve = 6
|
|
|
+ N_Pcount = 20
|
|
|
+ for i in range(binNumOfPower - TopP):
|
|
|
+ if (DotDenseLeftRight[i, 1] > PowerLimitValve) and (PBinSum[i] > N_Pcount):
|
|
|
+ PowerLimit[i] = 1
|
|
|
+
|
|
|
+ if DotDenseLeftRight[i, 1] <= PowerLimitValve:
|
|
|
+ WidthAverage += DotDenseLeftRight[i, 1]
|
|
|
+ WidthAverage_L += DotDenseLeftRight[i,1]
|
|
|
+ nCounter += 1
|
|
|
+ WidthAverage /= nCounter if nCounter > 0 else 1
|
|
|
+ WidthAverage_L /= nCounter if nCounter > 0 else 1
|
|
|
+
|
|
|
+ return WidthAverage, WidthAverage_L,PowerLimit
|
|
|
+
|
|
|
+def amendMaxBox(binNumOfPower,TopP,PowerLimit,BoxMaxIndex):
|
|
|
+ """
|
|
|
+ 对限负荷水平功率带的最大网格进行修正
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ binNumOfPower: 功率分仓个数。
|
|
|
+ TopP: 额定功率以上功率仓个数。
|
|
|
+ PowerLimit:标识限功率水平功率带,1:是;0:不是
|
|
|
+ BoxMaxIndex: 占百分比最大的网格索引
|
|
|
+ 返回:
|
|
|
+ BoxMaxIndex: 修正后的最大占比网格索引
|
|
|
+ """
|
|
|
+
|
|
|
+ for i in range(1, binNumOfPower - TopP+1):
|
|
|
+ if (PowerLimit[i] == 1) and (abs(BoxMaxIndex[i] - BoxMaxIndex[i - 1]) > 5):
|
|
|
+ BoxMaxIndex[i] = BoxMaxIndex[i - 1] + 1
|
|
|
+
|
|
|
+ return BoxMaxIndex
|
|
|
+
|
|
|
+def markBoxLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,BoxMaxIndex):
|
|
|
+ '''
|
|
|
+ 标记需剔除的网格
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ binNumOfPower: 功率分仓个数。
|
|
|
+ binNumOfWindSpeed:风速分仓个数
|
|
|
+ TopP: 额定功率以上功率仓个数。
|
|
|
+ CurveWidthR:功率主带轮廓
|
|
|
+ CurveWidthL
|
|
|
+ BoxMaxIndex: 修正后的最大占比网格索引
|
|
|
+ 返回:
|
|
|
+ BBoxRemove: 标识需剔除的网格
|
|
|
+ '''
|
|
|
+ BBoxRemove = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)
|
|
|
+ for m in range(binNumOfPower - TopP):
|
|
|
+ for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed):
|
|
|
+ BBoxRemove[m, n] = 1
|
|
|
+ for n in range(int(BoxMaxIndex[m]) - int(CurveWidthL)+1, 0, -1):
|
|
|
+ BBoxRemove[m, n-1] = 2
|
|
|
+ return BBoxRemove
|
|
|
+
|
|
|
+def markBoxPLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,BoxPercent,BoxMaxIndex,mm,BBoxRemove,nn):
|
|
|
+ '''
|
|
|
+ 标记限功率网格
|
|
|
+ 1:右侧欠发 2:左侧超发 3:额定功率以上超发
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ binNumOfPower: 功率分仓个数。
|
|
|
+ binNumOfWindSpeed:风速分仓个数
|
|
|
+ TopP: 额定功率以上功率仓个数。
|
|
|
+ CurveWidthR:功率主带轮廓
|
|
|
+ PowerLimit: 标识限功率水平功率带,1:是;0:不是
|
|
|
+ BoxMaxIndex: 修正后的最大占比网格索引
|
|
|
+ mm: 拐点所在功率仓
|
|
|
+ BBoxRemove:需剔除的网格
|
|
|
+ CurveTop1:拐点对应列
|
|
|
+ 返回:
|
|
|
+ BBoxLimit:标识限功率网格
|
|
|
+ '''
|
|
|
+ BBoxLimit = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)
|
|
|
+ for i in range(2, binNumOfPower - TopP):
|
|
|
+ if PowerLimit[i] == 1:
|
|
|
+ BBoxLimit[i, int(BoxMaxIndex[i] + CurveWidthR + 1):binNumOfWindSpeed] = 1
|
|
|
+ IsolateValve = 3
|
|
|
+ for m in range(binNumOfPower - TopP):
|
|
|
+ for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed):
|
|
|
+ if BoxPercent[m, n] < IsolateValve:
|
|
|
+ BBoxRemove[m, n] = 1
|
|
|
+
|
|
|
+ for m in range(binNumOfPower - TopP, binNumOfPower):
|
|
|
+ for n in range(binNumOfWindSpeed):
|
|
|
+ BBoxRemove[m, n] = 3
|
|
|
+
|
|
|
+ # 标记功率主带拐点左侧的欠发网格
|
|
|
+ for m in range(mm-1, binNumOfPower - TopP):
|
|
|
+ for n in range(int(nn) - 2):
|
|
|
+ BBoxRemove[m, n] = 2
|
|
|
+
|
|
|
+ return BBoxLimit
|
|
|
+
|
|
|
+def markData(binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1):
|
|
|
+ '''
|
|
|
+ 根据网格标识来标记数据点
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ nCounter1
|
|
|
+ binNumOfPower: 功率分仓个数。
|
|
|
+ binNumOfWindSpeed:风速分仓个数
|
|
|
+ DzMarch809: array V P lab: 38181。
|
|
|
+ BBoxRemove:需剔除的网格
|
|
|
+
|
|
|
+ 返回:
|
|
|
+ DzMarch809Sel:数组现在包含了每个数据点的标识
|
|
|
+ '''
|
|
|
+ DzMarch809Sel = np.zeros(nCounter1, dtype=int)
|
|
|
+ nWhichP = 0
|
|
|
+ nWhichV = 0
|
|
|
+ for i in range(nCounter1):
|
|
|
+ for m in range( binNumOfPower ):
|
|
|
+ if ((DzMarch809[i,1])> m * intervalPower) and ((DzMarch809[i,1]) <= (m+1) * intervalPower):
|
|
|
+ nWhichP = m #m记录的是index
|
|
|
+ break
|
|
|
+ for n in range( binNumOfWindSpeed ):
|
|
|
+ if DzMarch809[i,0] > ((n+1) * intervalWindspeed - intervalWindspeed/2) and DzMarch809[i,0] <= ((n+1) * intervalWindspeed + intervalWindspeed / 2):
|
|
|
+ nWhichV = n
|
|
|
+ break
|
|
|
+ if nWhichP >= 0 and nWhichV >= 0:
|
|
|
+ if BBoxRemove[nWhichP, nWhichV] == 1:
|
|
|
+ DzMarch809Sel[i] = 1
|
|
|
+ elif BBoxRemove[nWhichP, nWhichV] == 2:
|
|
|
+ DzMarch809Sel[i] = 2
|
|
|
+ elif BBoxRemove[nWhichP , nWhichV] == 3:
|
|
|
+ DzMarch809Sel[i] = 0
|
|
|
+
|
|
|
+ return DzMarch809Sel
|
|
|
+
|
|
|
+
|
|
|
+def windowFilter(nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line):
|
|
|
+ '''
|
|
|
+ 滑动窗口方法,进一步标记数据坏点
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ nCounter1:
|
|
|
+ ratedPower:
|
|
|
+ Point_line:
|
|
|
+
|
|
|
+ 返回:
|
|
|
+ PVLimit: 限负荷数据
|
|
|
+ nLimitTotal: 是限负荷数据的总数
|
|
|
+ '''
|
|
|
+
|
|
|
+ PVLimit = np.zeros((nCounter1, 3))
|
|
|
+ nLimitTotal = 0
|
|
|
+ nWindowLength = 6
|
|
|
+ LimitWindow = np.zeros(nWindowLength)
|
|
|
+ UpLimit = 0
|
|
|
+ LowLimit = 0
|
|
|
+ PowerStd = 30
|
|
|
+ nWindowNum = np.floor(nCounter1/nWindowLength)
|
|
|
+ PowerLimitUp = ratedPower - 100
|
|
|
+ PowerLimitLow = 100
|
|
|
+
|
|
|
+ # 循环遍历每个窗口
|
|
|
+ for i in range(int(nWindowNum)):
|
|
|
+ start_idx = i * nWindowLength
|
|
|
+ end_idx = start_idx + nWindowLength
|
|
|
+ LimitWindow = DzMarch809[start_idx:end_idx, 1]
|
|
|
+
|
|
|
+ bAllInAreas = np.all(LimitWindow >= PowerLimitLow) and np.all(LimitWindow <= PowerLimitUp)
|
|
|
+ if not bAllInAreas:
|
|
|
+ continue
|
|
|
+
|
|
|
+ UpLimit = LimitWindow[0] + PowerStd
|
|
|
+ LowLimit = LimitWindow[0] - PowerStd
|
|
|
+
|
|
|
+ bAllInUpLow = np.all(LimitWindow >= LowLimit) and np.all(LimitWindow <= UpLimit)
|
|
|
+ if bAllInUpLow:
|
|
|
+ DzMarch809Sel[start_idx:end_idx] = 4
|
|
|
+
|
|
|
+ for j in range(nWindowLength):
|
|
|
+ PVLimit[nLimitTotal, :2] = DzMarch809[start_idx + j, :2]
|
|
|
+ PVLimit[nLimitTotal, 2] = Point_line[start_idx + j] # 对数据进行标识
|
|
|
+ nLimitTotal += 1
|
|
|
+ return PVLimit,nLimitTotal
|
|
|
+
|
|
|
+def store_points(DzMarch809, DzMarch809Sel,Point_line, nCounter1):
|
|
|
+ """
|
|
|
+ 存储好点,并返回存储好的点的数组和计数。
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ DzMarch809: array:V P lab: 38181。
|
|
|
+ DzMarch809Sel: 数组现在包含了每个数据点的标识
|
|
|
+ Point_line:
|
|
|
+ nCounter1:
|
|
|
+ axis: 'good' or 'bad'
|
|
|
+
|
|
|
+ 返回:
|
|
|
+ PVDot: 数据
|
|
|
+ nCounterPV: 数据个数
|
|
|
+
|
|
|
+ """
|
|
|
+ PVDot = np.zeros((nCounter1, 3))
|
|
|
+ PVBad = np.zeros((nCounter1, 3))
|
|
|
+
|
|
|
+ nCounterPV = 0
|
|
|
+ nCounterBad = 0
|
|
|
+ for i in range(nCounter1):
|
|
|
+ if DzMarch809Sel[i] == 0:
|
|
|
+ nCounterPV += 1
|
|
|
+ PVDot[nCounterPV-1, :2] = DzMarch809[i, :2]
|
|
|
+ PVDot[nCounterPV-1, 2] = Point_line[i]
|
|
|
+ elif DzMarch809Sel[i] in [1, 2, 3]:
|
|
|
+ nCounterBad += 1
|
|
|
+ PVBad[nCounterBad-1, :2] = DzMarch809[i, :2]
|
|
|
+ PVBad[nCounterBad-1, 2] = Point_line[i]
|
|
|
+
|
|
|
+ return PVDot, nCounterPV,PVBad,nCounterBad
|
|
|
+
|
|
|
+def markAllData(nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit):
|
|
|
+ """
|
|
|
+ 标记好点、坏点、限电点。
|
|
|
+
|
|
|
+ 参数:
|
|
|
+ nCounterPV
|
|
|
+ nCounterBad
|
|
|
+ dataFramePartOfSCADA
|
|
|
+ PVDot
|
|
|
+ PVBad
|
|
|
+ SM
|
|
|
+ nLimitTotal
|
|
|
+ PVLimit
|
|
|
+
|
|
|
+ 返回:
|
|
|
+ dataFramePartOfSCADA
|
|
|
+
|
|
|
+ """
|
|
|
+
|
|
|
+ for i in range(nCounterPV):
|
|
|
+ dataFramePartOfSCADA[int(PVDot[i, 2] - 1), (SM[1]-1)] = 1
|
|
|
+ #坏点
|
|
|
+ for i in range(nCounterBad):
|
|
|
+ dataFramePartOfSCADA[int(PVBad[i, 2] - 1),(SM[1]-1)] = 5 # 坏点标识
|
|
|
+
|
|
|
+ # 对所有数据中的限电点进行标注
|
|
|
+ for i in range(nLimitTotal):
|
|
|
+ dataFramePartOfSCADA[int(PVLimit[i, 2] - 1),(SM[1]-1)] = 4 # 限电点标识
|
|
|
+
|
|
|
+ return dataFramePartOfSCADA
|
|
|
+# 4. 数据可视化
|
|
|
+def plot_data(ws:list, ap:list):
|
|
|
+ fig = plt.figure()
|
|
|
+ plt.scatter(ws, ap, s=1, c='black', marker='.')
|
|
|
+ ax = plt.gca()
|
|
|
+ ax.xaxis.set_major_locator(MultipleLocator(5))
|
|
|
+ ax.yaxis.set_major_locator(MultipleLocator(500))
|
|
|
+ plt.xlim((0, 30))
|
|
|
+ plt.ylim((0, 2200))
|
|
|
+ plt.tick_params(labelsize=8)
|
|
|
+ plt.xlabel("V/(m$·$s$^{-1}$)", fontsize=8)
|
|
|
+ plt.ylabel("P/kW", fontsize=8)
|
|
|
+ plt.show()
|
|
|
+
|
|
|
+# 5. Main Execution
|
|
|
+def main():
|
|
|
+ turbine=85
|
|
|
+ basePath=r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72'
|
|
|
+ filePathSCADA = r'{}\{}.csv'.format(basePath,turbine)
|
|
|
+ filePathTurbineInfo = r'{}\info.csv'.format(basePath)
|
|
|
+ outputFilePathOfSCADA=r"{}\labeled\labeled_{}.csv".format(basePath,turbine)
|
|
|
+
|
|
|
+ dataFrameOfSCADA, turbineInfo = loadData(filePathSCADA, filePathTurbineInfo)
|
|
|
+ ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed = extractTurbineParameters(turbineInfo)
|
|
|
+ dataFramePartOfSCADA = preprocessData(dataFrameOfSCADA)
|
|
|
+ powerMax=dataFramePartOfSCADA[fieldActivePower].max()
|
|
|
+
|
|
|
+ binNumOfPower, binNumOfWindSpeed = calculateIntervals(powerMax,ratedPower,windSpeedCutOut)
|
|
|
+ TopP = calculateTopP(powerMax,ratedPower)
|
|
|
+ # 根据功率阈值对数据进行标签分配
|
|
|
+ DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM = chooseData(dataFramePartOfSCADA, dataFrameOfSCADA)
|
|
|
+ XBoxNumber = gridCount(binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809)
|
|
|
+ PBoxPercent,PBinSum = percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'power')
|
|
|
+ VBoxPercent,VBinSum = percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'speed')
|
|
|
+
|
|
|
+ PBoxMaxIndex, PBoxMaxP = maxBoxPercentage(PBoxPercent, binNumOfPower, binNumOfWindSpeed, 'power')
|
|
|
+ VBoxMaxIndex, VBoxMaxV = maxBoxPercentage(VBoxPercent, binNumOfPower, binNumOfWindSpeed, 'speed')
|
|
|
+ if PBoxMaxIndex[0] > 14: PBoxMaxIndex[0] = 9
|
|
|
+ DotDenseLeftRight = extendBoxPercent(90, PBoxMaxP,TopP,PBoxMaxIndex,PBoxPercent,binNumOfPower,binNumOfWindSpeed)
|
|
|
+ # pdb.set_trace() # 设置断点
|
|
|
+ WidthAverage, WidthAverage_L,PowerLimit = calculatePWidth(binNumOfPower,TopP,DotDenseLeftRight,PBinSum)
|
|
|
+ PBoxMaxIndex = amendMaxBox(binNumOfPower,TopP,PowerLimit,PBoxMaxIndex)
|
|
|
+ # 计算功率主带的左右边界
|
|
|
+ CurveWidthR = np.ceil(WidthAverage) + 2
|
|
|
+ CurveWidthL = np.ceil(WidthAverage_L) + 2
|
|
|
+ #确定功率主带的左上拐点,即额定风速位置的网格索引
|
|
|
+ CurveTop = np.zeros((2, 1), dtype=int)
|
|
|
+ BTopFind = 0
|
|
|
+ for m in range(binNumOfPower - TopP, 0, -1):
|
|
|
+ for n in range(int(np.floor(int(windSpeedCutIn) / intervalWindspeed)), binNumOfWindSpeed - 1):
|
|
|
+ if (VBoxPercent[m, n - 1] < VBoxPercent[m, n]) and (VBoxPercent[m, n] <= VBoxPercent[m, n + 1]) and (XBoxNumber[m, n] >= 3):
|
|
|
+ CurveTop[0] = m
|
|
|
+ CurveTop[1] = n #[第80个,第40个]
|
|
|
+ BTopFind = 1
|
|
|
+ mm = m
|
|
|
+ nn = n
|
|
|
+ break
|
|
|
+ if BTopFind == 1:
|
|
|
+ break
|
|
|
+ #标记网格
|
|
|
+ BBoxRemove = markBoxLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,PBoxMaxIndex)
|
|
|
+ BBoxLimit = markBoxPLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,PBoxPercent,PBoxMaxIndex,mm,BBoxRemove,nn)
|
|
|
+ DzMarch809Sel = markData(binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1)
|
|
|
+ PVLimit,nLimitTotal = windowFilter(nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line)
|
|
|
+ #将功率滑动窗口主带平滑化
|
|
|
+ nSmooth = 0
|
|
|
+ for i in range(binNumOfPower - TopP - 1):
|
|
|
+ PVLeftDown = np.zeros(2)
|
|
|
+ PVRightUp = np.zeros(2)
|
|
|
+ if PBoxMaxIndex[i + 1] - PBoxMaxIndex[i] >= 1:
|
|
|
+ # 计算左下和右上顶点的坐标
|
|
|
+ PVLeftDown[0] = (PBoxMaxIndex[i]+1 + CurveWidthR) * 0.25 - 0.125
|
|
|
+ PVLeftDown[1] = (i) * 25
|
|
|
+ PVRightUp[0] = (PBoxMaxIndex[i+1]+1 + CurveWidthR) * 0.25 - 0.125
|
|
|
+ PVRightUp[1] = (i+1) * 25
|
|
|
+
|
|
|
+ for m in range(nCounter1):
|
|
|
+ # 检查当前点是否在锯齿区域内
|
|
|
+ if (DzMarch809[m, 0] > PVLeftDown[0]) and (DzMarch809[m, 0] < PVRightUp[0]) and (DzMarch809[m, 1] > PVLeftDown[1]) and (DzMarch809[m, 1] < PVRightUp[1]):
|
|
|
+ # 检查斜率是否大于对角连线
|
|
|
+ if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (PVRightUp[1] - PVLeftDown[1]) / (PVRightUp[0] - PVLeftDown[0]):
|
|
|
+ # 如果在锯齿左上三角形中,则选中并增加锯齿平滑计数器
|
|
|
+ DzMarch809Sel[m] = 0
|
|
|
+ nSmooth += 1
|
|
|
+ # DzMarch809Sel 数组现在包含了锯齿平滑的选择结果,nSmooth 是选中的点数
|
|
|
+ PVDot, nCounterPV,PVBad,nCounterBad = store_points(DzMarch809, DzMarch809Sel,Point_line, nCounter1)
|
|
|
+ #标注
|
|
|
+ dataFramePartOfSCADA = markAllData(nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit)
|
|
|
+ A = dataFramePartOfSCADA[:,3]
|
|
|
+ A=pd.DataFrame(A,columns=['lab'])
|
|
|
+
|
|
|
+ labeledData = pd.concat([dataFrameOfSCADA,A],axis=1)
|
|
|
+ D = labeledData[labeledData['lab'].isin([-1,0,1,2,3,4,5])]#选择为1的行
|
|
|
+ labeledData.to_csv(outputFilePathOfSCADA,encoding='utf-8')
|
|
|
+ plot_data(D[fieldWindSpeed], D[fieldActivePower])
|
|
|
+
|
|
|
+
|
|
|
+if __name__ == '__main__':
|
|
|
+ main()
|