# SCADA_10min_category_3.py
  1. import os
  2. import pandas as pd
  3. import numpy as np
  4. import matplotlib.pyplot as plt
  5. from matplotlib.pyplot import MultipleLocator
  6. import math
  7. import pdb
  8. # pdb.set_trace() # 设置断点
  9. intervalPower = 25 # For example
  10. intervalWindspeed = 0.25 # For example
  11. fieldRatedPower="额定功率"
  12. fieldRatedWindSpeed="额定风速"
  13. fieldWindSpeedCutIn="切入风速"
  14. fieldWindSpeedCutOut="切出风速"
  15. fieldTime="时间"
  16. fieldWindSpeed="风速"
  17. fieldActivePower="变频器电网侧有功功率"
  18. fieldLabel="lab"
# 1. Data loading and preprocessing functions
  20. def loadData(filePathSCADA:str, filePathTurbineInfo:str):
  21. dataFrameSCADA = pd.read_csv(filePathSCADA, encoding="utf-8")
  22. dataFrameTurbineInfo = pd.read_csv(filePathTurbineInfo)
  23. return dataFrameSCADA, dataFrameTurbineInfo
  24. def extractTurbineParameters(turbineInfo:pd.DataFrame):
  25. """
  26. 解析风电机组参数
  27. 参数:
  28. turbineInfo 风电机组信息DataFrame
  29. 返回:
  30. PRated 额定功率(kw)
  31. VCutOut 切出风速(m/s)
  32. VCutIn 切入风速(m/s)
  33. VRated 额定风速(m/s)
  34. """
  35. ratedPower = turbineInfo.loc[:, [fieldRatedPower]].values
  36. windSpeedCutIn = turbineInfo.loc[:, [fieldWindSpeedCutIn]].values
  37. windSpeedCutOut = turbineInfo.loc[:, [fieldWindSpeedCutOut]].values
  38. ratedWindSpeed = turbineInfo.loc[:, [fieldRatedWindSpeed]].values
  39. return ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed
  40. def preprocessData(dataFrameOfSCADA:pd.DataFrame):
  41. """
  42. 获取机组SCADA数据的 时间、有功功率、风速,构建新的DataFrame变量
  43. 参数:
  44. dataFrameOfSCADA 机组SCADA数据
  45. 返回:
  46. 由机组SCADA数据的 时间、有功功率、风速,构建新的DataFrame变量
  47. """
  48. timeStamp = dataFrameOfSCADA.loc[:, ['时间']]
  49. activePower = dataFrameOfSCADA.loc[:, ['变频器电网侧有功功率']]
  50. windSpeed = dataFrameOfSCADA.loc[:, ['风速']]
  51. dataFramePartOfSCADA = pd.concat([timeStamp,activePower,windSpeed], axis=1)
  52. # dataFramePartOfSCADA[fieldLabel]=0
  53. # dataFramePartOfSCADA[fieldLabel]=dataFramePartOfSCADA[fieldLabel].astype(int)
  54. return dataFramePartOfSCADA
# 2. Label assignment and bin calculation
  56. def calculateIntervals(activePowerMax, ratedPower, windSpeedCutOut):
  57. """
  58. 按有功功率(以25kw为间隔)、风速(以0.25m/s为间隔)分仓
  59. 参数:
  60. max_power 当前机组的有功功率最大值
  61. PRated 机组额定功率
  62. wind_speed_cutout 切出风速
  63. 返回:
  64. interval_power 有功功率分仓间隔
  65. interval_windspeed 风速分仓间隔
  66. PNum 有功功率分仓数量
  67. VNum 风速分仓数量
  68. """
  69. binNumOfPower = math.floor(activePowerMax / intervalPower) + 1 if activePowerMax >= ratedPower else math.floor(ratedPower / intervalPower)
  70. binNumOfWindSpeed = math.ceil(windSpeedCutOut / intervalWindspeed)
  71. return binNumOfPower, binNumOfWindSpeed
  72. def calculateTopP(activePowerMax,ratedPower):
  73. """
  74. 计算额定功率以上功率仓的个数
  75. 参数:
  76. max_power 当前机组的有功功率最大值
  77. PRated 机组额定功率
  78. 返回:
  79. TopP 额定功率以上功率仓的个数
  80. """
  81. TopP = 0
  82. if activePowerMax >= ratedPower:
  83. TopP = math.floor((activePowerMax - ratedPower) / intervalPower) + 1
  84. else:
  85. TopP = 0
  86. return TopP
  87. def chooseData(dataFramePartOfSCADA:pd.DataFrame, dataFrameOfSCADA):
  88. """
  89. 根据特定条件对数据进行标签分配,例如功率和风速阈值。
  90. 参数:
  91. dataFramePartOfSCADA (DataFrame): 包含时间和功率和风速数据的DataFrame。
  92. dataFrameOfSCADA: 原始数据
  93. 返回:
  94. DzMarch809: array:V P lab: 38181。
  95. nCounter1: 个数
  96. dataFramePartOfSCADA:
  97. """
  98. # 初始化标签列
  99. SM1 = dataFramePartOfSCADA.shape #(52561,3)
  100. AA1 = SM1[0]
  101. lab = [[0] for _ in range(AA1)]
  102. lab = pd.DataFrame(lab,columns=['lab'])
  103. dataFramePartOfSCADA = pd.concat([dataFramePartOfSCADA,lab],axis=1) #在tpv后加一列标签列
  104. dataFramePartOfSCADA = dataFramePartOfSCADA.values
  105. SM = dataFramePartOfSCADA.shape #(52561,4)
  106. AA = SM[0]
  107. nCounter1 = 0
  108. DzMarch809_0 = np.zeros((AA, 3))
  109. Point_line = np.zeros(AA, dtype=int)
  110. APower = dataFrameOfSCADA[fieldActivePower]
  111. WSpeed = dataFrameOfSCADA[fieldWindSpeed]
  112. for i in range(AA):
  113. if (APower[i] > 10) & (WSpeed[i] > 0):
  114. nCounter1 += 1
  115. DzMarch809_0[nCounter1-1, 0] = WSpeed[i]
  116. DzMarch809_0[nCounter1-1, 1] = APower[i]
  117. Point_line[nCounter1-1] = i+1
  118. if APower[i] <= 10:
  119. dataFramePartOfSCADA[i,SM[1]-1] = -1
  120. DzMarch809 = DzMarch809_0[:nCounter1, :]
  121. return DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM
  122. def gridCount(binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809):
  123. """
  124. 统计各网格中落入label!=-1的数据点个数
  125. 参数:
  126. binNumOfWindSpeed: 风速分仓个数。
  127. binNumOfPower: 功率分仓个数。
  128. DataFrame: 带有新的'label'列的原始DataFrame。
  129. nCounter1: 数据个数
  130. DzMarch809
  131. 返回:
  132. XBoxNumber: 各网格中落入label!=-1的数据点个数的array。
  133. """
  134. # 遍历有效数据
  135. XBoxNumber = np.ones((binNumOfPower, binNumOfWindSpeed),dtype=int)
  136. for i in range(nCounter1):
  137. for m in range(1, binNumOfPower + 1):
  138. if (DzMarch809[i,1] > (m - 1) * intervalPower) and (DzMarch809[i,1] <= m * intervalPower):
  139. nWhichP = m
  140. break
  141. for n in range(1, binNumOfWindSpeed + 1):
  142. if (DzMarch809[i, 0] > (n - 1) * intervalWindspeed) and (DzMarch809[i, 0] <= n * intervalWindspeed):
  143. nWhichV = n
  144. break
  145. if (nWhichP > 0) and (nWhichV > 0):
  146. XBoxNumber[nWhichP - 1][nWhichV - 1] += 1
  147. for m in range(1,binNumOfPower+1):
  148. for n in range(1,binNumOfWindSpeed+1):
  149. XBoxNumber[m-1,n-1] = XBoxNumber[m-1,n-1] - 1
  150. return XBoxNumber
  151. def percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed,axis):
  152. """
  153. 计算分仓(水平/竖直)后每个网格占百分比
  154. 参数:
  155. XBoxNumber: 各网格中落入label!=-1的数据点个数的array。
  156. binNumOfPower: 功率分仓个数。
  157. binNumOfWindSpeed: 风速分仓个数。
  158. axis: "power"or"speed"分仓
  159. 返回:
  160. BoxPercent: 占比情况array。
  161. """
  162. BoxPercent = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=float)
  163. BinSum = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed, 1), dtype=int)
  164. for i in range(1,1+(binNumOfPower if axis == 'power' else binNumOfWindSpeed)):
  165. for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1):
  166. BinSum[i-1] = BinSum[i-1] + (XBoxNumber[i-1,m-1] if axis == 'power' else XBoxNumber[m-1,i-1])
  167. for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1):
  168. if BinSum[i-1]>0:
  169. if axis == 'power':
  170. BoxPercent[i-1,m-1] = (XBoxNumber[i-1,m-1] / BinSum[i-1])*100
  171. else:
  172. BoxPercent[m-1,i-1] = (XBoxNumber[m-1,i-1] / BinSum[i-1])*100
  173. return BoxPercent,BinSum
  174. def maxBoxPercentage(BoxPercent, binNumOfPower, binNumOfWindSpeed, axis):
  175. """
  176. 计算分仓(水平/竖直)后占百分比最大的网格索引及值
  177. 参数:
  178. BoxPercent: 占比情况array。
  179. binNumOfPower: 功率分仓个数。
  180. binNumOfWindSpeed: 风速分仓个数。
  181. axis: "power"or"speed"分仓
  182. 返回:
  183. BoxMaxIndex: 占百分比最大的网格索引。
  184. BoxMax: 占百分比最大的网格值
  185. """
  186. BoxMaxIndex = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = int)
  187. BoxMax = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = float)
  188. for m in range(1,(binNumOfPower if axis == 'power' else binNumOfWindSpeed)+1):
  189. BoxMaxIndex[m-1] = (np.argmax(BoxPercent[m-1, :])) if axis == 'power' else (np.argmax(BoxPercent[:, m-1]))
  190. BoxMax[m-1] = (np.max(BoxPercent[m-1, :]))if axis == 'power' else (np.max(BoxPercent[:, m-1]))
  191. return BoxMaxIndex, BoxMax
  192. def extendBoxPercent(m, BoxMax,TopP,BoxMaxIndex,BoxPercent,binNumOfPower,binNumOfWindSpeed):
  193. """
  194. 以中心最大水平功率带为基准,向两侧对称扩展网格,使网格散点百分比总值达到阈值m
  195. 参数:
  196. m: 设定总和百分比阈值。
  197. BoxMax: 占百分比最大的网格值。
  198. TopP: 额定功率以上功率仓个数。
  199. BoxMaxIndex: 占百分比最大的网格索引。
  200. BoxPercent: 占比情况array。
  201. binNumOfPower: 功率分仓个数。
  202. binNumOfWindSpeed: 风速分仓个数。
  203. 返回:
  204. DotDense: 每个功率仓内网格的个数。
  205. DotDenseLeftRight: 向左向右拓展的网格个数
  206. """
  207. DotDense = np.zeros(binNumOfPower)
  208. DotDenseLeftRight = np.zeros((binNumOfPower,2))
  209. DotValve = m
  210. PDotDenseSum = 0
  211. for i in range(binNumOfPower - TopP):
  212. PDotDenseSum = BoxMax[i]
  213. iSpreadRight = 1
  214. iSpreadLeft = 1
  215. while PDotDenseSum < DotValve:
  216. if (BoxMaxIndex[i] + iSpreadRight) < binNumOfWindSpeed-1-1:
  217. PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] + iSpreadRight]
  218. iSpreadRight += 1
  219. else:
  220. break
  221. if (BoxMaxIndex[i] - iSpreadLeft) > 0:
  222. PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] - iSpreadLeft]
  223. iSpreadLeft += 1
  224. else:
  225. break
  226. iSpreadRight = iSpreadRight-1
  227. iSpreadLeft = iSpreadLeft-1
  228. DotDenseLeftRight[i, 0] = iSpreadLeft
  229. DotDenseLeftRight[i, 1] = iSpreadRight
  230. DotDense[i] = iSpreadLeft + iSpreadRight + 1
  231. return DotDenseLeftRight
  232. def calculatePWidth(binNumOfPower,TopP,DotDenseLeftRight,PBinSum):
  233. """
  234. 计算功率主带的平均宽度
  235. 参数:
  236. binNumOfPower: 功率分仓个数。
  237. TopP: 额定功率以上功率仓个数。
  238. DotDenseLeftRight: 向左向右拓展的网格个数
  239. PBinSum: 功率仓内数据点总和
  240. 返回:
  241. DotDense: 每个功率仓内网格的个数。
  242. DotDenseLeftRight: 向左向右拓展的网格个数
  243. PowerLimit: 各水平功率带是否为限功率标识,1:是;0:不是
  244. """
  245. PowerLimit = np.zeros(binNumOfPower, dtype=int)
  246. WidthAverage = 0
  247. WidthAverage_L = 0
  248. nCounter = 0
  249. PowerLimitValve = 6
  250. N_Pcount = 20
  251. for i in range(binNumOfPower - TopP):
  252. if (DotDenseLeftRight[i, 1] > PowerLimitValve) and (PBinSum[i] > N_Pcount):
  253. PowerLimit[i] = 1
  254. if DotDenseLeftRight[i, 1] <= PowerLimitValve:
  255. WidthAverage += DotDenseLeftRight[i, 1]
  256. WidthAverage_L += DotDenseLeftRight[i,1]
  257. nCounter += 1
  258. WidthAverage /= nCounter if nCounter > 0 else 1
  259. WidthAverage_L /= nCounter if nCounter > 0 else 1
  260. return WidthAverage, WidthAverage_L,PowerLimit
  261. def amendMaxBox(binNumOfPower,TopP,PowerLimit,BoxMaxIndex):
  262. """
  263. 对限负荷水平功率带的最大网格进行修正
  264. 参数:
  265. binNumOfPower: 功率分仓个数。
  266. TopP: 额定功率以上功率仓个数。
  267. PowerLimit:标识限功率水平功率带,1:是;0:不是
  268. BoxMaxIndex: 占百分比最大的网格索引
  269. 返回:
  270. BoxMaxIndex: 修正后的最大占比网格索引
  271. """
  272. for i in range(1, binNumOfPower - TopP+1):
  273. if (PowerLimit[i] == 1) and (abs(BoxMaxIndex[i] - BoxMaxIndex[i - 1]) > 5):
  274. BoxMaxIndex[i] = BoxMaxIndex[i - 1] + 1
  275. return BoxMaxIndex
  276. def markBoxLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,BoxMaxIndex):
  277. '''
  278. 标记需剔除的网格
  279. 参数:
  280. binNumOfPower: 功率分仓个数。
  281. binNumOfWindSpeed:风速分仓个数
  282. TopP: 额定功率以上功率仓个数。
  283. CurveWidthR:功率主带轮廓
  284. CurveWidthL
  285. BoxMaxIndex: 修正后的最大占比网格索引
  286. 返回:
  287. BBoxRemove: 标识需剔除的网格
  288. '''
  289. BBoxRemove = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)
  290. for m in range(binNumOfPower - TopP):
  291. for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed):
  292. BBoxRemove[m, n] = 1
  293. for n in range(int(BoxMaxIndex[m]) - int(CurveWidthL)+1, 0, -1):
  294. BBoxRemove[m, n-1] = 2
  295. return BBoxRemove
  296. def markBoxPLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,BoxPercent,BoxMaxIndex,mm,BBoxRemove,nn):
  297. '''
  298. 标记限功率网格
  299. 1:右侧欠发 2:左侧超发 3:额定功率以上超发
  300. 参数:
  301. binNumOfPower: 功率分仓个数。
  302. binNumOfWindSpeed:风速分仓个数
  303. TopP: 额定功率以上功率仓个数。
  304. CurveWidthR:功率主带轮廓
  305. PowerLimit: 标识限功率水平功率带,1:是;0:不是
  306. BoxMaxIndex: 修正后的最大占比网格索引
  307. mm: 拐点所在功率仓
  308. BBoxRemove:需剔除的网格
  309. CurveTop1:拐点对应列
  310. 返回:
  311. BBoxLimit:标识限功率网格
  312. '''
  313. BBoxLimit = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)
  314. for i in range(2, binNumOfPower - TopP):
  315. if PowerLimit[i] == 1:
  316. BBoxLimit[i, int(BoxMaxIndex[i] + CurveWidthR + 1):binNumOfWindSpeed] = 1
  317. IsolateValve = 3
  318. for m in range(binNumOfPower - TopP):
  319. for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed):
  320. if BoxPercent[m, n] < IsolateValve:
  321. BBoxRemove[m, n] = 1
  322. for m in range(binNumOfPower - TopP, binNumOfPower):
  323. for n in range(binNumOfWindSpeed):
  324. BBoxRemove[m, n] = 3
  325. # 标记功率主带拐点左侧的欠发网格
  326. for m in range(mm-1, binNumOfPower - TopP):
  327. for n in range(int(nn) - 2):
  328. BBoxRemove[m, n] = 2
  329. return BBoxLimit
  330. def markData(binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1):
  331. '''
  332. 根据网格标识来标记数据点
  333. 参数:
  334. nCounter1
  335. binNumOfPower: 功率分仓个数。
  336. binNumOfWindSpeed:风速分仓个数
  337. DzMarch809: array V P lab: 38181。
  338. BBoxRemove:需剔除的网格
  339. 返回:
  340. DzMarch809Sel:数组现在包含了每个数据点的标识
  341. '''
  342. DzMarch809Sel = np.zeros(nCounter1, dtype=int)
  343. nWhichP = 0
  344. nWhichV = 0
  345. for i in range(nCounter1):
  346. for m in range( binNumOfPower ):
  347. if ((DzMarch809[i,1])> m * intervalPower) and ((DzMarch809[i,1]) <= (m+1) * intervalPower):
  348. nWhichP = m #m记录的是index
  349. break
  350. for n in range( binNumOfWindSpeed ):
  351. if DzMarch809[i,0] > ((n+1) * intervalWindspeed - intervalWindspeed/2) and DzMarch809[i,0] <= ((n+1) * intervalWindspeed + intervalWindspeed / 2):
  352. nWhichV = n
  353. break
  354. if nWhichP >= 0 and nWhichV >= 0:
  355. if BBoxRemove[nWhichP, nWhichV] == 1:
  356. DzMarch809Sel[i] = 1
  357. elif BBoxRemove[nWhichP, nWhichV] == 2:
  358. DzMarch809Sel[i] = 2
  359. elif BBoxRemove[nWhichP , nWhichV] == 3:
  360. DzMarch809Sel[i] = 0
  361. return DzMarch809Sel
  362. def windowFilter(nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line):
  363. '''
  364. 滑动窗口方法,进一步标记数据坏点
  365. 参数:
  366. nCounter1:
  367. ratedPower:
  368. Point_line:
  369. 返回:
  370. PVLimit: 限负荷数据
  371. nLimitTotal: 是限负荷数据的总数
  372. '''
  373. PVLimit = np.zeros((nCounter1, 3))
  374. nLimitTotal = 0
  375. nWindowLength = 6
  376. LimitWindow = np.zeros(nWindowLength)
  377. UpLimit = 0
  378. LowLimit = 0
  379. PowerStd = 30
  380. nWindowNum = np.floor(nCounter1/nWindowLength)
  381. PowerLimitUp = ratedPower - 100
  382. PowerLimitLow = 100
  383. # 循环遍历每个窗口
  384. for i in range(int(nWindowNum)):
  385. start_idx = i * nWindowLength
  386. end_idx = start_idx + nWindowLength
  387. LimitWindow = DzMarch809[start_idx:end_idx, 1]
  388. bAllInAreas = np.all(LimitWindow >= PowerLimitLow) and np.all(LimitWindow <= PowerLimitUp)
  389. if not bAllInAreas:
  390. continue
  391. UpLimit = LimitWindow[0] + PowerStd
  392. LowLimit = LimitWindow[0] - PowerStd
  393. bAllInUpLow = np.all(LimitWindow >= LowLimit) and np.all(LimitWindow <= UpLimit)
  394. if bAllInUpLow:
  395. DzMarch809Sel[start_idx:end_idx] = 4
  396. for j in range(nWindowLength):
  397. PVLimit[nLimitTotal, :2] = DzMarch809[start_idx + j, :2]
  398. PVLimit[nLimitTotal, 2] = Point_line[start_idx + j] # 对数据进行标识
  399. nLimitTotal += 1
  400. return PVLimit,nLimitTotal
  401. def store_points(DzMarch809, DzMarch809Sel,Point_line, nCounter1):
  402. """
  403. 存储好点,并返回存储好的点的数组和计数。
  404. 参数:
  405. DzMarch809: array:V P lab: 38181。
  406. DzMarch809Sel: 数组现在包含了每个数据点的标识
  407. Point_line:
  408. nCounter1:
  409. axis: 'good' or 'bad'
  410. 返回:
  411. PVDot: 数据
  412. nCounterPV: 数据个数
  413. """
  414. PVDot = np.zeros((nCounter1, 3))
  415. PVBad = np.zeros((nCounter1, 3))
  416. nCounterPV = 0
  417. nCounterBad = 0
  418. for i in range(nCounter1):
  419. if DzMarch809Sel[i] == 0:
  420. nCounterPV += 1
  421. PVDot[nCounterPV-1, :2] = DzMarch809[i, :2]
  422. PVDot[nCounterPV-1, 2] = Point_line[i]
  423. elif DzMarch809Sel[i] in [1, 2, 3]:
  424. nCounterBad += 1
  425. PVBad[nCounterBad-1, :2] = DzMarch809[i, :2]
  426. PVBad[nCounterBad-1, 2] = Point_line[i]
  427. return PVDot, nCounterPV,PVBad,nCounterBad
  428. def markAllData(nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit):
  429. """
  430. 标记好点、坏点、限电点。
  431. 参数:
  432. nCounterPV
  433. nCounterBad
  434. dataFramePartOfSCADA
  435. PVDot
  436. PVBad
  437. SM
  438. nLimitTotal
  439. PVLimit
  440. 返回:
  441. dataFramePartOfSCADA
  442. """
  443. for i in range(nCounterPV):
  444. dataFramePartOfSCADA[int(PVDot[i, 2] - 1), (SM[1]-1)] = 1
  445. #坏点
  446. for i in range(nCounterBad):
  447. dataFramePartOfSCADA[int(PVBad[i, 2] - 1),(SM[1]-1)] = 5 # 坏点标识
  448. # 对所有数据中的限电点进行标注
  449. for i in range(nLimitTotal):
  450. dataFramePartOfSCADA[int(PVLimit[i, 2] - 1),(SM[1]-1)] = 4 # 限电点标识
  451. return dataFramePartOfSCADA
# 4. Data visualisation
  453. def plot_data(ws:list, ap:list):
  454. fig = plt.figure()
  455. plt.scatter(ws, ap, s=1, c='black', marker='.')
  456. ax = plt.gca()
  457. ax.xaxis.set_major_locator(MultipleLocator(5))
  458. ax.yaxis.set_major_locator(MultipleLocator(500))
  459. plt.xlim((0, 30))
  460. plt.ylim((0, 2200))
  461. plt.tick_params(labelsize=8)
  462. plt.xlabel("V/(m$·$s$^{-1}$)", fontsize=8)
  463. plt.ylabel("P/kW", fontsize=8)
  464. plt.show()
# 5. Main execution
  466. def main():
  467. turbine=85
  468. basePath=r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72'
  469. filePathSCADA = r'{}\{}.csv'.format(basePath,turbine)
  470. filePathTurbineInfo = r'{}\info.csv'.format(basePath)
  471. outputFilePathOfSCADA=r"{}\labeled\labeled_{}.csv".format(basePath,turbine)
  472. dataFrameOfSCADA, turbineInfo = loadData(filePathSCADA, filePathTurbineInfo)
  473. ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed = extractTurbineParameters(turbineInfo)
  474. dataFramePartOfSCADA = preprocessData(dataFrameOfSCADA)
  475. powerMax=dataFramePartOfSCADA[fieldActivePower].max()
  476. binNumOfPower, binNumOfWindSpeed = calculateIntervals(powerMax,ratedPower,windSpeedCutOut)
  477. TopP = calculateTopP(powerMax,ratedPower)
  478. # 根据功率阈值对数据进行标签分配
  479. DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM = chooseData(dataFramePartOfSCADA, dataFrameOfSCADA)
  480. XBoxNumber = gridCount(binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809)
  481. PBoxPercent,PBinSum = percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'power')
  482. VBoxPercent,VBinSum = percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'speed')
  483. PBoxMaxIndex, PBoxMaxP = maxBoxPercentage(PBoxPercent, binNumOfPower, binNumOfWindSpeed, 'power')
  484. VBoxMaxIndex, VBoxMaxV = maxBoxPercentage(VBoxPercent, binNumOfPower, binNumOfWindSpeed, 'speed')
  485. if PBoxMaxIndex[0] > 14: PBoxMaxIndex[0] = 9
  486. DotDenseLeftRight = extendBoxPercent(90, PBoxMaxP,TopP,PBoxMaxIndex,PBoxPercent,binNumOfPower,binNumOfWindSpeed)
  487. # pdb.set_trace() # 设置断点
  488. WidthAverage, WidthAverage_L,PowerLimit = calculatePWidth(binNumOfPower,TopP,DotDenseLeftRight,PBinSum)
  489. PBoxMaxIndex = amendMaxBox(binNumOfPower,TopP,PowerLimit,PBoxMaxIndex)
  490. # 计算功率主带的左右边界
  491. CurveWidthR = np.ceil(WidthAverage) + 2
  492. CurveWidthL = np.ceil(WidthAverage_L) + 2
  493. #确定功率主带的左上拐点,即额定风速位置的网格索引
  494. CurveTop = np.zeros((2, 1), dtype=int)
  495. BTopFind = 0
  496. for m in range(binNumOfPower - TopP, 0, -1):
  497. for n in range(int(np.floor(int(windSpeedCutIn) / intervalWindspeed)), binNumOfWindSpeed - 1):
  498. if (VBoxPercent[m, n - 1] < VBoxPercent[m, n]) and (VBoxPercent[m, n] <= VBoxPercent[m, n + 1]) and (XBoxNumber[m, n] >= 3):
  499. CurveTop[0] = m
  500. CurveTop[1] = n #[第80个,第40个]
  501. BTopFind = 1
  502. mm = m
  503. nn = n
  504. break
  505. if BTopFind == 1:
  506. break
  507. #标记网格
  508. BBoxRemove = markBoxLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,PBoxMaxIndex)
  509. BBoxLimit = markBoxPLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,PBoxPercent,PBoxMaxIndex,mm,BBoxRemove,nn)
  510. DzMarch809Sel = markData(binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1)
  511. PVLimit,nLimitTotal = windowFilter(nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line)
  512. #将功率滑动窗口主带平滑化
  513. nSmooth = 0
  514. for i in range(binNumOfPower - TopP - 1):
  515. PVLeftDown = np.zeros(2)
  516. PVRightUp = np.zeros(2)
  517. if PBoxMaxIndex[i + 1] - PBoxMaxIndex[i] >= 1:
  518. # 计算左下和右上顶点的坐标
  519. PVLeftDown[0] = (PBoxMaxIndex[i]+1 + CurveWidthR) * 0.25 - 0.125
  520. PVLeftDown[1] = (i) * 25
  521. PVRightUp[0] = (PBoxMaxIndex[i+1]+1 + CurveWidthR) * 0.25 - 0.125
  522. PVRightUp[1] = (i+1) * 25
  523. for m in range(nCounter1):
  524. # 检查当前点是否在锯齿区域内
  525. if (DzMarch809[m, 0] > PVLeftDown[0]) and (DzMarch809[m, 0] < PVRightUp[0]) and (DzMarch809[m, 1] > PVLeftDown[1]) and (DzMarch809[m, 1] < PVRightUp[1]):
  526. # 检查斜率是否大于对角连线
  527. if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (PVRightUp[1] - PVLeftDown[1]) / (PVRightUp[0] - PVLeftDown[0]):
  528. # 如果在锯齿左上三角形中,则选中并增加锯齿平滑计数器
  529. DzMarch809Sel[m] = 0
  530. nSmooth += 1
  531. # DzMarch809Sel 数组现在包含了锯齿平滑的选择结果,nSmooth 是选中的点数
  532. PVDot, nCounterPV,PVBad,nCounterBad = store_points(DzMarch809, DzMarch809Sel,Point_line, nCounter1)
  533. #标注
  534. dataFramePartOfSCADA = markAllData(nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit)
  535. A = dataFramePartOfSCADA[:,3]
  536. A=pd.DataFrame(A,columns=['lab'])
  537. labeledData = pd.concat([dataFrameOfSCADA,A],axis=1)
  538. D = labeledData[labeledData['lab'].isin([-1,0,1,2,3,4,5])]#选择为1的行
  539. labeledData.to_csv(outputFilePathOfSCADA,encoding='utf-8')
  540. plot_data(D[fieldWindSpeed], D[fieldActivePower])
  541. if __name__ == '__main__':
  542. main()