# SCADA_10min_category_3.py (24 KB)


import math
import os
import pdb

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.pyplot import MultipleLocator
# pdb.set_trace()  # set a breakpoint
  9. intervalPower = 25 # For example
  10. intervalWindspeed = 0.25 # For example
  11. fieldRatedPower="额定功率"
  12. fieldRatedWindSpeed="额定风速"
  13. fieldWindSpeedCutIn="切入风速"
  14. fieldWindSpeedCutOut="切出风速"
  15. fieldTime="时间"
  16. fieldWindSpeed="风速"
  17. fieldActivePower="变频器电网侧有功功率"
  18. fieldLabel="lab"
# 1. Data loading and preprocessing functions
  20. def loadData(filePathSCADA:str, filePathTurbineInfo:str):
  21. dataFrameSCADA = pd.read_csv(filePathSCADA, encoding="utf-8")
  22. dataFrameTurbineInfo = pd.read_csv(filePathTurbineInfo)
  23. return dataFrameSCADA, dataFrameTurbineInfo
  24. def extractTurbineParameters(turbineInfo:pd.DataFrame):
  25. """
  26. 解析风电机组参数
  27. 参数:
  28. turbineInfo 风电机组信息DataFrame
  29. 返回:
  30. PRated 额定功率(kw)
  31. VCutOut 切出风速(m/s)
  32. VCutIn 切入风速(m/s)
  33. VRated 额定风速(m/s)
  34. """
  35. ratedPower = turbineInfo.loc[:, [fieldRatedPower]].values
  36. windSpeedCutIn = turbineInfo.loc[:, [fieldWindSpeedCutIn]].values
  37. windSpeedCutOut = turbineInfo.loc[:, [fieldWindSpeedCutOut]].values
  38. ratedWindSpeed = turbineInfo.loc[:, [fieldRatedWindSpeed]].values
  39. return ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed
  40. def preprocessData(dataFrameOfSCADA:pd.DataFrame):
  41. """
  42. 获取机组SCADA数据的 时间、有功功率、风速,构建新的DataFrame变量
  43. 参数:
  44. dataFrameOfSCADA 机组SCADA数据
  45. 返回:
  46. 由机组SCADA数据的 时间、有功功率、风速,构建新的DataFrame变量
  47. """
  48. timeStamp = dataFrameOfSCADA.loc[:, ['时间']]
  49. activePower = dataFrameOfSCADA.loc[:, ['变频器电网侧有功功率']]
  50. windSpeed = dataFrameOfSCADA.loc[:, ['风速']]
  51. dataFramePartOfSCADA = pd.concat([timeStamp,activePower,windSpeed], axis=1)
  52. # dataFramePartOfSCADA[fieldLabel]=0
  53. # dataFramePartOfSCADA[fieldLabel]=dataFramePartOfSCADA[fieldLabel].astype(int)
  54. return dataFramePartOfSCADA
# 2. Label assignment and bin (grid) calculations
  56. def calculateIntervals(activePowerMax, ratedPower, windSpeedCutOut):
  57. """
  58. 按有功功率(以25kw为间隔)、风速(以0.25m/s为间隔)分仓
  59. 参数:
  60. max_power 当前机组的有功功率最大值
  61. PRated 机组额定功率
  62. wind_speed_cutout 切出风速
  63. 返回:
  64. interval_power 有功功率分仓间隔
  65. interval_windspeed 风速分仓间隔
  66. PNum 有功功率分仓数量
  67. VNum 风速分仓数量
  68. """
  69. binNumOfPower = math.floor(activePowerMax / intervalPower) + 1 if activePowerMax >= ratedPower else math.floor(ratedPower / intervalPower)
  70. binNumOfWindSpeed = math.ceil(windSpeedCutOut / intervalWindspeed)
  71. return binNumOfPower, binNumOfWindSpeed
  72. def calculateTopP(activePowerMax,ratedPower):
  73. """
  74. 计算额定功率以上功率仓的个数
  75. 参数:
  76. max_power 当前机组的有功功率最大值
  77. PRated 机组额定功率
  78. 返回:
  79. TopP 额定功率以上功率仓的个数
  80. """
  81. TopP = 0
  82. if activePowerMax >= ratedPower:
  83. TopP = math.floor((activePowerMax - ratedPower) / intervalPower) + 1
  84. else:
  85. TopP = 0
  86. return TopP
  87. def chooseData(dataFramePartOfSCADA:pd.DataFrame, dataFrameOfSCADA):
  88. """
  89. 根据特定条件对数据进行标签分配,例如功率和风速阈值。
  90. 参数:
  91. dataFramePartOfSCADA (DataFrame): 包含时间和功率和风速数据的DataFrame。
  92. dataFrameOfSCADA: 原始数据
  93. 返回:
  94. DzMarch809: array:V P lab: 38181。
  95. nCounter1: 个数
  96. dataFramePartOfSCADA:
  97. """
  98. # 初始化标签列
  99. SM1 = dataFramePartOfSCADA.shape #(52561,3)
  100. AA1 = SM1[0]
  101. lab = [[0] for _ in range(AA1)]
  102. lab = pd.DataFrame(lab,columns=['lab'])
  103. dataFramePartOfSCADA = pd.concat([dataFramePartOfSCADA,lab],axis=1) #在tpv后加一列标签列
  104. dataFramePartOfSCADA = dataFramePartOfSCADA.values
  105. SM = dataFramePartOfSCADA.shape #(52561,4)
  106. AA = SM[0]
  107. nCounter1 = 0
  108. DzMarch809_0 = np.zeros((AA, 3))
  109. Point_line = np.zeros(AA, dtype=int)
  110. APower = dataFrameOfSCADA[fieldActivePower]
  111. WSpeed = dataFrameOfSCADA[fieldWindSpeed]
  112. for i in range(AA):
  113. if (APower[i] > 10) & (WSpeed[i] > 0):
  114. nCounter1 += 1
  115. DzMarch809_0[nCounter1-1, 0] = WSpeed[i]
  116. DzMarch809_0[nCounter1-1, 1] = APower[i]
  117. Point_line[nCounter1-1] = i+1
  118. if APower[i] <= 10:
  119. dataFramePartOfSCADA[i,SM[1]-1] = -1
  120. DzMarch809 = DzMarch809_0[:nCounter1, :]
  121. return DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM
  122. def gridCount(binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809):
  123. """
  124. 统计各网格中落入label!=-1的数据点个数
  125. 参数:
  126. binNumOfWindSpeed: 风速分仓个数。
  127. binNumOfPower: 功率分仓个数。
  128. DataFrame: 带有新的'label'列的原始DataFrame。
  129. nCounter1: 数据个数
  130. DzMarch809
  131. 返回:
  132. XBoxNumber: 各网格中落入label!=-1的数据点个数的array。
  133. """
  134. # 遍历有效数据
  135. XBoxNumber = np.ones((binNumOfPower, binNumOfWindSpeed),dtype=int)
  136. for i in range(nCounter1):
  137. for m in range(1, binNumOfPower + 1):
  138. if (DzMarch809[i,1] > (m - 1) * intervalPower) and (DzMarch809[i,1] <= m * intervalPower):
  139. nWhichP = m
  140. break
  141. for n in range(1, binNumOfWindSpeed + 1):
  142. if (DzMarch809[i, 0] > (n - 1) * intervalWindspeed) and (DzMarch809[i, 0] <= n * intervalWindspeed):
  143. nWhichV = n
  144. break
  145. if (nWhichP > 0) and (nWhichV > 0):
  146. XBoxNumber[nWhichP - 1][nWhichV - 1] += 1
  147. for m in range(1,binNumOfPower+1):
  148. for n in range(1,binNumOfWindSpeed+1):
  149. XBoxNumber[m-1,n-1] = XBoxNumber[m-1,n-1] - 1
  150. return XBoxNumber
  151. def percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed,axis):
  152. """
  153. 计算分仓(水平/竖直)后每个网格占百分比
  154. 参数:
  155. XBoxNumber: 各网格中落入label!=-1的数据点个数的array。
  156. binNumOfPower: 功率分仓个数。
  157. binNumOfWindSpeed: 风速分仓个数。
  158. axis: "power"or"speed"分仓
  159. 返回:
  160. BoxPercent: 占比情况array。
  161. """
  162. BoxPercent = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=float)
  163. BinSum = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed, 1), dtype=int)
  164. for i in range(1,1+(binNumOfPower if axis == 'power' else binNumOfWindSpeed)):
  165. for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1):
  166. BinSum[i-1] = BinSum[i-1] + (XBoxNumber[i-1,m-1] if axis == 'power' else XBoxNumber[m-1,i-1])
  167. for m in range(1,(binNumOfWindSpeed if axis == 'power' else binNumOfPower)+1):
  168. if BinSum[i-1]>0:
  169. if axis == 'power':
  170. BoxPercent[i-1,m-1] = (XBoxNumber[i-1,m-1] / BinSum[i-1])*100
  171. else:
  172. BoxPercent[m-1,i-1] = (XBoxNumber[m-1,i-1] / BinSum[i-1])*100
  173. return BoxPercent,BinSum
  174. def maxBoxPercentage(BoxPercent, binNumOfPower, binNumOfWindSpeed, axis):
  175. """
  176. 计算分仓(水平/竖直)后占百分比最大的网格索引及值
  177. 参数:
  178. BoxPercent: 占比情况array。
  179. binNumOfPower: 功率分仓个数。
  180. binNumOfWindSpeed: 风速分仓个数。
  181. axis: "power"or"speed"分仓
  182. 返回:
  183. BoxMaxIndex: 占百分比最大的网格索引。
  184. BoxMax: 占百分比最大的网格值
  185. """
  186. BoxMaxIndex = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = int)
  187. BoxMax = np.zeros((binNumOfPower if axis == 'power' else binNumOfWindSpeed,1),dtype = float)
  188. for m in range(1,(binNumOfPower if axis == 'power' else binNumOfWindSpeed)+1):
  189. BoxMaxIndex[m-1] = (np.argmax(BoxPercent[m-1, :])) if axis == 'power' else (np.argmax(BoxPercent[:, m-1]))
  190. BoxMax[m-1] = (np.max(BoxPercent[m-1, :]))if axis == 'power' else (np.max(BoxPercent[:, m-1]))
  191. return BoxMaxIndex, BoxMax
  192. def extendBoxPercent(m, BoxMax,TopP,BoxMaxIndex,BoxPercent,binNumOfPower,binNumOfWindSpeed):
  193. """
  194. 以中心最大水平功率带为基准,向两侧对称扩展网格,使网格散点百分比总值达到阈值m
  195. 参数:
  196. m: 设定总和百分比阈值。
  197. BoxMax: 占百分比最大的网格值。
  198. TopP: 额定功率以上功率仓个数。
  199. BoxMaxIndex: 占百分比最大的网格索引。
  200. BoxPercent: 占比情况array。
  201. binNumOfPower: 功率分仓个数。
  202. binNumOfWindSpeed: 风速分仓个数。
  203. 返回:
  204. DotDense: 每个功率仓内网格的个数。
  205. DotDenseLeftRight: 向左向右拓展的网格个数
  206. """
  207. DotDense = np.zeros(binNumOfPower)
  208. DotDenseLeftRight = np.zeros((binNumOfPower,2))
  209. DotValve = m
  210. PDotDenseSum = 0
  211. for i in range(binNumOfPower - TopP):
  212. PDotDenseSum = BoxMax[i]
  213. iSpreadRight = 1
  214. iSpreadLeft = 1
  215. while PDotDenseSum < DotValve:
  216. if (BoxMaxIndex[i] + iSpreadRight) < binNumOfWindSpeed-1-1:
  217. PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] + iSpreadRight]
  218. iSpreadRight += 1
  219. else:
  220. break
  221. if (BoxMaxIndex[i] - iSpreadLeft) > 0:
  222. PDotDenseSum += BoxPercent[i, BoxMaxIndex[i] - iSpreadLeft]
  223. iSpreadLeft += 1
  224. else:
  225. break
  226. iSpreadRight = iSpreadRight-1
  227. iSpreadLeft = iSpreadLeft-1
  228. DotDenseLeftRight[i, 0] = iSpreadLeft
  229. DotDenseLeftRight[i, 1] = iSpreadRight
  230. DotDense[i] = iSpreadLeft + iSpreadRight + 1
  231. return DotDenseLeftRight
  232. def calculatePWidth(binNumOfPower,TopP,DotDenseLeftRight,PBinSum):
  233. """
  234. 计算功率主带的平均宽度
  235. 参数:
  236. binNumOfPower: 功率分仓个数。
  237. TopP: 额定功率以上功率仓个数。
  238. DotDenseLeftRight: 向左向右拓展的网格个数
  239. PBinSum: 功率仓内数据点总和
  240. 返回:
  241. DotDense: 每个功率仓内网格的个数。
  242. DotDenseLeftRight: 向左向右拓展的网格个数
  243. PowerLimit: 各水平功率带是否为限功率标识,1:是;0:不是
  244. """
  245. PowerLimit = np.zeros(binNumOfPower, dtype=int)
  246. WidthAverage = 0
  247. WidthAverage_L = 0
  248. nCounter = 0
  249. PowerLimitValve = 6
  250. N_Pcount = 20
  251. for i in range(binNumOfPower - TopP):
  252. if (DotDenseLeftRight[i, 1] > PowerLimitValve) and (PBinSum[i] > N_Pcount):
  253. PowerLimit[i] = 1
  254. if DotDenseLeftRight[i, 1] <= PowerLimitValve:
  255. WidthAverage += DotDenseLeftRight[i, 1]
  256. WidthAverage_L += DotDenseLeftRight[i,1]
  257. nCounter += 1
  258. WidthAverage /= nCounter if nCounter > 0 else 1
  259. WidthAverage_L /= nCounter if nCounter > 0 else 1
  260. return WidthAverage, WidthAverage_L,PowerLimit
  261. def amendMaxBox(binNumOfPower,TopP,PowerLimit,BoxMaxIndex):
  262. """
  263. 对限负荷水平功率带的最大网格进行修正
  264. 参数:
  265. binNumOfPower: 功率分仓个数。
  266. TopP: 额定功率以上功率仓个数。
  267. PowerLimit:标识限功率水平功率带,1:是;0:不是
  268. BoxMaxIndex: 占百分比最大的网格索引
  269. 返回:
  270. BoxMaxIndex: 修正后的最大占比网格索引
  271. """
  272. for i in range(1, binNumOfPower - TopP+1):
  273. if (PowerLimit[i] == 1) and (abs(BoxMaxIndex[i] - BoxMaxIndex[i - 1]) > 5):
  274. BoxMaxIndex[i] = BoxMaxIndex[i - 1] + 1
  275. return BoxMaxIndex
  276. def markBoxLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,BoxMaxIndex):
  277. '''
  278. 标记需剔除的网格
  279. 参数:
  280. binNumOfPower: 功率分仓个数。
  281. binNumOfWindSpeed:风速分仓个数
  282. TopP: 额定功率以上功率仓个数。
  283. CurveWidthR:功率主带轮廓
  284. CurveWidthL
  285. BoxMaxIndex: 修正后的最大占比网格索引
  286. 返回:
  287. BBoxRemove: 标识需剔除的网格
  288. '''
  289. BBoxRemove = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)
  290. for m in range(binNumOfPower - TopP):
  291. for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed):
  292. BBoxRemove[m, n] = 1
  293. for n in range(int(BoxMaxIndex[m]) - int(CurveWidthL)+1, 0, -1):
  294. BBoxRemove[m, n-1] = 2
  295. return BBoxRemove
  296. def markBoxPLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,BoxPercent,BoxMaxIndex,mm,BBoxRemove,nn):
  297. '''
  298. 标记限功率网格
  299. 1:右侧欠发 2:左侧超发 3:额定功率以上超发
  300. 参数:
  301. binNumOfPower: 功率分仓个数。
  302. binNumOfWindSpeed:风速分仓个数
  303. TopP: 额定功率以上功率仓个数。
  304. CurveWidthR:功率主带轮廓
  305. PowerLimit: 标识限功率水平功率带,1:是;0:不是
  306. BoxMaxIndex: 修正后的最大占比网格索引
  307. mm: 拐点所在功率仓
  308. BBoxRemove:需剔除的网格
  309. CurveTop1:拐点对应列
  310. 返回:
  311. BBoxLimit:标识限功率网格
  312. '''
  313. BBoxLimit = np.zeros((binNumOfPower, binNumOfWindSpeed), dtype=int)
  314. for i in range(2, binNumOfPower - TopP):
  315. if PowerLimit[i] == 1:
  316. BBoxLimit[i, int(BoxMaxIndex[i] + CurveWidthR + 1):binNumOfWindSpeed] = 1
  317. IsolateValve = 3
  318. for m in range(binNumOfPower - TopP):
  319. for n in range(int(BoxMaxIndex[m]) + int(CurveWidthR), binNumOfWindSpeed):
  320. if BoxPercent[m, n] < IsolateValve:
  321. BBoxRemove[m, n] = 1
  322. for m in range(binNumOfPower - TopP, binNumOfPower):
  323. for n in range(binNumOfWindSpeed):
  324. BBoxRemove[m, n] = 3
  325. # 标记功率主带拐点左侧的欠发网格
  326. for m in range(mm-1, binNumOfPower - TopP):
  327. for n in range(int(nn) - 2):
  328. BBoxRemove[m, n] = 2
  329. return BBoxLimit
  330. def markData(binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1):
  331. '''
  332. 根据网格标识来标记数据点
  333. 参数:
  334. nCounter1
  335. binNumOfPower: 功率分仓个数。
  336. binNumOfWindSpeed:风速分仓个数
  337. DzMarch809: array V P lab: 38181。
  338. BBoxRemove:需剔除的网格
  339. 返回:
  340. DzMarch809Sel:数组现在包含了每个数据点的标识
  341. '''
  342. DzMarch809Sel = np.zeros(nCounter1, dtype=int)
  343. nWhichP = 0
  344. nWhichV = 0
  345. for i in range(nCounter1):
  346. for m in range( binNumOfPower ):
  347. if ((DzMarch809[i,1])> m * intervalPower) and ((DzMarch809[i,1]) <= (m+1) * intervalPower):
  348. nWhichP = m #m记录的是index
  349. break
  350. for n in range( binNumOfWindSpeed ):
  351. if DzMarch809[i,0] > ((n+1) * intervalWindspeed - intervalWindspeed/2) and DzMarch809[i,0] <= ((n+1) * intervalWindspeed + intervalWindspeed / 2):
  352. nWhichV = n
  353. break
  354. if nWhichP >= 0 and nWhichV >= 0:
  355. if BBoxRemove[nWhichP, nWhichV] == 1:
  356. DzMarch809Sel[i] = 1
  357. elif BBoxRemove[nWhichP, nWhichV] == 2:
  358. DzMarch809Sel[i] = 2
  359. elif BBoxRemove[nWhichP , nWhichV] == 3:
  360. DzMarch809Sel[i] = 0
  361. return DzMarch809Sel
  362. def windowFilter(nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line):
  363. '''
  364. 滑动窗口方法,进一步标记数据坏点
  365. 参数:
  366. nCounter1:
  367. ratedPower:
  368. Point_line:
  369. 返回:
  370. PVLimit: 限负荷数据
  371. nLimitTotal: 是限负荷数据的总数
  372. '''
  373. PVLimit = np.zeros((nCounter1, 3))
  374. nLimitTotal = 0
  375. nWindowLength = 6
  376. LimitWindow = np.zeros(nWindowLength)
  377. UpLimit = 0
  378. LowLimit = 0
  379. PowerStd = 30
  380. nWindowNum = np.floor(nCounter1/nWindowLength)
  381. PowerLimitUp = ratedPower - 100
  382. PowerLimitLow = 100
  383. # 循环遍历每个窗口
  384. for i in range(int(nWindowNum)):
  385. start_idx = i * nWindowLength
  386. end_idx = start_idx + nWindowLength
  387. LimitWindow = DzMarch809[start_idx:end_idx, 1]
  388. bAllInAreas = np.all(LimitWindow >= PowerLimitLow) and np.all(LimitWindow <= PowerLimitUp)
  389. if not bAllInAreas:
  390. continue
  391. UpLimit = LimitWindow[0] + PowerStd
  392. LowLimit = LimitWindow[0] - PowerStd
  393. bAllInUpLow = np.all(LimitWindow >= LowLimit) and np.all(LimitWindow <= UpLimit)
  394. if bAllInUpLow:
  395. DzMarch809Sel[start_idx:end_idx] = 4
  396. for j in range(nWindowLength):
  397. PVLimit[nLimitTotal, :2] = DzMarch809[start_idx + j, :2]
  398. PVLimit[nLimitTotal, 2] = Point_line[start_idx + j] # 对数据进行标识
  399. nLimitTotal += 1
  400. return PVLimit,nLimitTotal
  401. def store_points(DzMarch809, DzMarch809Sel,Point_line, nCounter1):
  402. """
  403. 存储好点,并返回存储好的点的数组和计数。
  404. 参数:
  405. DzMarch809: array:V P lab: 38181。
  406. DzMarch809Sel: 数组现在包含了每个数据点的标识
  407. Point_line:
  408. nCounter1:
  409. axis: 'good' or 'bad'
  410. 返回:
  411. PVDot: 数据
  412. nCounterPV: 数据个数
  413. """
  414. PVDot = np.zeros((nCounter1, 3))
  415. PVBad = np.zeros((nCounter1, 3))
  416. nCounterPV = 0
  417. nCounterBad = 0
  418. for i in range(nCounter1):
  419. if DzMarch809Sel[i] == 0:
  420. nCounterPV += 1
  421. PVDot[nCounterPV-1, :2] = DzMarch809[i, :2]
  422. PVDot[nCounterPV-1, 2] = Point_line[i]
  423. elif DzMarch809Sel[i] in [1, 2, 3]:
  424. nCounterBad += 1
  425. PVBad[nCounterBad-1, :2] = DzMarch809[i, :2]
  426. PVBad[nCounterBad-1, 2] = Point_line[i]
  427. return PVDot, nCounterPV,PVBad,nCounterBad
  428. def markAllData(nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit):
  429. """
  430. 标记好点、坏点、限电点。
  431. 参数:
  432. nCounterPV
  433. nCounterBad
  434. dataFramePartOfSCADA
  435. PVDot
  436. PVBad
  437. SM
  438. nLimitTotal
  439. PVLimit
  440. 返回:
  441. dataFramePartOfSCADA
  442. """
  443. for i in range(nCounterPV):
  444. dataFramePartOfSCADA[int(PVDot[i, 2] - 1), (SM[1]-1)] = 1
  445. #坏点
  446. for i in range(nCounterBad):
  447. dataFramePartOfSCADA[int(PVBad[i, 2] - 1),(SM[1]-1)] = 5 # 坏点标识
  448. # 对所有数据中的限电点进行标注
  449. for i in range(nLimitTotal):
  450. dataFramePartOfSCADA[int(PVLimit[i, 2] - 1),(SM[1]-1)] = 4 # 限电点标识
  451. return dataFramePartOfSCADA
# 4. Data visualisation
  453. def plot_data(ws:list, ap:list):
  454. fig = plt.figure()
  455. plt.scatter(ws, ap, s=1, c='black', marker='.')
  456. ax = plt.gca()
  457. ax.xaxis.set_major_locator(MultipleLocator(5))
  458. ax.yaxis.set_major_locator(MultipleLocator(500))
  459. plt.xlim((0, 30))
  460. plt.ylim((0, 2200))
  461. plt.tick_params(labelsize=8)
  462. plt.xlabel("V/(m$·$s$^{-1}$)", fontsize=8)
  463. plt.ylabel("P/kW", fontsize=8)
  464. plt.show()
# 5. Main execution
  466. def main():
  467. turbine=85
  468. basePath=r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72'
  469. filePathSCADA = r'{}\{}.csv'.format(basePath,turbine)
  470. filePathTurbineInfo = r'{}\info.csv'.format(basePath)
  471. outputFilePathOfSCADA=r"{}\labeled\labeled_{}.csv".format(basePath,turbine)
  472. dataFrameOfSCADA, turbineInfo = loadData(filePathSCADA, filePathTurbineInfo)
  473. ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed = extractTurbineParameters(turbineInfo)
  474. dataFramePartOfSCADA = preprocessData(dataFrameOfSCADA)
  475. powerMax=dataFramePartOfSCADA[fieldActivePower].max()
  476. binNumOfPower, binNumOfWindSpeed = calculateIntervals(powerMax,ratedPower,windSpeedCutOut)
  477. TopP = calculateTopP(powerMax,ratedPower)
  478. # 根据功率阈值对数据进行标签分配
  479. DzMarch809,nCounter1,dataFramePartOfSCADA,Point_line,SM = chooseData(dataFramePartOfSCADA, dataFrameOfSCADA)
  480. XBoxNumber = gridCount(binNumOfWindSpeed,binNumOfPower,nCounter1,DzMarch809)
  481. PBoxPercent,PBinSum = percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'power')
  482. VBoxPercent,VBinSum = percentageDots(XBoxNumber, binNumOfPower, binNumOfWindSpeed, 'speed')
  483. PBoxMaxIndex, PBoxMaxP = maxBoxPercentage(PBoxPercent, binNumOfPower, binNumOfWindSpeed, 'power')
  484. VBoxMaxIndex, VBoxMaxV = maxBoxPercentage(VBoxPercent, binNumOfPower, binNumOfWindSpeed, 'speed')
  485. if PBoxMaxIndex[0] > 14: PBoxMaxIndex[0] = 9
  486. DotDenseLeftRight = extendBoxPercent(90, PBoxMaxP,TopP,PBoxMaxIndex,PBoxPercent,binNumOfPower,binNumOfWindSpeed)
  487. # pdb.set_trace() # 设置断点
  488. WidthAverage, WidthAverage_L,PowerLimit = calculatePWidth(binNumOfPower,TopP,DotDenseLeftRight,PBinSum)
  489. PBoxMaxIndex = amendMaxBox(binNumOfPower,TopP,PowerLimit,PBoxMaxIndex)
  490. # 计算功率主带的左右边界
  491. CurveWidthR = np.ceil(WidthAverage) + 2
  492. CurveWidthL = np.ceil(WidthAverage_L) + 2
  493. #确定功率主带的左上拐点,即额定风速位置的网格索引
  494. CurveTop = np.zeros((2, 1), dtype=int)
  495. BTopFind = 0
  496. for m in range(binNumOfPower - TopP, 0, -1):
  497. for n in range(int(np.floor(int(windSpeedCutIn) / intervalWindspeed)), binNumOfWindSpeed - 1):
  498. if (VBoxPercent[m, n - 1] < VBoxPercent[m, n]) and (VBoxPercent[m, n] <= VBoxPercent[m, n + 1]) and (XBoxNumber[m, n] >= 3):
  499. CurveTop[0] = m
  500. CurveTop[1] = n #[第80个,第40个]
  501. BTopFind = 1
  502. mm = m
  503. nn = n
  504. break
  505. if BTopFind == 1:
  506. break
  507. #标记网格
  508. BBoxRemove = markBoxLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,CurveWidthL,PBoxMaxIndex)
  509. BBoxLimit = markBoxPLimit(binNumOfPower,binNumOfWindSpeed,TopP,CurveWidthR,PowerLimit,PBoxPercent,PBoxMaxIndex,mm,BBoxRemove,nn)
  510. DzMarch809Sel = markData(binNumOfPower, binNumOfWindSpeed,DzMarch809,BBoxRemove,nCounter1)
  511. PVLimit,nLimitTotal = windowFilter(nCounter1,ratedPower,DzMarch809,DzMarch809Sel,Point_line)
  512. #将功率滑动窗口主带平滑化
  513. nSmooth = 0
  514. for i in range(binNumOfPower - TopP - 1):
  515. PVLeftDown = np.zeros(2)
  516. PVRightUp = np.zeros(2)
  517. if PBoxMaxIndex[i + 1] - PBoxMaxIndex[i] >= 1:
  518. # 计算左下和右上顶点的坐标
  519. PVLeftDown[0] = (PBoxMaxIndex[i]+1 + CurveWidthR) * 0.25 - 0.125
  520. PVLeftDown[1] = (i) * 25
  521. PVRightUp[0] = (PBoxMaxIndex[i+1]+1 + CurveWidthR) * 0.25 - 0.125
  522. PVRightUp[1] = (i+1) * 25
  523. for m in range(nCounter1):
  524. # 检查当前点是否在锯齿区域内
  525. if (DzMarch809[m, 0] > PVLeftDown[0]) and (DzMarch809[m, 0] < PVRightUp[0]) and (DzMarch809[m, 1] > PVLeftDown[1]) and (DzMarch809[m, 1] < PVRightUp[1]):
  526. # 检查斜率是否大于对角连线
  527. if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (PVRightUp[1] - PVLeftDown[1]) / (PVRightUp[0] - PVLeftDown[0]):
  528. # 如果在锯齿左上三角形中,则选中并增加锯齿平滑计数器
  529. DzMarch809Sel[m] = 0
  530. nSmooth += 1
  531. # DzMarch809Sel 数组现在包含了锯齿平滑的选择结果,nSmooth 是选中的点数
  532. PVDot, nCounterPV,PVBad,nCounterBad = store_points(DzMarch809, DzMarch809Sel,Point_line, nCounter1)
  533. #标注
  534. dataFramePartOfSCADA = markAllData(nCounterPV,nCounterBad,dataFramePartOfSCADA,PVDot,PVBad,SM,nLimitTotal,PVLimit)
  535. A = dataFramePartOfSCADA[:,3]
  536. A=pd.DataFrame(A,columns=['lab'])
  537. labeledData = pd.concat([dataFrameOfSCADA,A],axis=1)
  538. D = labeledData[labeledData['lab'].isin([-1,0,1,2,3,4,5])]#选择为1的行
  539. labeledData.to_csv(outputFilePathOfSCADA,encoding='utf-8')
  540. plot_data(D[fieldWindSpeed], D[fieldActivePower])
  541. if __name__ == '__main__':
  542. main()