SCADA_10min_category_1.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. import os
  2. import re
  3. import math
  4. import pandas as pd
  5. import numpy as np
  6. import matplotlib.pyplot as plt
  7. from matplotlib.pyplot import MultipleLocator#设定固定刻度
  8. def scada_10min_category():
  9. turbine_number=24
  10. fpath = 'D:/赵雅丽/实习/算法/min_scada_LuoTuoGou/72/'
  11. # 定义一个正则表达式来匹配纯数字文件名且扩展名为.csv
  12. pattern = re.compile(r'^\d+\.csv$')
  13. # 列出指定路径下的所有文件和文件夹
  14. files_in_dir = os.listdir(fpath)
  15. for file in files_in_dir:
  16. # 使用正则表达式匹配文件名
  17. if pattern.match(file):
  18. # 拼接文件的完整路径
  19. fname = os.path.join(fpath, file)
  20. # 读取csv文件,保持原始变量名而不忽略任何行
  21. scada_10min = pd.read_csv(fname)
  22. # 显示数据
  23. time_stamp = scada_10min.loc[:,['时间']] #dataframe
  24. active_power = scada_10min.loc[:,['变频器电网侧有功功率']]
  25. wind_speed = scada_10min.loc[:,['风速']]
  26. LM = pd.concat([time_stamp,active_power,wind_speed],axis=1) #dataframe
  27. # lm=LM.values #array
  28. xx = data_label(LM,fpath)#dataframe
  29. mergedTable = pd.concat([scada_10min,xx],axis=1)#合并dataframe
  30. D = mergedTable[mergedTable['lab'] == 1]#选择为1的行
  31. ws = D["风速"]#series
  32. ap = D["变频器电网侧有功功率"]
  33. ##绘图
  34. # fig = plt.figure(figsize=(10,6),dpi=500) #figsize是图形大小,dpi像素
  35. plt.scatter(ws,ap,s=8,c='black',marker='.',label='好点')
  36. # x_major_locator=MultipleLocator(5)
  37. # y_major_locator=MultipleLocator(500)
  38. # ax=plt.gca()
  39. # ax.xaxis.set_major_locator(x_major_locator)
  40. # ax.yaxis.set_major_locator(y_major_locator)
  41. # plt.xlim((0,30))
  42. # plt.ylim((0,2200))
  43. # plt.tick_params(labelsize=20)
  44. # # plt.grid(c='dimgray',alpha=0.2)
  45. # plt.xlabel("V/(m$·$s$^{-1}$)",fontsize=20)
  46. # plt.ylabel("P/kW",fontsize=20)
  47. # # plt.savefig(r'D:\赵雅丽\研究生学习资料\学习资料\劣化度健康度\spyder\大论文\图\风速-功率.jpg',bbox_inches='tight')
  48. # plt.show()
  49. def data_label(x1,x2): # LM:T P V path:文件获取路径
  50. fpath2 = x2
  51. fname2 = os.path.join(fpath2, "info.csv") #读取数据文件2(额定风速额定功率等)
  52. # 参数na_filter=False仅阻止了pandas自动检测这些缺失值,并不能忽略
  53. # 但请注意,pandas没有直接的'omitrow'选项,如果需要忽略包含缺失值的行,需要在后续处理中处理
  54. turbine_info = pd.read_csv(fname2, na_filter=False)
  55. # 删除包含任何缺失值的行
  56. turbine_info = turbine_info.dropna()
  57. PRated = turbine_info.loc[:,["额定功率"]] #dataframe
  58. VCutOut = turbine_info.loc[:,["切出风速"]]
  59. VCutIn = turbine_info.loc[:,["切入风速"]]
  60. VRated = turbine_info.loc[:,["额定风速"]]
  61. #网格法确定风速风向分区数量,功率方向分区数量
  62. Labeled_March809 = x1
  63. APower = Labeled_March809["active_power"] #series读入有功功率
  64. WSpeed = Labeled_March809["wind_speed"] #读入风速
  65. maxP=np.max(APower)
  66. intervalP=25 #ceil(PRated*0.01)#功率分区间隔为额定功率的1%
  67. intervalwindspeed=0.25 #风速分区间隔0.25m/s
  68. #初始化
  69. PNum = 0
  70. TopP = 0
  71. # 根据条件计算PNum和TopP
  72. if maxP >= PRated:
  73. PNum = math.floor(maxP / intervalP) + 1
  74. TopP = math.floor((maxP - PRated) / intervalP) + 1
  75. else:
  76. PNum = math.floor(PRated / intervalP)
  77. TopP = 0
  78. VNum = math.ceil(VCutOut / intervalwindspeed)
  79. SM1 = Labeled_March809.shape
  80. AA1 = SM1[0] #运行数据的条数
  81. lab = [[0] for _ in range(AA1)] #创建全0空列表
  82. lab = pd.DataFrame(lab,columns=['lab'])
  83. Labeled_March809 = pd.concat([Labeled_March809,lab],axis=1) #在tpv后加一列标签列
  84. SM = Labeled_March809.shape #(52561,4)
  85. AA = SM[0]
  86. #存储功率大于0的运行数据
  87. #标识功率为0的点,标识-1
  88. DzMarch809_0 = np.zeros(AA, 3) #array(52561,3)
  89. nCounter1 = 1
  90. Point_line=np.zeros(AA,1)
  91. #考虑到很多功率小于10的数据存在,将<10的功率视为0
  92. for i in range(AA):
  93. if (APower[i] > 10) & (WSpeed[i] > 0):
  94. nCounter1 += 1 #共有nCounter1个功率大于0的正常数据
  95. DzMarch809_0[nCounter1-1, 0] = WSpeed[i]
  96. DzMarch809_0[nCounter1-1, 1] = APower[i]
  97. Point_line[nCounter1-1] = i+1 # 记录nCounter1记下的数据在原始数据中的位置
  98. if APower[i] <= 10:
  99. Labeled_March809[i,SM[1]-1] = -1 # 功率为0标识为-1 array类型
  100. # 截取DzMarch809_0中实际存储的数据 其他全为0
  101. DzMarch809 = DzMarch809_0[:nCounter1, :]
  102. #统计各网格落入的散点个数
  103. XBoxNumber = np.ones((PNum, VNum),dtype=int) #(86 100)
  104. nWhichP = 0
  105. nWhichV = 0
  106. # 循环遍历DzMarch809中的有效数据
  107. for i in range(nCounter1):
  108. # 查找功率所在的区间
  109. for m in range(1, PNum + 1): # 注意Python的range是左闭右开的,所以需要+1
  110. if (DzMarch809[i,1] > (m - 1) * intervalP) and (DzMarch809[i,1] <= m * intervalP):
  111. nWhichP = m
  112. break
  113. # 查找风速所在的区间
  114. for n in range(1, VNum + 1): # 同样需要+1
  115. if (DzMarch809[i, 0] > (n - 1)*intervalwindspeed) and (DzMarch809[i, 0] <= n*intervalwindspeed):
  116. nWhichV = n
  117. break
  118. # 如果功率和风速都在有效区间内,增加对应网格的计数
  119. if (nWhichP > 0) and (nWhichV > 0):
  120. XBoxNumber[nWhichP - 1, nWhichV - 1] += 1 # 注意Python的索引是从0开始的,所以需要减1
  121. # XBoxNumber现在包含了每个网格的计数[PNum行, VNum列]
  122. for m in range(1,PNum+1):
  123. for n in range(1,VNum+1):
  124. XBoxNumber[m-1,n-1] = XBoxNumber[m-1,n-1] - 1
  125. #在功率方向将网格内散点绝对个数转换为相对百分比,备用
  126. PBoxPercent = np.zeros((PNum, VNum)) #(86 100) #计算后会出现浮点型,所以不能定义int类型
  127. PBinSum = np.zeros((PNum,1),dtype=int)
  128. for i in range(1,PNum+1):
  129. for m in range(1,VNum+1):
  130. PBinSum[i-1] = PBinSum[i-1] + XBoxNumber[i-1,m-1]
  131. for m in range(1,VNum+1):
  132. if PBinSum[i-1]>0:
  133. PBoxPercent[i-1,m-1] = (XBoxNumber[i-1,m-1] / PBinSum[i-1])*100
  134. #在风速方向将网格内散点绝对个数转换为相对百分比,备用
  135. VBoxPercent = np.zeros((PNum, VNum)) #(86 100) #计算后会出现浮点型,所以不能定义int类型
  136. VBinSum = np.zeros((VNum,1),dtype=int)
  137. for i in range(1,VNum+1):
  138. for m in range(1,PNum+1):
  139. VBinSum[i-1] = VBinSum[i-1] + XBoxNumber[m-1,i-1]
  140. for m in range(1,PNum+1):
  141. if VBinSum[i-1]>0:
  142. VBoxPercent[m-1,i-1] = (XBoxNumber[m-1,i-1] / VBinSum[i-1])*100
  143. # VBoxPercent PBoxPercent 左上-右下
  144. # 将数据颠倒一下 左下-右上 第一行换为倒数第一行 方便可视化
  145. InvXBoxNumber = np.zeros((PNum,VNum),dtype = int)
  146. InvPBoxPercent = np.zeros((PNum,VNum),dtype = float)
  147. InvVBoxPercent = np.zeros((PNum,VNum),dtype = float)
  148. for m in range(1,PNum+1):
  149. for n in range(1,VNum+1):
  150. InvXBoxNumber[m-1,n-1] = XBoxNumber[PNum-(m-1)-1,n-1]
  151. InvPBoxPercent[m-1,n-1] = PBoxPercent[PNum-(m-1)-1,n-1]
  152. InvVBoxPercent[m-1,n-1] = VBoxPercent[PNum-(m-1)-1,n-1]
  153. #以水平功率带方向为准,分析每个水平功率带中,功率主带中心,即找百分比最大的网格位置。
  154. PBoxMaxIndex = np.zeros((PNum,1),dtype = int) #水平功率带最大网格位置索引
  155. PBoxMaxP = np.zeros((PNum,1),dtype = float) #水平功率带最大网格百分比
  156. for m in range(1,PNum+1):
  157. PBoxMaxIndex[m-1] = np.argmax(PBoxPercent[m-1, :]) #argmax返回最大值的索引
  158. PBoxMaxP[m-1] = np.max(PBoxPercent[m-1, :])
  159. #以垂直风速方向为准,分析每个垂直风速带中,功率主带中心,即找百分比最大的网格位置。
  160. VBoxMaxIndex = np.zeros((VNum,1),dtype = int)
  161. VBoxMaxV = np.zeros((VNum,1),dtype = float)
  162. for m in range(1,VNum+1):
  163. VBoxMaxIndex[m-1] = np.argmax(VBoxPercent[:, m-1])
  164. VBoxMaxV[m-1] = np.max(VBoxPercent[:, m-1])
  165. #切入风速特殊处理,如果切入风速过于偏右,向左拉回
  166. if PBoxMaxIndex[0]>14: #第一个值对应的是风速最小处 即切入风速
  167. PBoxMaxIndex[0] = 9
  168. #以水平功率带方向为基准,进行分析
  169. DotDense = np.zeros(PNum) #每一水平功率带的功率主带包含的网格数
  170. DotDenseLeftRight = np.zeros((PNum,2)) #存储每一水平功率带的功率主带以最大网格为中心,向左,向右扩展的网格数
  171. DotValve = 90 #从中心向左右对称扩展网格的散点百分比和的阈值。
  172. PDotDenseSum = 0
  173. for i in range(PNum - TopP): # 从最下层水平功率带开始,向上分析到特定的功率带
  174. PDotDenseSum = PBoxMaxP[i] # 以中心最大水平功率带为基准,向左向右对称扩展网格,累加各网格散点百分比
  175. iSpreadRight = 1
  176. iSpreadLeft = 1
  177. while PDotDenseSum < DotValve:
  178. if (PBoxMaxIndex[i] + iSpreadRight) < VNum-1-1:
  179. PDotDenseSum += PBoxPercent[i, PBoxMaxIndex[i] + iSpreadRight] # 向右侧扩展
  180. iSpreadRight += 1
  181. else:
  182. break
  183. if (PBoxMaxIndex[i] - iSpreadLeft) > 0:
  184. PDotDenseSum += PBoxPercent[i, PBoxMaxIndex[i] - iSpreadLeft] # 向左侧扩展
  185. iSpreadLeft += 1
  186. else:
  187. break
  188. iSpreadRight = iSpreadRight-1
  189. iSpreadLeft = iSpreadLeft-1
  190. #向左右扩展完毕
  191. DotDenseLeftRight[i, 0] = iSpreadLeft # 左
  192. DotDenseLeftRight[i, 1] = iSpreadRight # 右
  193. DotDense[i] = iSpreadLeft + iSpreadRight + 1 # 记录向左向右扩展的个数及每个功率仓内网格的个数
  194. # 此时DotDense和DotDenseLeftRight数组已经包含了所需信息
  195. #各行功率主带右侧宽度的中位数最具有代表性(因为先右后左)
  196. DotDenseWidthLeft = np.zeros((PNum-TopP))
  197. for i in range(PNum-TopP):
  198. DotDenseWidthLeft[i] = DotDenseLeftRight[i,1] #DotDenseLeftRight[i,1]:向右延伸个数
  199. MainBandRight = np.median(DotDenseWidthLeft) #计算中位数
  200. # 初始化变量
  201. PowerLimit = np.zeros(PNum, dtype=int) # 各水平功率带是否为限功率标识,1:是;0:不是
  202. WidthAverage = 0 # 功率主带右侧平均宽度
  203. WidthAverage_L = 0 # 功率主带左侧平均宽度
  204. WidthVar = 0 # 功率主带方差(此变量在提供的代码中并未使用)
  205. PowerLimitValve = 6 # 限功率主带判别阈值
  206. N_Pcount = 20 # 阈值
  207. nCounterLimit = 0 # 限功率的个数
  208. nCounter = 0 # 正常水平功率带的个数
  209. # 循环遍历水平功率带,从第1个到第PNum-TopP个
  210. for i in range(PNum - TopP):
  211. # 如果向右扩展网格数大于阈值,且该水平功率带点总数大于20,则标记为限功率带
  212. if (DotDenseLeftRight[i, 1] > PowerLimitValve) and (PBinSum[i] > N_Pcount):
  213. PowerLimit[i] = 1
  214. nCounterLimit += 1 #限功率的个数
  215. # 如果向右扩展网格数小于等于阈值,则累加右侧宽度(左侧宽度在代码中似乎有误)
  216. if DotDenseLeftRight[i, 1] <= PowerLimitValve:
  217. WidthAverage += DotDenseLeftRight[i, 1] # 统计正常水平功率带右侧宽度
  218. WidthAverage_L += DotDenseLeftRight[i,1] #统计正常水平功率带左侧宽度
  219. nCounter += 1
  220. # 计算平均宽度
  221. WidthAverage /= nCounter if nCounter > 0 else 1 # 避免除以0的情况
  222. WidthAverage_L /= nCounter if nCounter > 0 else 1
  223. #计算正常(即非限功率)水平功率带的功率主带宽度的方差,以此来反映从下到上宽度是否一致
  224. WidthVar = 0 # 功率主带宽度的方差
  225. for i in range(PNum - TopP):
  226. # 如果向右扩展网格数小于等于阈值,则计算当前宽度与平均宽度的差值平方
  227. if DotDenseLeftRight[i, 1] <= PowerLimitValve:
  228. WidthVar += (DotDenseLeftRight[i, 1] - WidthAverage) ** 2
  229. # 计算方差(注意:除以nCounter-1是为了得到样本方差)
  230. WidthVar = np.sqrt(WidthVar / (nCounter - 1) if nCounter > 1 else 0) # 避免除以0的情况
  231. #各水平功率带,功率主带的风速范围,右侧扩展网格数*2*0.25
  232. PowerBandWidth = WidthAverage*intervalwindspeed+WidthAverage_L*intervalwindspeed
  233. # 对限负荷水平功率带的最大网格进行修正
  234. for i in range(1, PNum - TopP+1):
  235. if (PowerLimit[i] == 1) and (abs(PBoxMaxIndex[i] - PBoxMaxIndex[i - 1]) > 5):
  236. PBoxMaxIndex[i] = PBoxMaxIndex[i - 1] + 1
  237. # 输出各层功率主带的左右边界网格索引
  238. DotDenseInverse = np.flipud(DotDenseLeftRight) # 上下翻转数组以得到反向顺序
  239. # 计算功率主带的左右边界
  240. CurveWidthR = np.ceil(WidthAverage) + 2 # 功率主带的右边界 + 2
  241. CurveWidthL = np.ceil(WidthAverage_L) + 2 # 功率主带的左边界 + 2
  242. # 网格是否为限功率网格的标识数组
  243. BBoxLimit = np.zeros((PNum, VNum), dtype=int)
  244. # 标记限功率网格
  245. for i in range(2, PNum - TopP):
  246. if PowerLimit[i] == 1:
  247. BBoxLimit[i, int(PBoxMaxIndex[i] + CurveWidthR + 1):VNum] = 1
  248. # 初始化数据异常需要剔除的网格标识数组
  249. BBoxRemove = np.zeros((PNum, VNum), dtype=int)
  250. # 标记需要剔除的网格
  251. for m in range(PNum - TopP):
  252. for n in range(int(PBoxMaxIndex[m]) + int(CurveWidthR), VNum): # 注意Python中的索引从0开始,因此需要减去1
  253. BBoxRemove[m, n] = 1
  254. # 功率主带左侧的超发网格,从最大索引向左直到第一个网格
  255. for n in range(int(PBoxMaxIndex[m]) - int(CurveWidthL)+1, 0, -1): # 使用range的步长参数来实现从右向左的迭代
  256. BBoxRemove[m, n-1] = 2 # 注意Python中的索引从0开始,因此需要减去1
  257. # 初始化变量
  258. CurveTop = np.zeros((2, 1), dtype=int)
  259. CurveTopValve = 1 # 网格的百分比阈值
  260. BTopFind = 0
  261. mm = 0
  262. #确定功率主带的左上拐点,即额定风速位置的网格索引
  263. CurveTop = np.zeros((2, 1), dtype=int)
  264. CurveTopValve = 1 # 网格的百分比阈值
  265. BTopFind = 0
  266. mm = 0
  267. for m in range(PNum - TopP, 0, -1): # 注意Python的range是左闭右开区间,所以这里从PNum-TopP开始到1(不包括0)
  268. for n in range(int(np.floor(int(VCutIn) / intervalwindspeed)), VNum - 1): # 使用floor函数来向下取整
  269. if (VBoxPercent[m, n - 1] < VBoxPercent[m, n]) and (VBoxPercent[m, n] <= VBoxPercent[m, n + 1]) and (XBoxNumber[m, n] >= 3):
  270. CurveTop[0] = m
  271. CurveTop[1] = n #[第80个,第40个]
  272. BTopFind = 1
  273. mm = m # mm是拐点所在功率仓,对应其index
  274. break # 找到后退出内层循环
  275. if BTopFind == 1:
  276. break # 找到后退出外层循环
  277. IsolateValve = 3 #功率主带右侧孤立点占比功率仓阈值 3%
  278. # 遍历功率仓和网格
  279. for m in range(PNum - TopP):
  280. for n in range(int(PBoxMaxIndex[m]) + int(CurveWidthR), VNum):
  281. # 检查PBoxPercent是否小于阈值,如果是,则标记BBoxRemove为1
  282. if PBoxPercent[m, n] < IsolateValve:
  283. BBoxRemove[m, n] = 1
  284. #功率主带顶部宽度
  285. CurveWidthT = np.floor((maxP - PRated) / intervalP) + 1
  286. # 标记额定功率以上的超发点(PNum-PTop之间)
  287. for m in range(PNum - TopP, PNum):
  288. for n in range(VNum):
  289. BBoxRemove[m, n] = 3
  290. # 标记功率主带拐点左侧的欠发网格
  291. for m in range(mm-1, PNum - TopP):
  292. for n in range(int(CurveTop[1]) - 2):
  293. BBoxRemove[m, n] = 2 # BBoxRemove数组现在包含了根据条件标记的超发点和欠发网格的信息
  294. #以网格的标识,决定该网格内数据的标识。
  295. # DzMarch809Sel数组现在包含了每个数据点的标识
  296. DzMarch809Sel = np.zeros(nCounter1, dtype=int) # 初始化标识数组
  297. nWhichP = 0
  298. nWhichV = 0
  299. nBadA = 0
  300. for i in range(nCounter1):
  301. for m in range( PNum ):
  302. if (DzMarch809[i, 1] > m * intervalP) and (DzMarch809[i, 1] <= (m+1) * intervalP):
  303. nWhichP = m #m记录的是index
  304. break
  305. for n in range( VNum ): # 注意Python的range是左闭右开区间,所以这里到VNum+1
  306. if DzMarch809[i, 0] > ((n+1) * intervalwindspeed - intervalwindspeed/2) and DzMarch809[i, 0] <= ((n+1) * intervalwindspeed + intervalwindspeed / 2):
  307. nWhichV = n #index
  308. break
  309. if nWhichP >= 0 and nWhichV >= 0:
  310. if BBoxRemove[nWhichP, nWhichV] == 1:
  311. DzMarch809Sel[i] = 1
  312. nBadA += 1
  313. elif BBoxRemove[nWhichP, nWhichV] == 2:
  314. DzMarch809Sel[i] = 2
  315. elif BBoxRemove[nWhichP , nWhichV] == 3:
  316. DzMarch809Sel[i] = 0 # 额定风速以上的超发功率点认为是正常点,不再标识
  317. # DzMarch809Sel数组现在包含了每个数据点的标识
  318. ##############################滑动窗口方法
  319. # 存储限负荷数据
  320. PVLimit = np.zeros((nCounter1, 3)) #存储限负荷数据 %第3列用于存储限电的点所在的行数
  321. nLimitTotal = 0
  322. nWindowLength = 6 #滑动窗口长度设置为6
  323. LimitWindow = np.zeros(nWindowLength) #滑动窗口空列表
  324. UpLimit = 0 #上限
  325. LowLimit = 0 #下限
  326. PowerStd = 30 # 功率波动方差
  327. nWindowNum = np.floor(nCounter1/nWindowLength) #6587
  328. PowerLimitUp = PRated - 100
  329. PowerLimitLow = 100
  330. # 循环遍历每个窗口
  331. for i in range(int(nWindowNum)):
  332. start_idx = i * nWindowLength
  333. end_idx = start_idx + nWindowLength
  334. LimitWindow = DzMarch809[start_idx:end_idx, 1] # 提取当前窗口的数据
  335. # 检查窗口内所有数据是否在功率范围内
  336. bAllInAreas = np.all(LimitWindow >= PowerLimitLow) and np.all(LimitWindow <= PowerLimitUp)
  337. if not bAllInAreas:
  338. continue
  339. # 计算方差上下限
  340. UpLimit = LimitWindow[0] + PowerStd
  341. LowLimit = LimitWindow[0] - PowerStd
  342. # 检查窗口内数据是否在方差范围内
  343. bAllInUpLow = np.all(LimitWindow >= LowLimit) and np.all(LimitWindow <= UpLimit)
  344. if bAllInUpLow:
  345. # 标识窗口内的数据为限负荷数据
  346. DzMarch809Sel[start_idx:end_idx] = 4
  347. # 存储限负荷数据
  348. for j in range(nWindowLength):
  349. PVLimit[nLimitTotal, :2] = DzMarch809[start_idx + j, :2]
  350. PVLimit[nLimitTotal, 2] = Point_line[start_idx + j] # 对数据进行标识
  351. nLimitTotal += 1
  352. # PVLimit现在包含了限负荷数据,nLimitTotal是限负荷数据的总数
  353. #将功率滑动窗口主带平滑化
  354. # 初始化锯齿平滑的计数器
  355. nSmooth = 0
  356. # 遍历除了最后 TopP+1 个元素之外的所有 PBoxMaxIndex 元素
  357. for i in range(PNum - TopP - 1):
  358. PVLeftDown = np.zeros(2)
  359. PVRightUp = np.zeros(2)
  360. # 检查当前与下一个 PBoxMaxIndex 之间的距离是否大于等于1
  361. if PBoxMaxIndex[i + 1] - PBoxMaxIndex[i] >= 1:
  362. # 计算左下和右上顶点的坐标
  363. PVLeftDown[0] = (PBoxMaxIndex[i]+1 + CurveWidthR) * 0.25 - 0.125
  364. PVLeftDown[1] = (i) * 25
  365. PVRightUp[0] = (PBoxMaxIndex[i+1]+1 + CurveWidthR) * 0.25 - 0.125
  366. PVRightUp[1] = (i+1) * 25
  367. # 遍历 DzMarch809 数组
  368. for m in range(nCounter1):
  369. # 检查当前点是否在锯齿区域内
  370. if (DzMarch809[m, 0] > PVLeftDown[0]) and (DzMarch809[m, 0] < PVRightUp[0]) and (DzMarch809[m, 1] > PVLeftDown[1]) and (DzMarch809[m, 1] < PVRightUp[1]):
  371. # 检查斜率是否大于对角连线
  372. if (DzMarch809[m, 1] - PVLeftDown[1]) / (DzMarch809[m, 0] - PVLeftDown[0]) > (PVRightUp[1] - PVLeftDown[1]) / (PVRightUp[0] - PVLeftDown[0]):
  373. # 如果在锯齿左上三角形中,则选中并增加锯齿平滑计数器
  374. DzMarch809Sel[m] = 0
  375. nSmooth += 1
  376. # DzMarch809Sel 数组现在+包含了锯齿平滑的选择结果,nSmooth 是选中的点数
  377. ###################################存储数据
  378. # 存储好点
  379. nCounterPV = 0 # 初始化计数器
  380. PVDot = np.zeros((nCounter1, 3)) # 初始化存储好点的数组 nCounter1是p>0的数
  381. for i in range(nCounter1):
  382. if DzMarch809Sel[i] == 0:
  383. nCounterPV += 1
  384. PVDot[nCounterPV-1, :2] = DzMarch809[i, :2]
  385. PVDot[nCounterPV-1, 2] = Point_line[i] # 好点 Point_line记录nCounter1在原始数据中的位置
  386. nCounterVP = nCounterPV
  387. # 对所有数据中的好点进行标注
  388. for i in range(nCounterVP):
  389. Labeled_March809[int(PVDot[i, 2] - 1), (SM[1]-1)] = 1 # 注意Python的索引从0开始,并且需要转换为整数索引
  390. # 存储坏点
  391. nCounterBad = 0 # 初始化计数器
  392. PVBad = np.zeros((nCounter1, 3)) # 初始化存储坏点的数组
  393. for i in range(nCounter1):
  394. if DzMarch809Sel[i] in [1, 2, 3]:
  395. nCounterBad += 1
  396. PVBad[nCounterBad-1, :2] = DzMarch809[i, :2]
  397. PVBad[nCounterBad-1, 2] = Point_line[i]
  398. # 对所有数据中的坏点进行标注
  399. for i in range(nCounterBad):
  400. Labeled_March809[int(PVBad[i, 2] - 1),(SM[1]-1)] = 5 # 坏点标识
  401. # 对所有数据中的限电点进行标注
  402. for i in range(nLimitTotal):
  403. Labeled_March809[int(PVLimit[i, 2] - 1),(SM[1]-1)] = 4 # 限电点标识
  404. # 对所有的数据点进行标注
  405. # Labeled_March809是array,提取所第四列的值保存为dataframe
  406. A = Labeled_March809[:,3]
  407. A=pd.DataFrame(A,columns=['lab'])
  408. return A
  409. # scada_10min_category()