# SCADA_10min_category_2.py
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.pyplot import MultipleLocator
  7. intervalPower = 25 # For example
  8. intervalWindspeed = 0.25 # For example
  9. fieldRatedPower="额定功率"
  10. fieldRatedWindSpeed="额定风速"
  11. fieldWindSpeedCutIn="切入风速"
  12. fieldWindSpeedCutOut="切出风速"
  13. fieldTime="时间"
  14. fieldWindSpeed="风速"
  15. fieldActivePower="变频器电网侧有功功率"
  16. fieldLabel="lab"
# 1. Data loading and preprocessing functions
  18. def loadData(filePathSCADA:str, filePathTurbineInfo:str):
  19. dataFrameSCADA = pd.read_csv(filePathSCADA, encoding="utf-8")
  20. dataFrameTurbineInfo = pd.read_csv(filePathTurbineInfo)
  21. return dataFrameSCADA, dataFrameTurbineInfo
  22. def extractTurbineParameters(turbineInfo:pd.DataFrame):
  23. """
  24. 解析风电机组参数
  25. 参数:
  26. turbineInfo 风电机组信息DataFrame
  27. 返回:
  28. PRated 额定功率(kw)
  29. VCutOut 切出风速(m/s)
  30. VCutIn 切入风速(m/s)
  31. VRated 额定风速(m/s)
  32. """
  33. ratedPower = turbineInfo.loc[:, [fieldRatedPower]].values
  34. windSpeedCutIn = turbineInfo.loc[:, [fieldWindSpeedCutIn]].values
  35. windSpeedCutOut = turbineInfo.loc[:, [fieldWindSpeedCutOut]].values
  36. ratedWindSpeed = turbineInfo.loc[:, [fieldRatedWindSpeed]].values
  37. return ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed
  38. def preprocessData(dataFrameOfSCADA:pd.DataFrame):
  39. """
  40. 获取机组SCADA数据的 时间、有功功率、风速,构建新的DataFrame变量
  41. 参数:
  42. dataFrameOfSCADA 机组SCADA数据
  43. 返回:
  44. 由机组SCADA数据的 时间、有功功率、风速,构建新的DataFrame变量
  45. """
  46. timeStamp = dataFrameOfSCADA.loc[:, ['时间']]
  47. activePower = dataFrameOfSCADA.loc[:, ['变频器电网侧有功功率']]
  48. windSpeed = dataFrameOfSCADA.loc[:, ['风速']]
  49. dataFramePartOfSCADA = pd.concat([timeStamp, activePower, windSpeed], axis=1)
  50. dataFramePartOfSCADA[fieldLabel]=0
  51. dataFramePartOfSCADA[fieldLabel]=dataFramePartOfSCADA[fieldLabel].astype(int)
  52. return dataFramePartOfSCADA
# 2. Data labeling and bin calculation
  54. def calculateIntervals(activePowerMax, ratedPower, windSpeedCutOut):
  55. """
  56. 按有功功率(以25kw为间隔)、风速(以0.25m/s为间隔)分仓
  57. 参数:
  58. max_power 当前机组的有功功率最大值
  59. PRated 机组额定功率
  60. wind_speed_cutout 切出风速
  61. 返回:
  62. interval_power 有功功率分仓间隔
  63. interval_windspeed 风速分仓间隔
  64. PNum 有功功率分仓数量
  65. VNum 风速分仓数量
  66. """
  67. binNumOfPower = math.floor(activePowerMax / intervalPower) + 1 if activePowerMax >= ratedPower else math.floor(ratedPower / intervalPower)
  68. binNumOfWindSpeed = math.ceil(windSpeedCutOut / intervalWindspeed)
  69. return binNumOfPower, binNumOfWindSpeed
  70. def labelData(dataFramePartOfSCADA:pd.DataFrame, conditions):
  71. """
  72. 根据特定条件对数据进行标签分配,例如功率和风速阈值。
  73. 参数:
  74. LM (DataFrame): 包含功率和风速数据的DataFrame。
  75. conditions (dict): 字典,键为条件名称,值为相应的阈值。
  76. 返回:
  77. DataFrame: 带有新的'label'列的原始DataFrame。
  78. """
  79. # 初始化标签列
  80. dataFramePartOfSCADA['label'] = 0
  81. # 根据条件进行数据标签分配
  82. for condition, threshold in conditions.items():
  83. if condition == 'power_below':
  84. dataFramePartOfSCADA.loc[dataFramePartOfSCADA[fieldActivePower] <= threshold, 'label'] = -1
  85. elif condition == 'power_above':
  86. dataFramePartOfSCADA.loc[dataFramePartOfSCADA[fieldActivePower] >= threshold, 'label'] = 1
  87. return dataFramePartOfSCADA
  88. def computeBins(data, intervals):
  89. """为给定数据计算统计箱。
  90. 参数:
  91. data (DataFrame): 需要进行分箱的数据。
  92. intervals (dict): 字典,为每个列指定间隔大小。
  93. 返回:
  94. DataFrame: 分箱数据作为区间内的计数或百分比。
  95. """
  96. binsResults = {}
  97. for column, interval in intervals.items():
  98. minValue = data[column].min()
  99. maxValue = data[column].max()
  100. bins = np.arange(minValue, maxValue + interval, interval)
  101. binnedData = pd.cut(data[column], bins, include_lowest=True)
  102. binCounts = pd.value_counts(binnedData, sort=False)
  103. binsResults[column] = binCounts
  104. return pd.DataFrame(binsResults)
# 3. Label application function
  106. def applyLabels(data, labels):
  107. """根据外部或计算出的标签对数据应用标签。
  108. 参数:
  109. data (DataFrame): 需要应用标签的数据。
  110. labels (Series或array): 应用的标签;必须与数据的索引或长度相匹配。
  111. 返回:
  112. DataFrame: 应用标签后的数据。
  113. """
  114. data['label'] = labels
  115. return data
# 4. Data visualization
  117. def plot_data(ws:list, ap:list):
  118. fig = plt.figure()
  119. plt.scatter(ws, ap, s=1, c='black', marker='.')
  120. ax = plt.gca()
  121. ax.xaxis.set_major_locator(MultipleLocator(5))
  122. ax.yaxis.set_major_locator(MultipleLocator(500))
  123. plt.xlim((0, 30))
  124. plt.ylim((0, 2200))
  125. plt.tick_params(labelsize=8)
  126. plt.xlabel("V/(m$·$s$^{-1}$)", fontsize=8)
  127. plt.ylabel("P/kW", fontsize=8)
  128. plt.show()
# 5. Main execution
  130. def main():
  131. turbine=82
  132. filePathSCADA = r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72\{}.csv'.format(turbine)
  133. filePathTurbineInfo = r'E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72\info.csv'
  134. outputFilePathOfSCADA=r"E:\BaiduNetdiskDownload\test\min_scada_LuoTuoGou\72\labeled\labeled_{}.csv".format(turbine)
  135. dataFrameOfSCADA, turbineInfo = loadData(filePathSCADA, filePathTurbineInfo)
  136. ratedPower, windSpeedCutOut, windSpeedCutIn, ratedWindSpeed = extractTurbineParameters(turbineInfo)
  137. dataFramePartOfSCADA = preprocessData(dataFrameOfSCADA)
  138. powerMax=dataFramePartOfSCADA[fieldActivePower].max()
  139. binNumOfPower, binNumOfWindSpeed=calculateIntervals(powerMax,ratedPower,windSpeedCutOut)
  140. # 根据功率阈值对数据进行标签分配
  141. conditions = {'power_below': 10, 'power_above': ratedPower[0][0]}
  142. labeledData = labelData(dataFramePartOfSCADA, conditions)
  143. # 为功率和风速计算分箱
  144. intervals = {fieldActivePower: 100, fieldWindSpeed: 1}
  145. binnedData = computeBins(labeledData, intervals)
  146. # 应用标签(假设某些外部标签被提供或在其他地方计算)
  147. externalLabels = np.random.choice([0, 1], size=len(labeledData)) # 随机示例
  148. labeledData = applyLabels(labeledData, externalLabels)
  149. labeledData.to_csv(outputFilePathOfSCADA)
  150. plot_data(labeledData[fieldWindSpeed], labeledData[fieldActivePower])
  151. if __name__ == '__main__':
  152. main()