productionIndicatorAnalyst.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. import os
  2. import numpy as np
  3. import pandas as pd
  4. from scipy.interpolate import interp1d
  5. import plotly.graph_objects as go
  6. from algorithmContract.confBusiness import *
  7. from algorithmContract.contract import Contract
  8. from behavior.analystNotFilter import AnalystNotFilter
  9. from utils.jsonUtil import JsonUtil
  10. class ProductionIndicatorAnalyst(AnalystNotFilter):
  11. """
  12. 风电机组功率曲线散点分析。
  13. 10分钟数据进行分析
  14. """
  def typeAnalyst(self):
      """Return the identifier string of this analysis type."""
      analyst_type = "production_indicator"
      return analyst_type
  def turbinesAnalysis(self, outputAnalysisDir, conf: Contract, turbineCodes):
      """Compute production indicators per turbine model and for the whole farm.

      The turbine data is loaded, split by turbine model, and each model's
      rows are passed to ``get_result`` together with the contract power
      curve of that model. The per-turbine indicator tables of all models
      are then concatenated and summarised by ``get_total_result``.

      Args:
          outputAnalysisDir: Directory the result CSV files are written to.
          conf: Analysis contract; provides the data filter and analysis
              configuration.
          turbineCodes: Codes of the turbines to analyse.

      Returns:
          A DataFrame of result-descriptor rows: the farm-level row first,
          followed by one row per turbine model.

      Raises:
          ValueError: If the loaded data lacks the wind speed, active power
              or label-flag column.
      """
      dictionary = self.processTurbineData(turbineCodes, conf, [
          Field_DeviceCode, Field_Time, Field_WindSpeed, Field_ActiverPower, Field_LableFlag])
      dataFrameOfTurbines = self.userDataFrame(
          dictionary, conf.dataContract.configAnalysis, self)

      # Fail early when a column needed by the indicator calculations is missing.
      required_columns = {Field_WindSpeed, Field_ActiverPower, Field_LableFlag}
      if not required_columns.issubset(dataFrameOfTurbines.columns):
          raise ValueError(f"DataFrame缺少必要的列。需要的列有: {required_columns}")

      turbrineInfos = self.common.getTurbineInfos(
          conf.dataContract.dataFilter.powerFarmID, turbineCodes, self.turbineInfo)
      groupedOfTurbineModel = turbrineInfos.groupby(Field_MillTypeCode)

      resultsoftotal = []
      returnDatas = []
      for turbineModelCode, group in groupedOfTurbineModel:
          currTurbineCodes = group[Field_CodeOfTurbine].unique().tolist()
          currTurbineModeInfo = self.common.getTurbineModelByCode(
              turbineModelCode, self.turbineModelInfo)

          # Contract (guaranteed) power curve of the current turbine model.
          dataFrameOfContractPowerCurve = self.dataFrameContractOfTurbine[
              self.dataFrameContractOfTurbine[Field_MillTypeCode] == turbineModelCode]

          # Take an explicit copy: a column is added below, and writing into a
          # boolean-filtered slice of the shared frame is a chained assignment
          # (SettingWithCopyWarning, and not guaranteed to take effect).
          currDataFrameOfTurbines = dataFrameOfTurbines[
              dataFrameOfTurbines[Field_CodeOfTurbine].isin(currTurbineCodes)].copy()
          currDataFrameOfTurbines[Field_RatedPowerSUM] = self.currPowerFarmInfo[Field_RatedPowerSUM]

          results, result_df = self.get_result(
              currDataFrameOfTurbines, outputAnalysisDir, conf,
              currTurbineModeInfo, dataFrameOfContractPowerCurve)
          resultsoftotal.append(results)
          returnDatas.append(result_df)

      returnResult = pd.concat(returnDatas, ignore_index=True)

      # Farm-level indicators are derived from the per-turbine tables of all models.
      Result_df = pd.concat(resultsoftotal, ignore_index=True)
      results_total = self.get_total_result(Result_df, outputAnalysisDir, conf)

      # Farm-level descriptor row first, then the per-model descriptor rows.
      combined_df = pd.concat([results_total, returnResult], axis=0, ignore_index=True)
      return combined_df
  def calculate_metrics(self, df, dataFrameGuaranteePowerCurve):
      """Compute the production-efficiency indicators for one group of samples.

      Samples are treated as 10-minute records, so every power value
      contributes ``power / 6`` to an energy total. Ideal power is obtained by
      linear interpolation on the guaranteed power curve; samples whose wind
      speed lies outside the curve's wind-speed range (or on its last point)
      contribute nothing to the ideal or loss energies.

      Label-flag values used here: ``-1`` stop loss, ``1`` under-production
      loss, ``0`` good points (power-curve performance loss), ``4`` curtailment
      loss.

      Args:
          df: Samples with wind speed, active power, label flag and farm rated
              power columns. The caller's frame is not modified.
          dataFrameGuaranteePowerCurve: Guaranteed power curve with wind speed
              and active power columns.

      Returns:
          dict of indicators: actual energy, energy utilisation rate, the four
          loss percentages, run rate, power-bin width mean/variance, mean wind
          speed, utilisation hours, consistency coefficient, theoretical
          curtailment-period energy and farm rated capacity.
      """
      samples_per_hour = 6  # 10-minute samples: power / 6 gives energy per sample

      # Actual energy: only non-negative power samples count.
      active_power = df[Field_ActiverPower]
      EPActualTotal = (active_power[active_power >= 0] / samples_per_hour).sum(skipna=True)

      # Total rated power of the farm (same value on every row).
      RatedPowerSUM = df[Field_RatedPowerSUM].iloc[0]

      # NOTE(review): rated power and the sample-turbine marker are read from
      # the first row of self.turbineInfo, not looked up for the turbine in
      # `df` -- confirm this is intended.
      rated_power = self.turbineInfo[Field_RatedPower].iloc[0]

      # Utilisation hours of the turbine.
      Thi = EPActualTotal / rated_power

      # Theoretical energy during curtailment, only derived for a sample turbine.
      if self.turbineInfo[Field_Sightcing].iloc[0] == 1:
          Qdl = (EPActualTotal / rated_power) * RatedPowerSUM
      else:
          Qdl = 0

      WindSpeedAvr = df[Field_WindSpeed].mean()

      # Run rate: share of samples at or above cut-in wind speed that produce power.
      cut_in_ws = self.turbineModelInfo[Field_CutInWS].iloc[0]
      above_cut_in = df[Field_WindSpeed] >= cut_in_ws
      nShouldGP = above_cut_in.sum()
      nRealGP = (above_cut_in & (df[Field_ActiverPower] > 0)).sum()
      TurbineRunRate = (nRealGP / nShouldGP) * 100 if nShouldGP > 0 else 0
      print("风机可利用率:", TurbineRunRate)
      print(df[Field_WindSpeed].max())

      # Drop samples without wind speed on a new frame instead of mutating the
      # caller's group in place.
      df = df.dropna(subset=[Field_WindSpeed]).reset_index(drop=True)

      wind_speed = df[Field_WindSpeed].to_numpy(dtype=float)
      power = df[Field_ActiverPower].to_numpy(dtype=float)
      flags = df[Field_LableFlag].to_numpy()

      # One vectorized interpolation replaces the per-row searchsorted/interp loops.
      ideal_power, on_curve = self._ideal_power_on_curve(wind_speed, dataFrameGuaranteePowerCurve)
      ideal_energy = ideal_power / samples_per_hour
      shortfall_energy = (ideal_power - power) / samples_per_hour

      # Ideal energy over all samples covered by the curve.
      EPIdealTotalAAA = ideal_energy[on_curve].sum()
      # Energy utilisation rate; guarded like the loss percentages below so an
      # empty/uncovered data set yields 0 instead of a division by zero.
      TurbinePowerRate = EPActualTotal / EPIdealTotalAAA * 100 if EPIdealTotalAAA != 0 else 0

      # Stop loss: full ideal energy of stopped samples.
      EPLostStopTotal = ideal_energy[on_curve & (flags == -1)].sum()
      # Under-production loss: absolute gap to the curve.
      EPLostBadTotal = np.abs(shortfall_energy[on_curve & (flags == 1)]).sum()
      # Power-curve performance loss on good points: signed gap to the curve.
      EPLostPerformTotal = shortfall_energy[on_curve & (flags == 0)].sum()
      # Curtailment loss: absolute gap to the curve.
      EPLostLimitTotal = np.abs(shortfall_energy[on_curve & (flags == 4)]).sum()
      # NOTE: the previous implementation also accumulated an over-production
      # energy for flag 3 that was never reported; it is no longer computed.

      # Power-curve consistency coefficient, evaluated on the good points.
      dfGoodPoint = df[df[Field_LableFlag] == 0].reset_index(drop=True)
      Ws = self.get_Ws(dfGoodPoint, dataFrameGuaranteePowerCurve)

      # A negative performance loss (turbine beats the curve) is not counted
      # towards the ideal energy.
      EPIdealTotal = EPActualTotal + EPLostStopTotal + EPLostLimitTotal + EPLostBadTotal
      if EPLostPerformTotal >= 0:
          EPIdealTotal = EPIdealTotal + EPLostPerformTotal

      def percent_of_ideal(loss):
          return loss / EPIdealTotal * 100 if EPIdealTotal != 0 else 0

      EPLostStopPercent = percent_of_ideal(EPLostStopTotal)
      EPLostBadPercent = percent_of_ideal(EPLostBadTotal)
      EPLostPerformPercent = percent_of_ideal(EPLostPerformTotal)
      EPLostLimitPercent = percent_of_ideal(EPLostLimitTotal)

      wind_speed_ranges, mean_width, variance_width = self.power_bin_width(df)

      result = {
          'EPActualTotal': round(EPActualTotal, 2),  # actual energy
          'TurbinePowerRate': round(TurbinePowerRate, 2),  # energy utilisation rate
          'EPLostStopPercent': round(EPLostStopPercent, 2),  # stop loss percentage
          'EPLostBadPercent': round(EPLostBadPercent, 2),  # under-production loss percentage
          'EPLostPerformPercent': round(EPLostPerformPercent, 2),  # power-curve performance loss percentage
          'EPLostLimitPercent': round(EPLostLimitPercent, 2),  # curtailment loss percentage
          'TurbineRunRate': round(TurbineRunRate, 2),  # turbine run rate
          'mean_width': round(mean_width, 2),  # mean wind-speed width of the power bins
          'variance_width': round(variance_width, 2),  # variance of that width
          'WindSpeedAvr': round(WindSpeedAvr, 2),  # mean wind speed
          'Thi': round(Thi, 2),  # utilisation hours
          'Ws': round(Ws, 2),  # power-curve consistency coefficient
          'Qdl': Qdl,  # theoretical energy during curtailment
          'RatedPowerSUM': RatedPowerSUM  # total rated capacity of the farm
      }
      return result

  def _ideal_power_on_curve(self, wind_speed, dataFrameGuaranteePowerCurve):
      """Interpolate the guaranteed power for each wind speed in ``wind_speed``.

      Returns ``(ideal_power, on_curve)``: the interpolated power per sample and
      a boolean mask of the samples whose wind speed falls in a curve segment
      (from the first curve point up to, but excluding, the last one).
      """
      # np.interp and np.searchsorted both require increasing x values.
      curve = dataFrameGuaranteePowerCurve.sort_values(by=Field_WindSpeed)
      curve_ws = curve[Field_WindSpeed].to_numpy(dtype=float)
      curve_power = curve[Field_ActiverPower].to_numpy(dtype=float)

      if curve_ws.size == 0:
          # No curve: nothing is covered and np.interp would reject empty xp.
          return np.zeros(wind_speed.shape, dtype=float), np.zeros(wind_speed.shape, dtype=bool)

      bin_index = np.searchsorted(curve_ws, wind_speed, side='right') - 1
      on_curve = (bin_index >= 0) & (bin_index < curve_ws.size - 1)
      ideal_power = np.interp(wind_speed, curve_ws, curve_power)
      return ideal_power, on_curve
  def power_bin_width(self, df, bin_width=25):
      """Measure the wind-speed spread inside each power bin of the good points.

      Rows whose label flag is 0 are grouped into half-open power bins
      ``[k * bin_width, (k + 1) * bin_width)``; for each bin the wind-speed
      range (max - min) is computed.

      Args:
          df: Samples with label flag, active power and wind speed columns.
              The frame is not modified.
          bin_width: Width of a power bin; defaults to 25.

      Returns:
          Tuple ``(wind_speed_ranges, mean_width, variance_width)``: the
          per-bin wind-speed range (indexed by ``Power_Bin``), and the mean and
          variance of those ranges. With no usable good points an empty Series
          and two NaN values are returned.

      Raises:
          ValueError: If ``bin_width`` is not positive.
      """
      if bin_width <= 0:
          raise ValueError("bin_width must be positive")

      # 1. Keep only the good points.
      good_points = df[df[Field_LableFlag] == 0]
      powers = good_points[Field_ActiverPower]

      # 2. Bin edges: aligned to multiples of bin_width.
      min_power = powers.min()
      max_power = powers.max()
      if pd.isna(min_power) or pd.isna(max_power):
          # No good point with a power value: there is nothing to bin.
          return pd.Series(dtype=float), np.nan, np.nan

      lowest_bin = np.floor(min_power / bin_width)
      highest_bin = np.floor(max_power / bin_width)
      n_bins = int(highest_bin - lowest_bin) + 1
      # Bins are half-open on the right, so the last edge must lie strictly
      # above the maximum; otherwise a maximum that is an exact multiple of
      # bin_width would fall outside every bin.
      bins = lowest_bin * bin_width + bin_width * np.arange(n_bins + 1)

      # 3. Assign each good point to its power bin without writing into the slice.
      power_bin = pd.cut(powers, bins=bins, right=False).rename('Power_Bin')

      # 4. Wind-speed range per bin; empty bins are kept and yield NaN.
      wind_speed_ranges = good_points[Field_WindSpeed].groupby(
          power_bin, observed=False).agg(lambda x: x.max() - x.min())

      # 5. Mean and variance of the ranges (NaN entries are skipped).
      mean_width = wind_speed_ranges.mean()
      variance_width = wind_speed_ranges.var()

      return wind_speed_ranges, mean_width, variance_width
  def Production_indicators(self, result_df):
      """Aggregate per-turbine results into farm-level production indicators.

      Args:
          result_df: Table with the columns ``EPActualTotal``, ``RatedPowerSUM``
              and ``Qdl``.

      Returns:
          Tuple ``(Qp, Thc, Qdr, Rdr)``: farm energy production, equivalent
          utilisation hours, curtailed energy and curtailment rate. The rate is
          ``None`` when production plus curtailed energy is zero.
      """
      # Farm energy production: total of the per-turbine actual energy.
      farm_energy = sum(result_df['EPActualTotal'])
      print("Qp:", farm_energy)

      # Equivalent utilisation hours, relative to the farm rated capacity
      # taken from the first row. (The planned-generation completion rate is
      # not computed: no planned energy is available here.)
      rated_capacity = result_df['RatedPowerSUM'].iloc[0]
      equivalent_hours = farm_energy / rated_capacity

      # Curtailed energy: theoretical energy minus actual production.
      curtailed_energy = sum(result_df['Qdl']) - farm_energy

      # Curtailment rate, undefined when the denominator vanishes.
      denominator = farm_energy + curtailed_energy
      if denominator == 0:
          curtailment_rate = None
      else:
          curtailment_rate = curtailed_energy / denominator

      return farm_energy, equivalent_hours, curtailed_energy, curtailment_rate
  def get_Ws(self, df, dataFrameGuaranteePowerCurve):
      """Compute the power-characteristic consistency coefficient.

      Actual power is averaged in 0.5 m/s wide wind-speed windows and compared
      with the guaranteed curve at each window centre; the result is the mean
      percentage deviation over the windows that have data and a non-zero
      guaranteed power.

      Args:
          df: Samples with wind speed and active power columns.
          dataFrameGuaranteePowerCurve: Guaranteed power curve with wind speed
              and active power columns.

      Returns:
          Mean percentage difference between measured and guaranteed power.
      """
      half_window = 0.25
      step = 0.5

      # Lower bound: cut-in wind speed minus 1 m/s.
      lower_limit = self.turbineModelInfo[Field_CutInWS].iloc[0] - 1.0

      curve = dataFrameGuaranteePowerCurve.sort_values(by=Field_WindSpeed)
      curve_ws = curve[Field_WindSpeed]
      curve_power = curve[Field_ActiverPower]

      # Upper bound: 1.5 times the wind speed at which the curve reaches 85 %
      # of rated power (inverse lookup power -> wind speed).
      power_to_ws = interp1d(curve_power, curve_ws, kind='linear', fill_value='extrapolate')
      ws_85 = power_to_ws(0.85 * self.turbineInfo[Field_RatedPower].iloc[0])
      upper_limit = ws_85 * 1.5

      # Window centres on a 0.5 m/s grid between the two bounds.
      first_center = np.ceil((lower_limit + half_window) / step) * step
      sample_centers = np.arange(first_center, upper_limit + half_window, step)

      wind_speed = df[Field_WindSpeed]

      def mean_power_near(center):
          # Both window edges are inclusive.
          window = df[(wind_speed >= center - half_window) & (wind_speed <= center + half_window)]
          return np.nan if window.empty else window[Field_ActiverPower].mean()

      actual_power_means = np.array([mean_power_near(center) for center in sample_centers])

      # Guaranteed power at every window centre.
      ws_to_power = interp1d(curve_ws, curve_power, kind='linear', fill_value='extrapolate')
      ideal_powers = ws_to_power(sample_centers)

      # Ignore windows without data and centres with zero/undefined guaranteed power.
      usable = (ideal_powers != 0) & ~np.isnan(actual_power_means) & ~np.isnan(ideal_powers)
      measured = actual_power_means[usable]
      guaranteed = ideal_powers[usable]
      percentage_diff = ((measured - guaranteed) / guaranteed) * 100

      return np.mean(percentage_diff)
  def get_result(self, dataFrameMerge: pd.DataFrame, outputAnalysisDir, conf: Contract, turbineModelInfo: pd.Series, dataFrameOfContractPowerCurve: pd.DataFrame):
      """Compute and store the indicators of every turbine of one model.

      Args:
          dataFrameMerge: Samples of all turbines of the model.
          outputAnalysisDir: Directory the CSV file is written to.
          conf: Analysis contract; supplies farm id and batch number.
          turbineModelInfo: Model record; its machine type code names the file.
          dataFrameOfContractPowerCurve: Contract power curve of the model.

      Returns:
          Tuple ``(results, result_df)``: the per-turbine indicator table and a
          one-row descriptor of the written CSV file.
      """
      # One indicator record per (turbine name, turbine code) group, with the
      # turbine name as the first field.
      per_turbine = [
          {'wind_turbine_name': key[0],
           **self.calculate_metrics(frame, dataFrameOfContractPowerCurve)}
          for key, frame in dataFrameMerge.groupby([Field_NameOfTurbine, Field_CodeOfTurbine])
      ]
      results = pd.DataFrame(per_turbine)

      # Wr: each turbine's consistency coefficient relative to the model mean.
      results['Wr'] = results['Ws'] / results['Ws'].mean()

      # Write the table to "<machine type code>-production-indicator.csv".
      csv_name = self.escape_special_characters(
          f"{turbineModelInfo[Field_MachineTypeCode]}-production-indicator.csv")
      csv_path = os.path.join(outputAnalysisDir, csv_name)
      results.to_csv(csv_path, index=False)

      # Descriptor row for the written file; the turbine code is the literal "total".
      descriptor = {
          Field_Return_TypeAnalyst: self.typeAnalyst(),
          Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
          Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
          Field_CodeOfTurbine: "total",
          Field_Return_FilePath: csv_path,
          Field_Return_IsSaveDatabase: True
      }
      result_df = pd.DataFrame([descriptor])
      return results, result_df
  def get_total_result(self, dataFrameresults: pd.DataFrame, outputAnalysisDir, conf: Contract):
      """Compute the farm-level indicators and write them to a CSV file.

      Args:
          dataFrameresults: Per-turbine indicator table, passed on to
              ``Production_indicators``.
          outputAnalysisDir: Directory that receives
              ``production_indicator_total.csv``.
          conf: Analysis contract; supplies farm id and batch number.

      Returns:
          One-row DataFrame describing the written file.
      """
      Qp, Thc, Qdr, Rdr = self.Production_indicators(dataFrameresults)

      # Production_indicators returns Rdr=None when the curtailment rate is
      # undefined; round(None, 2) would raise TypeError, so keep None as is
      # (it is written as an empty CSV cell).
      dataFrameResult_total = pd.DataFrame({
          'Qp': [round(Qp, 2)],  # total energy production of the farm
          'Thc': [round(Thc, 2)],  # equivalent utilisation hours of the farm
          'Rdr': [round(Rdr, 2) if Rdr is not None else None],  # curtailment rate
          'Qdr': [round(Qdr, 2)],  # curtailed energy
      })

      filePathOfproductionindicator_total = os.path.join(
          outputAnalysisDir, "production_indicator_total.csv")
      dataFrameResult_total.to_csv(filePathOfproductionindicator_total, index=False)

      # Descriptor row for the written file.
      result_rows = [{
          Field_Return_TypeAnalyst: self.typeAnalyst(),
          Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
          Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
          Field_CodeOfTurbine: "total",
          Field_Return_FilePath: filePathOfproductionindicator_total,
          Field_Return_IsSaveDatabase: True
      }]
      result_df = pd.DataFrame(result_rows)
      return result_df