productionIndicatorAnalyst.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. import os
  2. import numpy as np
  3. import pandas as pd
  4. from scipy.interpolate import interp1d
  5. import plotly.graph_objects as go
  6. from algorithmContract.confBusiness import *
  7. from algorithmContract.contract import Contract
  8. from behavior.analystNotFilter import AnalystNotFilter
  9. from utils.jsonUtil import JsonUtil
  10. class ProductionIndicatorAnalyst(AnalystNotFilter):
  11. """
  12. 风电机组功率曲线散点分析。
  13. 10分钟数据进行分析
  14. """
  15. def typeAnalyst(self):
  16. return "production_indicator"
  17. def turbinesAnalysis(self, outputAnalysisDir, conf: Contract, turbineCodes):
  18. dictionary = self.processTurbineData(turbineCodes, conf, [
  19. Field_DeviceCode, Field_Time, Field_WindSpeed, Field_ActiverPower,Field_LableFlag])
  20. dataFrameOfTurbines = self.userDataFrame(
  21. dictionary, conf.dataContract.configAnalysis, self)
  22. # 检查所需列是否存在
  23. required_columns = {Field_WindSpeed, Field_ActiverPower,Field_LableFlag}
  24. if not required_columns.issubset(dataFrameOfTurbines.columns):
  25. raise ValueError(f"DataFrame缺少必要的列。需要的列有: {required_columns}")
  26. turbrineInfos = self.common.getTurbineInfos(
  27. conf.dataContract.dataFilter.powerFarmID, turbineCodes, self.turbineInfo)
  28. groupedOfTurbineModel = turbrineInfos.groupby(Field_MillTypeCode)
  29. resultsoftotal=[]
  30. returnDatas = []
  31. for turbineModelCode, group in groupedOfTurbineModel:
  32. currTurbineCodes = group[Field_CodeOfTurbine].unique().tolist()
  33. currTurbineModeInfo = self.common.getTurbineModelByCode(
  34. turbineModelCode, self.turbineModelInfo)
  35. # 获取合同功率曲线数据
  36. dataFrameOfContractPowerCurve = self.dataFrameContractOfTurbine[
  37. self.dataFrameContractOfTurbine[Field_MillTypeCode] == turbineModelCode]
  38. currDataFrameOfTurbines = dataFrameOfTurbines[dataFrameOfTurbines[Field_CodeOfTurbine].isin(
  39. currTurbineCodes)]
  40. currDataFrameOfTurbines[Field_RatedPowerSUM] = self.currPowerFarmInfo[Field_RatedPowerSUM]
  41. results,result_df=self.get_result(currDataFrameOfTurbines,outputAnalysisDir,conf,currTurbineModeInfo,dataFrameOfContractPowerCurve)
  42. resultsoftotal.append(results)
  43. returnDatas.append(result_df)
  44. returnResult = pd.concat(returnDatas, ignore_index=True)
  45. # 计算总场站指标
  46. Result_df = pd.concat(resultsoftotal, ignore_index=True)
  47. results_total=self.get_total_result(Result_df, outputAnalysisDir, conf)
  48. # 连接机组生产指标以及总场站生产指标
  49. combined_df = pd.concat([results_total, returnResult], axis=0, ignore_index=True)
  50. return combined_df
  51. def calculate_metrics(self, df, dataFrameGuaranteePowerCurve):
  52. '''
  53. 计算机组生产效能指标
  54. '''
  55. # 实发电量
  56. EPActualTotal = (df.loc[df[Field_ActiverPower] >= 0, Field_ActiverPower] / 6).sum(skipna=True)
  57. # 场站总额定功率
  58. RatedPowerSUM=df[Field_RatedPowerSUM].iloc[0]
  59. # 风电机组利用小时
  60. Thi=EPActualTotal/self.turbineInfo[Field_RatedPower].iloc[0]
  61. # 弃风电量所需变量(Qdl)
  62. if self.turbineInfo[Field_Sightcing].iloc[0]==1:#是否为样板机
  63. Qyb=EPActualTotal
  64. # 限电时段理论发电量
  65. Qdl=(Qyb/self.turbineInfo[Field_RatedPower].iloc[0])*RatedPowerSUM
  66. else:
  67. Qdl=0
  68. # 平均风速
  69. WindSpeedAvr = df[Field_WindSpeed].mean()
  70. # 切入风速
  71. CutInWS = self.turbineModelInfo[Field_CutInWS]
  72. cut_in_ws=CutInWS.iloc[0]
  73. # 风机可利用率
  74. nShouldGP = (df[Field_WindSpeed] >= cut_in_ws).sum()
  75. nRealGP = ((df[Field_WindSpeed] >= cut_in_ws) & (df[Field_ActiverPower] > 0)).sum()
  76. TurbineRunRate = (nRealGP / nShouldGP) * 100 if nShouldGP > 0 else 0
  77. df.dropna(subset=[Field_WindSpeed], inplace=True)
  78. df.reset_index(drop=True, inplace=True)
  79. # 计算应发电量
  80. EPIdealTotalAAA = 0
  81. for i in range(df.shape[0]):
  82. # if df.loc[i, Field_ActiverPower] >= 0:
  83. nWhichBin = np.searchsorted(dataFrameGuaranteePowerCurve[Field_WindSpeed], df.loc[i,Field_WindSpeed], side='right') - 1
  84. if 0 <= nWhichBin< dataFrameGuaranteePowerCurve.shape[0]-1:
  85. IdealPower = np.interp(df.loc[i,Field_WindSpeed], dataFrameGuaranteePowerCurve[Field_WindSpeed], dataFrameGuaranteePowerCurve.loc[:,Field_ActiverPower])
  86. EPIdealTotalAAA += IdealPower / 6
  87. # 风机能量利用率
  88. TurbinePowerRate=EPActualTotal/EPIdealTotalAAA*100
  89. # 计算停机损失
  90. EPLostStopTotal = 0
  91. for i in range(df.shape[0]):
  92. if df.loc[i, Field_LableFlag] == -1:
  93. nWhichBin = np.searchsorted(dataFrameGuaranteePowerCurve[Field_WindSpeed], df.loc[i,Field_WindSpeed], side='right') - 1
  94. if 0 <= nWhichBin < dataFrameGuaranteePowerCurve.shape[0] - 1:
  95. IdealPower = np.interp(df.loc[i,Field_WindSpeed], dataFrameGuaranteePowerCurve[Field_WindSpeed], dataFrameGuaranteePowerCurve[Field_ActiverPower])
  96. EPLostStopTotal += IdealPower / 6
  97. # 计算欠发损失
  98. EPLostBadTotal = 0
  99. for i in range(df.shape[0]):
  100. if df.loc[i, Field_LableFlag] == 1:
  101. nWhichBin = np.searchsorted(dataFrameGuaranteePowerCurve[Field_WindSpeed], df.loc[i, Field_WindSpeed], side='right') - 1
  102. if 0 <= nWhichBin < dataFrameGuaranteePowerCurve.shape[0] - 1:
  103. IdealPower = np.interp(df.loc[i, Field_WindSpeed], dataFrameGuaranteePowerCurve[Field_WindSpeed], dataFrameGuaranteePowerCurve[Field_ActiverPower])
  104. EPLostBadTotal += abs(IdealPower - df.loc[i, Field_ActiverPower]) / 6
  105. # 额定风速以上超发功率点
  106. EPOverTotal = 0
  107. for i in range(df.shape[0]):
  108. if df.loc[i, Field_LableFlag] == 3:
  109. EPOverTotal += (df.loc[i, Field_ActiverPower] - self.turbineInfo[Field_RatedPower].iloc[0]) / 6
  110. # 构造好点数据集
  111. dfGoodPoint = df[df[Field_LableFlag] == 0].reset_index(drop=True)
  112. # 计算功率曲线未达标损失电量
  113. EPLostPerformTotal = 0
  114. for i in range(dfGoodPoint.shape[0]):
  115. nWhichBinI = np.searchsorted(dataFrameGuaranteePowerCurve[Field_WindSpeed], dfGoodPoint.loc[i, Field_WindSpeed], side='right') - 1
  116. if 0 <= nWhichBinI < dataFrameGuaranteePowerCurve.shape[0] - 1:
  117. IdealPower = np.interp(dfGoodPoint.loc[i, Field_WindSpeed], dataFrameGuaranteePowerCurve[Field_WindSpeed], dataFrameGuaranteePowerCurve[Field_ActiverPower])
  118. EPLostPerformTotal += (IdealPower - dfGoodPoint.loc[i, Field_ActiverPower]) / 6
  119. # 计算限电损失电量
  120. EPLostLimitTotal = 0
  121. for i in range(df.shape[0]):
  122. if df.loc[i, Field_LableFlag] == 4:
  123. nWhichBin = np.searchsorted(dataFrameGuaranteePowerCurve[Field_WindSpeed], df.loc[i, Field_WindSpeed], side='right') - 1
  124. if 0 <= nWhichBin < dataFrameGuaranteePowerCurve.shape[0] - 1:
  125. IdealPower = np.interp(df.loc[i, Field_WindSpeed], dataFrameGuaranteePowerCurve[Field_WindSpeed], dataFrameGuaranteePowerCurve[Field_ActiverPower])
  126. EPLostLimitTotal += abs(IdealPower - df.loc[i, Field_ActiverPower]) / 6
  127. # 计算功率曲线一致性系数
  128. Ws=self.get_Ws(dfGoodPoint,dataFrameGuaranteePowerCurve)
  129. if EPLostPerformTotal >= 0:
  130. EPIdealTotal = EPActualTotal + EPLostStopTotal + EPLostLimitTotal + EPLostBadTotal + EPLostPerformTotal
  131. else:
  132. EPIdealTotal = EPActualTotal + EPLostStopTotal + EPLostLimitTotal + EPLostBadTotal
  133. EPLostStopPercent = EPLostStopTotal / EPIdealTotal*100 if EPIdealTotal != 0 else 0
  134. EPLostBadPercent = EPLostBadTotal / EPIdealTotal*100 if EPIdealTotal != 0 else 0
  135. EPLostPerformPercent = EPLostPerformTotal / EPIdealTotal*100 if EPIdealTotal != 0 else 0
  136. EPLostLimitPercent = EPLostLimitTotal / EPIdealTotal*100 if EPIdealTotal != 0 else 0
  137. wind_speed_ranges, mean_width, variance_width=self.power_bin_width(df)
  138. # print(wind_speed_ranges)
  139. result= {
  140. 'EPActualTotal':round(EPActualTotal, 2),#实发电量
  141. 'TurbinePowerRate':round(TurbinePowerRate, 2),#风机能量利用率
  142. 'EPLostStopPercent': round(EPLostStopPercent, 2),#停机损失百分比
  143. 'EPLostBadPercent': round(EPLostBadPercent, 2),#欠发损失百分比
  144. 'EPLostPerformPercent': round(EPLostPerformPercent, 2),#功率曲线未达标损失百分比
  145. 'EPLostLimitPercent': round(EPLostLimitPercent, 2),#限电损失百分比
  146. 'TurbineRunRate':round(TurbineRunRate, 2),#风机可利用率
  147. 'mean_width':round(mean_width, 2),#功率水平平均宽度
  148. 'variance_width':round(variance_width, 2),#功率水平方差
  149. 'WindSpeedAvr':round(WindSpeedAvr, 2),#平均风速
  150. 'Thi':round(Thi, 2),#利用小时
  151. 'Ws':round(Ws, 2),#功率曲线一致性系数
  152. 'Qdl':Qdl,#限电时段理论发电量
  153. 'RatedPowerSUM':RatedPowerSUM#场站总额定容量
  154. }
  155. return result
  156. def power_bin_width(self, df):
  157. '''
  158. 计算各功率水平的风速区间
  159. '''
  160. # 1. 筛选出Field_LabelFlag等于0的行
  161. filtered_df = df[df[Field_LableFlag] == 0]
  162. # 2. 确定功率区间的边界
  163. min_power = filtered_df[Field_ActiverPower].min()
  164. max_power = filtered_df[Field_ActiverPower].max()
  165. # 确保最大值所在的仓也被包括在内
  166. bins = np.arange(min_power - min_power % 25, max_power + 25, 25)
  167. # 3. 将Field_ActivePower分到不同的仓中
  168. filtered_df['Power_Bin'] = pd.cut(filtered_df[Field_ActiverPower], bins=bins, right=False)
  169. # 4. 按仓分组,计算每个组内风速的范围
  170. grouped = filtered_df.groupby('Power_Bin')
  171. wind_speed_ranges = grouped[Field_WindSpeed].agg(lambda x: x.max() - x.min())
  172. # 5. 计算平均宽度和方差
  173. mean_width = wind_speed_ranges.mean()
  174. variance_width = wind_speed_ranges.var()
  175. # 返回结果
  176. return wind_speed_ranges, mean_width, variance_width
  177. def Production_indicators(self, result_df):
  178. '''
  179. 风场生产指标
  180. '''
  181. # 风场发电量
  182. Qp=sum(result_df['EPActualTotal'])
  183. # 计划发电量完成率(Qj为计划发电量)
  184. # Rj=Qp/Qj
  185. # 风场等效利用小时
  186. Thc=Qp/result_df['RatedPowerSUM'].iloc[0]
  187. # 风场弃风电量
  188. Qdr=sum(result_df['Qdl'])-Qp
  189. # 电网弃风率
  190. if Qp+Qdr==0:
  191. Rdr=0
  192. else:
  193. Rdr=Qdr/(Qp+Qdr)
  194. return Qp,Thc,Qdr,Rdr
  195. def get_Ws(self,df,dataFrameGuaranteePowerCurve):
  196. '''
  197. 计算功率特性一致性系数
  198. '''
  199. # 确定新的风速范围
  200. # 下限:切入风速减1m/s
  201. cut_in_ws = self.turbineModelInfo[Field_CutInWS].iloc[0]
  202. lower_limit = cut_in_ws - 1.0
  203. # 找到对应85%额定功率的风速
  204. df_ideal_sorted = dataFrameGuaranteePowerCurve.sort_values(by=Field_WindSpeed)
  205. power_to_ws_interp = interp1d(df_ideal_sorted[Field_ActiverPower], df_ideal_sorted[Field_WindSpeed], kind='linear', fill_value='extrapolate')
  206. power_85 = 0.85 * self.turbineInfo[Field_RatedPower].iloc[0]
  207. ws_85 = power_to_ws_interp(power_85)
  208. # 上限:对应85%额定功率风速的1.5倍
  209. upper_limit = ws_85 * 1.5
  210. # 生成取样点的中心,以0.5m/s为步长,在[lower_limit,upper_limit]之间
  211. start_center = np.ceil((lower_limit + 0.25) / 0.5) * 0.5
  212. sample_centers = np.arange(start_center, upper_limit + 0.25, 0.5)
  213. # 计算每个区间内的实际功率平均值
  214. actual_power_means = []
  215. for center in sample_centers:
  216. lower = center - 0.25
  217. upper = center + 0.25
  218. df_subset = df[(df[Field_WindSpeed] >= lower) & (df[Field_WindSpeed] <= upper)]
  219. if not df_subset.empty:
  220. actual_power_mean = df_subset[Field_ActiverPower].mean()
  221. actual_power_means.append(actual_power_mean)
  222. else:
  223. actual_power_means.append(np.nan)
  224. # 获取对应中心风速的理论功率
  225. ideal_power_interp = interp1d(df_ideal_sorted[Field_WindSpeed], df_ideal_sorted[Field_ActiverPower], kind='linear', fill_value='extrapolate')
  226. ideal_powers = ideal_power_interp(sample_centers)
  227. # 计算百分比差异,并求均值
  228. actual_power_means = np.array(actual_power_means)
  229. ideal_powers = ideal_powers[:len(actual_power_means)]
  230. valid_indices = (ideal_powers != 0) & (~np.isnan(actual_power_means)) & (~np.isnan(ideal_powers))
  231. percentage_diff = ((actual_power_means[valid_indices] - ideal_powers[valid_indices]) / ideal_powers[valid_indices]) * 100
  232. Ws = np.mean(percentage_diff)
  233. return Ws
  234. def get_result(self, dataFrameMerge: pd.DataFrame, outputAnalysisDir, conf: Contract, turbineModelInfo: pd.Series,dataFrameOfContractPowerCurve:pd.DataFrame):
  235. # 按设备名分组数据
  236. grouped = dataFrameMerge.groupby(
  237. [Field_NameOfTurbine, Field_CodeOfTurbine])
  238. results = []
  239. result_rows=[]
  240. # 计算每个设备的功率曲线
  241. for name, group in grouped:
  242. # 创建结果字典,首先添加风机名称wind_turbine_name
  243. result = {'wind_turbine_name':name[0]}
  244. # 更新结果字典,添加计算的指标
  245. result.update(self.calculate_metrics(group, dataFrameOfContractPowerCurve))
  246. results.append(result)
  247. # 将results转换成DataFrame
  248. results = pd.DataFrame(results)
  249. Ws_mean=results['Ws'].mean()
  250. results['Wr']=results['Ws']/Ws_mean
  251. #保存为csv文件
  252. a=f"{turbineModelInfo[Field_MachineTypeCode]}-production-indicator.csv"
  253. filepath=self.escape_special_characters(a)
  254. filePathOfproductionindicator = os.path.join(
  255. # outputAnalysisDir, f"production_indicator{turbineModelInfo[Field_MachineTypeCode]}{CSVSuffix}")
  256. outputAnalysisDir, filepath)
  257. results.to_csv(filePathOfproductionindicator, index=False)
  258. result_rows.append({
  259. Field_Return_TypeAnalyst: self.typeAnalyst(),
  260. Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
  261. Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
  262. Field_CodeOfTurbine: "total",
  263. Field_MillTypeCode:turbineModelInfo[Field_MillTypeCode],
  264. Field_Return_FilePath: filePathOfproductionindicator,
  265. Field_Return_IsSaveDatabase: True
  266. })
  267. result_df = pd.DataFrame(result_rows)
  268. return results,result_df
  269. def get_total_result(self,dataFrameresults: pd.DataFrame, outputAnalysisDir, conf: Contract):
  270. # 创建一个空的 DataFrame
  271. dataFrameResult_total = pd.DataFrame()
  272. # 计算场站总体指标Qp, Thc, Rdr
  273. Qp, Thc, Qdr,Rdr = self.Production_indicators(dataFrameresults)
  274. Qp=round(Qp,2)
  275. Thc=round(Thc,2)
  276. Rdr = Rdr if Rdr is not None else 0
  277. print("Rdr:",Rdr)
  278. Qdr=round(Qdr,2)
  279. # 将Qp, Thc, Rdr添加到results_df中
  280. dataFrameResult_total['Qp'] = [Qp]#风场总发电量
  281. dataFrameResult_total['Thc'] = [Thc]#风场等效利用小时
  282. dataFrameResult_total['Rdr'] = [Rdr]#风场弃风率
  283. dataFrameResult_total['Qdr'] = [Qdr]#风场弃风电量
  284. #保存为csv文件
  285. # print("dataFrameResult_total:",dataFrameResult_total)
  286. filePathOfproductionindicator_total = os.path.join(
  287. outputAnalysisDir, f"production_indicator_total.csv")
  288. dataFrameResult_total.to_csv(filePathOfproductionindicator_total, index=False)
  289. result_rows=[]
  290. result_rows.append({
  291. Field_Return_TypeAnalyst: self.typeAnalyst(),
  292. Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
  293. Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
  294. Field_CodeOfTurbine: "total",
  295. Field_MillTypeCode:"total_production_indicator",
  296. Field_Return_FilePath: filePathOfproductionindicator_total,
  297. Field_Return_IsSaveDatabase: True
  298. })
  299. result_df = pd.DataFrame(result_rows)
  300. return result_df