import os

import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
import plotly.graph_objects as go

from algorithmContract.confBusiness import *
from algorithmContract.contract import Contract
from behavior.analystNotFilter import AnalystNotFilter
from utils.jsonUtil import JsonUtil


class ProductionIndicatorAnalyst(AnalystNotFilter):
    """
    Production-indicator analysis for wind turbines and for the whole farm.

    Works on 10-minute data: every power sample is divided by 6 to turn it
    into energy for its interval (assuming power is an interval average;
    the unit depends on the input data and is not fixed here).
    """

    def typeAnalyst(self):
        """Return the identifier of this analysis type."""
        return "production_indicator"

    def turbinesAnalysis(self, outputAnalysisDir, conf: Contract, turbineCodes):
        """
        Compute production indicators per turbine model and for the farm.

        For every turbine model found among ``turbineCodes`` a CSV with the
        per-turbine indicators is written (see ``get_result``); afterwards a
        farm-level CSV is written (see ``get_total_result``).

        Args:
            outputAnalysisDir: Directory that receives the CSV files.
            conf: Analysis contract (farm id, batch number, analysis config).
            turbineCodes: Codes of the turbines to analyse.

        Returns:
            DataFrame of result-file descriptors: the farm-level row first,
            followed by one row per turbine model.

        Raises:
            ValueError: If wind speed, active power or the label flag column
                is missing from the loaded data. ``pd.concat`` also raises
                ValueError when no turbine model group is found.
        """
        dictionary = self.processTurbineData(
            turbineCodes,
            conf,
            [Field_DeviceCode, Field_Time, Field_WindSpeed,
             Field_ActiverPower, Field_LableFlag],
        )
        dataFrameOfTurbines = self.userDataFrame(
            dictionary, conf.dataContract.configAnalysis, self)

        # Make sure the columns needed by the metrics are present.
        required_columns = {Field_WindSpeed, Field_ActiverPower, Field_LableFlag}
        if not required_columns.issubset(dataFrameOfTurbines.columns):
            raise ValueError(f"DataFrame缺少必要的列。需要的列有: {required_columns}")

        turbrineInfos = self.common.getTurbineInfos(
            conf.dataContract.dataFilter.powerFarmID, turbineCodes, self.turbineInfo)
        groupedOfTurbineModel = turbrineInfos.groupby(Field_MillTypeCode)

        resultsoftotal = []
        returnDatas = []
        for turbineModelCode, group in groupedOfTurbineModel:
            currTurbineCodes = group[Field_CodeOfTurbine].unique().tolist()
            currTurbineModeInfo = self.common.getTurbineModelByCode(
                turbineModelCode, self.turbineModelInfo)

            # Contract (guaranteed) power curve of this turbine model.
            dataFrameOfContractPowerCurve = self.dataFrameContractOfTurbine[
                self.dataFrameContractOfTurbine[Field_MillTypeCode] == turbineModelCode]

            # .copy() so that adding a column below does not write into a
            # view of dataFrameOfTurbines (chained-assignment pitfall).
            currDataFrameOfTurbines = dataFrameOfTurbines[
                dataFrameOfTurbines[Field_CodeOfTurbine].isin(currTurbineCodes)].copy()
            currDataFrameOfTurbines[Field_RatedPowerSUM] = \
                self.currPowerFarmInfo[Field_RatedPowerSUM]

            results, result_df = self.get_result(
                currDataFrameOfTurbines, outputAnalysisDir, conf,
                currTurbineModeInfo, dataFrameOfContractPowerCurve)
            resultsoftotal.append(results)
            returnDatas.append(result_df)

        returnResult = pd.concat(returnDatas, ignore_index=True)

        # Farm-level indicators are derived from all per-turbine results.
        Result_df = pd.concat(resultsoftotal, ignore_index=True)
        results_total = self.get_total_result(Result_df, outputAnalysisDir, conf)

        # Farm-level descriptor first, then the per-model descriptors.
        combined_df = pd.concat([results_total, returnResult], axis=0, ignore_index=True)
        return combined_df

    @staticmethod
    def _guaranteed_power(windSpeeds, dataFrameGuaranteePowerCurve):
        """
        Interpolate the guaranteed power curve at the given wind speeds.

        A wind speed is "in range" when it lies at or above the first curve
        point and strictly below the last one (same bin test the per-row
        implementation used). The curve is expected to be sorted by wind
        speed, as required by ``np.searchsorted`` / ``np.interp``.

        Returns:
            (ideal, in_range): ``ideal`` holds the interpolated power for
            in-range samples and 0 elsewhere; ``in_range`` is a boolean mask.
        """
        curve_ws = dataFrameGuaranteePowerCurve[Field_WindSpeed].to_numpy(dtype=float)
        curve_power = dataFrameGuaranteePowerCurve[Field_ActiverPower].to_numpy(dtype=float)
        windSpeeds = np.asarray(windSpeeds, dtype=float)

        bin_index = np.searchsorted(curve_ws, windSpeeds, side='right') - 1
        in_range = (bin_index >= 0) & (bin_index < curve_ws.size - 1)

        ideal = np.zeros(windSpeeds.shape)
        if in_range.any():
            ideal[in_range] = np.interp(windSpeeds[in_range], curve_ws, curve_power)
        return ideal, in_range

    def calculate_metrics(self, df, dataFrameGuaranteePowerCurve):
        """
        Compute the production-efficiency indicators of one turbine.

        Args:
            df: Samples of a single turbine; must contain wind speed, active
                power, label flag and the farm rated-power-sum column. The
                frame is not modified.
            dataFrameGuaranteePowerCurve: Guaranteed power curve (wind speed
                and power columns), sorted by wind speed.

        Returns:
            dict with the indicator values (keys as written to the CSV).
        """
        # NOTE(review): rated power and the benchmark flag are read from the
        # FIRST row of self.turbineInfo, not from the turbine that `df`
        # belongs to - confirm this is intended for mixed fleets.
        rated_power = self.turbineInfo[Field_RatedPower].iloc[0]

        # Actual energy: only non-negative power samples count.
        EPActualTotal = (df.loc[df[Field_ActiverPower] >= 0, Field_ActiverPower] / 6).sum(skipna=True)
        # Total rated power of the farm.
        RatedPowerSUM = df[Field_RatedPowerSUM].iloc[0]
        # Utilisation hours of the turbine.
        Thi = EPActualTotal / rated_power

        # Theoretical farm energy, extrapolated from a benchmark turbine.
        if self.turbineInfo[Field_Sightcing].iloc[0] == 1:
            Qdl = (EPActualTotal / rated_power) * RatedPowerSUM
        else:
            Qdl = 0

        # Mean wind speed (NaN samples are skipped by pandas).
        WindSpeedAvr = df[Field_WindSpeed].mean()

        # Availability: share of samples at/above cut-in that produce power.
        cut_in_ws = self.turbineModelInfo[Field_CutInWS].iloc[0]
        above_cut_in = df[Field_WindSpeed] >= cut_in_ws
        nShouldGP = above_cut_in.sum()
        nRealGP = (above_cut_in & (df[Field_ActiverPower] > 0)).sum()
        TurbineRunRate = (nRealGP / nShouldGP) * 100 if nShouldGP > 0 else 0

        # Work on a cleaned copy instead of mutating the caller's frame.
        df = df.dropna(subset=[Field_WindSpeed]).reset_index(drop=True)

        wind = df[Field_WindSpeed].to_numpy(dtype=float)
        actual = df[Field_ActiverPower].to_numpy(dtype=float)
        flags = df[Field_LableFlag].to_numpy()
        ideal, in_range = self._guaranteed_power(wind, dataFrameGuaranteePowerCurve)

        # Energy the turbine should have produced according to the curve.
        EPIdealTotalAAA = (ideal[in_range] / 6).sum()
        # Energy utilisation rate; guarded like the other percentages.
        TurbinePowerRate = EPActualTotal / EPIdealTotalAAA * 100 if EPIdealTotalAAA != 0 else 0

        # Loss while stopped (flag -1): the full guaranteed energy is lost.
        stopped = in_range & (flags == -1)
        EPLostStopTotal = (ideal[stopped] / 6).sum()

        # Loss from under-performance points (flag 1).
        under = in_range & (flags == 1)
        EPLostBadTotal = (np.abs(ideal[under] - actual[under]) / 6).sum()

        # Loss because the power curve is not met on good points (flag 0);
        # signed, so it can be negative when the turbine over-performs.
        good = in_range & (flags == 0)
        EPLostPerformTotal = ((ideal[good] - actual[good]) / 6).sum()

        # Loss from curtailment (flag 4).
        limited = in_range & (flags == 4)
        EPLostLimitTotal = (np.abs(ideal[limited] - actual[limited]) / 6).sum()

        # Power-curve consistency coefficient, based on good points only.
        dfGoodPoint = df[df[Field_LableFlag] == 0].reset_index(drop=True)
        Ws = self.get_Ws(dfGoodPoint, dataFrameGuaranteePowerCurve)

        # A negative performance loss is not counted towards ideal energy.
        EPIdealTotal = EPActualTotal + EPLostStopTotal + EPLostLimitTotal + EPLostBadTotal
        if EPLostPerformTotal >= 0:
            EPIdealTotal += EPLostPerformTotal

        EPLostStopPercent = EPLostStopTotal / EPIdealTotal * 100 if EPIdealTotal != 0 else 0
        EPLostBadPercent = EPLostBadTotal / EPIdealTotal * 100 if EPIdealTotal != 0 else 0
        EPLostPerformPercent = EPLostPerformTotal / EPIdealTotal * 100 if EPIdealTotal != 0 else 0
        EPLostLimitPercent = EPLostLimitTotal / EPIdealTotal * 100 if EPIdealTotal != 0 else 0

        _, mean_width, variance_width = self.power_bin_width(df)

        result = {
            'EPActualTotal': round(EPActualTotal, 2),  # actual energy
            'TurbinePowerRate': round(TurbinePowerRate, 2),  # energy utilisation rate
            'EPLostStopPercent': round(EPLostStopPercent, 2),  # stop loss, percent
            'EPLostBadPercent': round(EPLostBadPercent, 2),  # under-performance loss, percent
            'EPLostPerformPercent': round(EPLostPerformPercent, 2),  # power-curve loss, percent
            'EPLostLimitPercent': round(EPLostLimitPercent, 2),  # curtailment loss, percent
            'TurbineRunRate': round(TurbineRunRate, 2),  # availability
            'mean_width': round(mean_width, 2),  # mean wind-speed width per power level
            'variance_width': round(variance_width, 2),  # variance of that width
            'WindSpeedAvr': round(WindSpeedAvr, 2),  # mean wind speed
            'Thi': round(Thi, 2),  # utilisation hours
            'Ws': round(Ws, 2),  # power-curve consistency coefficient
            'Qdl': Qdl,  # theoretical farm energy from the benchmark turbine
            'RatedPowerSUM': RatedPowerSUM  # total rated capacity of the farm
        }
        return result

    def power_bin_width(self, df):
        """
        Compute the wind-speed spread inside each 25-wide power bin.

        Only rows whose label flag equals 0 are used.

        Returns:
            (wind_speed_ranges, mean_width, variance_width) where
            ``wind_speed_ranges`` is max-min wind speed per power bin (NaN
            for empty bins). When there is no usable row, an empty Series
            and NaN for both statistics are returned.
        """
        bin_size = 25

        # 1. Keep good points only; copy so a column can be added safely.
        filtered_df = df[df[Field_LableFlag] == 0].copy()

        # 2. Determine the bin edges.
        min_power = filtered_df[Field_ActiverPower].min()
        max_power = filtered_df[Field_ActiverPower].max()
        if pd.isna(min_power) or pd.isna(max_power):
            # No good points: np.arange cannot build edges from NaN.
            return pd.Series(dtype=float), np.nan, np.nan

        lower_edge = min_power - min_power % bin_size
        # The last edge must be strictly above max_power, otherwise samples
        # equal to an exact multiple of the bin size fall outside the
        # half-open bins.
        upper_edge = max_power - max_power % bin_size + bin_size
        bins = np.arange(lower_edge, upper_edge + bin_size / 2, bin_size)

        # 3. Assign every power sample to a half-open bin [left, right).
        filtered_df['Power_Bin'] = pd.cut(
            filtered_df[Field_ActiverPower], bins=bins, right=False)

        # 4. Wind-speed range inside each bin (empty bins yield NaN).
        grouped = filtered_df.groupby('Power_Bin', observed=False)
        wind_speed_ranges = grouped[Field_WindSpeed].agg(lambda x: x.max() - x.min())

        # 5. Mean and variance of the widths (NaN bins are skipped).
        mean_width = wind_speed_ranges.mean()
        variance_width = wind_speed_ranges.var()

        return wind_speed_ranges, mean_width, variance_width

    def Production_indicators(self, result_df):
        """
        Compute the farm-level production indicators.

        Args:
            result_df: Per-turbine results containing the columns
                'EPActualTotal', 'RatedPowerSUM' and 'Qdl'.

        Returns:
            (Qp, Thc, Qdr, Rdr): farm energy, equivalent utilisation hours,
            abandoned-wind energy and abandoned-wind rate.
        """
        # Farm energy.
        Qp = sum(result_df['EPActualTotal'])
        # Equivalent utilisation hours of the farm.
        Thc = Qp / result_df['RatedPowerSUM'].iloc[0]
        # Abandoned-wind energy.
        # NOTE(review): this is negative whenever sum(Qdl) < Qp, e.g. when
        # no benchmark turbine is flagged - confirm intended.
        Qdr = sum(result_df['Qdl']) - Qp
        # Abandoned-wind rate, guarded against a zero denominator.
        Rdr = 0 if Qp + Qdr == 0 else Qdr / (Qp + Qdr)
        return Qp, Thc, Qdr, Rdr

    def get_Ws(self, df, dataFrameGuaranteePowerCurve):
        """
        Compute the power-characteristic consistency coefficient.

        The mean measured power in 0.5 m/s wide wind-speed bins is compared
        with the guaranteed power at the bin centres; the result is the mean
        percentage deviation over all bins that have data and a non-zero
        guaranteed power.

        Returns:
            The mean percentage deviation, or NaN when no bin is usable.
        """
        # Lower limit: cut-in wind speed minus 1 m/s.
        cut_in_ws = self.turbineModelInfo[Field_CutInWS].iloc[0]
        lower_limit = cut_in_ws - 1.0

        # Wind speed at which the guaranteed curve reaches 85% rated power.
        df_ideal_sorted = dataFrameGuaranteePowerCurve.sort_values(by=Field_WindSpeed)
        power_to_ws_interp = interp1d(
            df_ideal_sorted[Field_ActiverPower], df_ideal_sorted[Field_WindSpeed],
            kind='linear', fill_value='extrapolate')
        power_85 = 0.85 * self.turbineInfo[Field_RatedPower].iloc[0]
        ws_85 = float(power_to_ws_interp(power_85))

        # Upper limit: 1.5 times that wind speed.
        upper_limit = ws_85 * 1.5

        # Bin centres on a 0.5 m/s grid inside [lower_limit, upper_limit].
        start_center = np.ceil((lower_limit + 0.25) / 0.5) * 0.5
        sample_centers = np.arange(start_center, upper_limit + 0.25, 0.5)

        # Mean measured power per bin; the mean of an empty bin is NaN.
        wind = df[Field_WindSpeed]
        actual_power_means = np.array([
            df.loc[(wind >= center - 0.25) & (wind <= center + 0.25),
                   Field_ActiverPower].mean()
            for center in sample_centers
        ], dtype=float)

        # Guaranteed power at the bin centres.
        ideal_power_interp = interp1d(
            df_ideal_sorted[Field_WindSpeed], df_ideal_sorted[Field_ActiverPower],
            kind='linear', fill_value='extrapolate')
        ideal_powers = ideal_power_interp(sample_centers)

        valid_indices = (
            (ideal_powers != 0)
            & ~np.isnan(actual_power_means)
            & ~np.isnan(ideal_powers)
        )
        if not valid_indices.any():
            # Same value np.mean([]) would give, without the RuntimeWarning.
            return np.nan

        percentage_diff = (
            (actual_power_means[valid_indices] - ideal_powers[valid_indices])
            / ideal_powers[valid_indices]
        ) * 100
        Ws = np.mean(percentage_diff)
        return Ws

    def get_result(self, dataFrameMerge: pd.DataFrame, outputAnalysisDir, conf: Contract,
                   turbineModelInfo: pd.Series, dataFrameOfContractPowerCurve: pd.DataFrame):
        """
        Compute and save the indicators of all turbines of one model.

        Args:
            dataFrameMerge: Samples of every turbine of the model.
            outputAnalysisDir: Directory that receives the CSV file.
            conf: Analysis contract (farm id and batch number are read).
            turbineModelInfo: Information about the turbine model.
            dataFrameOfContractPowerCurve: Contract power curve of the model.

        Returns:
            (results, result_df): the per-turbine indicator table and a
            one-row descriptor of the CSV file that was written.
        """
        # One group per turbine (name, code).
        grouped = dataFrameMerge.groupby([Field_NameOfTurbine, Field_CodeOfTurbine])

        rows = []
        for name, group in grouped:
            # Turbine name first, followed by the computed indicators.
            row = {'wind_turbine_name': name[0]}
            row.update(self.calculate_metrics(group, dataFrameOfContractPowerCurve))
            rows.append(row)

        results = pd.DataFrame(rows)
        # Consistency coefficient relative to the mean over this model.
        Ws_mean = results['Ws'].mean()
        results['Wr'] = results['Ws'] / Ws_mean

        # Save as CSV; the file name is passed through the sanitising helper.
        fileName = self.escape_special_characters(
            f"{turbineModelInfo[Field_MachineTypeCode]}-production-indicator.csv")
        filePathOfproductionindicator = os.path.join(outputAnalysisDir, fileName)
        results.to_csv(filePathOfproductionindicator, index=False)

        result_df = pd.DataFrame([{
            Field_Return_TypeAnalyst: self.typeAnalyst(),
            Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
            Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
            Field_CodeOfTurbine: "total",
            Field_MillTypeCode: turbineModelInfo[Field_MillTypeCode],
            Field_Return_FilePath: filePathOfproductionindicator,
            Field_Return_IsSaveDatabase: True
        }])

        return results, result_df

    def get_total_result(self, dataFrameresults: pd.DataFrame, outputAnalysisDir, conf: Contract):
        """
        Compute and save the farm-level indicators.

        Args:
            dataFrameresults: Per-turbine indicators of all turbine models.
            outputAnalysisDir: Directory that receives the CSV file.
            conf: Analysis contract (farm id and batch number are read).

        Returns:
            One-row DataFrame describing the CSV file that was written.
        """
        Qp, Thc, Qdr, Rdr = self.Production_indicators(dataFrameresults)

        # Rdr is written unrounded, as before.
        dataFrameResult_total = pd.DataFrame({
            'Qp': [round(Qp, 2)],    # farm energy
            'Thc': [round(Thc, 2)],  # equivalent utilisation hours
            'Rdr': [Rdr],            # abandoned-wind rate
            'Qdr': [round(Qdr, 2)],  # abandoned-wind energy
        })

        # Save as CSV.
        filePathOfproductionindicator_total = os.path.join(
            outputAnalysisDir, "production_indicator_total.csv")
        dataFrameResult_total.to_csv(filePathOfproductionindicator_total, index=False)

        result_df = pd.DataFrame([{
            Field_Return_TypeAnalyst: self.typeAnalyst(),
            Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
            Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
            Field_CodeOfTurbine: "total",
            Field_MillTypeCode: "total_production_indicator",
            Field_Return_FilePath: filePathOfproductionindicator_total,
            Field_Return_IsSaveDatabase: True
        }])

        return result_df