|
|
@@ -44,6 +44,22 @@ class PowerCurveAnalyst(AnalystWithGoodPoint):
|
|
|
currDataFrameOfTurbines = dataFrameOfTurbines[dataFrameOfTurbines[Field_CodeOfTurbine].isin(
|
|
|
currTurbineCodes)]
|
|
|
|
|
|
+ # 【新增筛选逻辑】 只在画图计算前清洗,剔除高风速下的低功率(停机)数据
|
|
|
+
|
|
|
+ # +1. 获取额定风速。
|
|
|
+ # 如果 confBusiness 中定义了 Field_RatedWindSpeed 且机型信息里有该字段,则使用;否则给一个默认值(例如 11m/s 或 12m/s),防止报错。
|
|
|
+ rated_ws = 11 # 默认值,防报错
|
|
|
+ if 'Field_RatedWindSpeed' in globals() and Field_RatedWindSpeed in currTurbineModeInfo:
|
|
|
+ rated_ws = currTurbineModeInfo[Field_RatedWindSpeed]
|
|
|
+
|
|
|
+ # +2. 执行过滤:保留 (风速 <= 额定风速) 或者 (风速 > 额定风速 且 功率 >= 20) 的数据
|
|
|
+ # 先做一个 .copy() 防止 SettingWithCopyWarning
|
|
|
+ currDataFrameOfTurbines = currDataFrameOfTurbines.copy()
|
|
|
+
|
|
|
+ mask_bad_data = (currDataFrameOfTurbines[Field_WindSpeed] > rated_ws) & (currDataFrameOfTurbines[Field_ActiverPower] < 20)
|
|
|
+ currDataFrameOfTurbines = currDataFrameOfTurbines[~mask_bad_data]
|
|
|
+
|
|
|
+
|
|
|
powerCurveDataOfTurbines = self.dataReprocess(
|
|
|
currDataFrameOfTurbines, self.binsWindSpeed)
|
|
|
|
|
|
@@ -128,21 +144,64 @@ class PowerCurveAnalyst(AnalystWithGoodPoint):
|
|
|
|
|
|
return result
|
|
|
|
|
|
- def buildPowerCurveData(self, group: pd.DataFrame, fieldWindSpeed: str, fieldActivePower: str, bins) -> pd.DataFrame:
|
|
|
+ # def buildPowerCurveData(self, group: pd.DataFrame, fieldWindSpeed: str, fieldActivePower: str, bins) -> pd.DataFrame:
|
|
|
+ # """
|
|
|
+ # 计算设备的功率曲线。
|
|
|
+ # """
|
|
|
+ # powerCut = group.groupby(pd.cut(group[fieldWindSpeed], bins, labels=np.arange(0, 25.5, 0.5))).agg({
|
|
|
+ # fieldActivePower: 'median',
|
|
|
+ # fieldWindSpeed: ['median', 'count']
|
|
|
+ # })
|
|
|
+ # wind_count = powerCut[fieldWindSpeed]['count'].tolist()
|
|
|
+ # line = powerCut[fieldActivePower]['median'].round(decimals=2).tolist()
|
|
|
+ # act_line = pd.DataFrame([powerCut.index, wind_count, line]).T
|
|
|
+ # act_line.columns = [Field_WindSpeed,
|
|
|
+ # 'EffectiveQuantity', Field_ActiverPower]
|
|
|
+ # return act_line
|
|
|
+
|
|
|
def buildPowerCurveData(self, group: pd.DataFrame, fieldWindSpeed: str, fieldActivePower: str, bins) -> pd.DataFrame:
    """
    Build the binned power curve of one device.

    The samples in ``group`` are bucketed by wind speed with ``pd.cut`` and
    the median active power of each bucket becomes one point of the curve.
    Buckets without samples are kept, so the result has one row per bucket;
    their missing power value is filled in three steps (see the inline
    comments below).

    Args:
        group: Samples of a single device. Must contain the columns named
            by ``fieldWindSpeed`` and ``fieldActivePower``.
        fieldWindSpeed: Name of the wind-speed column in ``group``.
        fieldActivePower: Name of the active-power column in ``group``.
        bins: Bins passed to ``pd.cut``. The bucket labels are fixed to
            ``np.arange(0, 25.5, 0.5)`` (51 labels), so ``bins`` must
            describe exactly 51 intervals.

    Returns:
        A DataFrame with three columns: ``Field_WindSpeed`` (bucket label),
        ``'EffectiveQuantity'`` (count of wind-speed samples in the bucket)
        and ``Field_ActiverPower`` (median power, gap-filled and rounded to
        two decimals).

    Raises:
        ValueError: Propagated from ``pd.cut`` when the number of labels
            does not match the number of intervals described by ``bins``.
    """
    # 1. Bucket the samples by wind speed using the fixed 0.5-step labels.
    bin_labels = np.arange(0, 25.5, 0.5)
    wind_bins = pd.cut(group[fieldWindSpeed], bins, labels=bin_labels)

    # observed=False is passed explicitly: empty buckets must stay in the
    # result (as NaN rows) so the curve always covers every label. Relying on
    # the implicit default would drop them once pandas changes that default.
    powerCut = group.groupby(wind_bins, observed=False).agg({
        fieldActivePower: 'median',
        fieldWindSpeed: ['median', 'count']
    })

    # 2. Extract the per-bucket sample count and the raw median power
    #    (the latter still contains NaN for empty buckets).
    wind_count = powerCut[fieldWindSpeed]['count'].tolist()
    power_series = powerCut[fieldActivePower]['median']

    # 3. Close the gaps left by empty buckets.

    # Step A: linear interpolation for gaps between observed buckets,
    # e.g. [1000, NaN, 1200] -> [1000, 1100, 1200].
    # limit_direction='forward' leaves buckets before the first observed one
    # untouched, so the low-wind end is not filled with invented values here.
    power_series = power_series.interpolate(method='linear', limit_direction='forward')

    # Step B: forward fill for gaps after the last observed bucket: the curve
    # is held at the last observed power value up to the final label.
    power_series = power_series.ffill()

    # Step C: whatever is still NaN lies before the first observed bucket
    # (or the group had no usable samples at all); report it as 0.
    power_series = power_series.fillna(0)

    line = power_series.round(decimals=2).tolist()

    # 4. Assemble the result: one row per bucket label.
    act_line = pd.DataFrame([powerCut.index, wind_count, line]).T
    act_line.columns = [Field_WindSpeed,
                        'EffectiveQuantity', Field_ActiverPower]
    return act_line
|
|
|
|
|
|
+
|
|
|
def dataReprocess(self, dataFrameMerge: pd.DataFrame, binsWindSpeed) -> pd.DataFrame:
|
|
|
# 初始化结果DataFrame
|
|
|
dataFrames = []
|