Browse Source

中广核消缺1

Xmia 6 ngày trước cách đây
mục cha
commit
7a5fd22a14

+ 8 - 3
dataAnalysisBehavior/behavior/baseAnalyst.py

@@ -645,10 +645,15 @@ class BaseAnalyst(ABC):
                 self.logger.info(
                     f"typeAnalyst: {self.typeAnalyst()} data frame concat dataBatchNum: {dataBatchNum}  timeGranularity: {timeGranularity} finish")
                 if dataFrameOfTurbines.empty:
-                    excption = CustomError(102)
+                    # excption = CustomError(102)
+                    # self.logger.warning(
+                    #     f"{excption.message} typeAnalyst: {self.typeAnalyst()}  Power Farm: {conf.dataContract.dataFilter.powerFarmID} Batch : {conf.dataContract.dataFilter.dataBatchNum} Time Granularity : {timeGranularity}")
+                    # raise excption
+
+                    # 改为仅记录警告,不抛出异常
                     self.logger.warning(
-                        f"{excption.message} typeAnalyst: {self.typeAnalyst()}  Power Farm: {conf.dataContract.dataFilter.powerFarmID} Batch : {conf.dataContract.dataFilter.dataBatchNum} Time Granularity : {timeGranularity}")
-                    raise excption
+                        f"Data is empty after processing. typeAnalyst: {self.typeAnalyst()}  Power Farm: {conf.dataContract.dataFilter.powerFarmID} Batch : {conf.dataContract.dataFilter.dataBatchNum} Time Granularity : {timeGranularity}")
+                    dictionary[timeGranularity] = pd.DataFrame()
                 else:
                     self.logger.info(
                         f"typeAnalyst: {self.typeAnalyst()} data frame concat dataBatchNum: {dataBatchNum}  timeGranularity: {timeGranularity} dataFrameOfTurbines : {dataFrameOfTurbines}")

+ 61 - 2
dataAnalysisBusiness/algorithm/powerCurveAnalyst.py

@@ -44,6 +44,22 @@ class PowerCurveAnalyst(AnalystWithGoodPoint):
             currDataFrameOfTurbines = dataFrameOfTurbines[dataFrameOfTurbines[Field_CodeOfTurbine].isin(
                 currTurbineCodes)]
 
+            # 【新增筛选逻辑】 只在画图计算前清洗,剔除高风速下的低功率(停机)数据
+            
+            # +1. 获取额定风速。
+            # 如果 confBusiness 中定义了 Field_RatedWindSpeed 且机型信息里有该字段,则使用;否则给一个默认值(例如 11m/s 或 12m/s),防止报错。
+            rated_ws = 11 # 默认值,防报错
+            if 'Field_RatedWindSpeed' in globals() and Field_RatedWindSpeed in currTurbineModeInfo:
+                 rated_ws = currTurbineModeInfo[Field_RatedWindSpeed]
+
+            # +2. 执行过滤:保留 (风速 <= 额定风速) 或者 (风速 > 额定风速 且 功率 >= 20) 的数据
+            # 先做一个 .copy() 防止 SettingWithCopyWarning
+            currDataFrameOfTurbines = currDataFrameOfTurbines.copy()
+            
+            mask_bad_data = (currDataFrameOfTurbines[Field_WindSpeed] > rated_ws) & (currDataFrameOfTurbines[Field_ActiverPower] < 20)
+            currDataFrameOfTurbines = currDataFrameOfTurbines[~mask_bad_data]
+            
+            
             powerCurveDataOfTurbines = self.dataReprocess(
                 currDataFrameOfTurbines, self.binsWindSpeed)
 
@@ -128,21 +144,64 @@ class PowerCurveAnalyst(AnalystWithGoodPoint):
 
         return result
 
-    def buildPowerCurveData(self, group: pd.DataFrame, fieldWindSpeed: str, fieldActivePower: str, bins) -> pd.DataFrame:
+    # def buildPowerCurveData(self, group: pd.DataFrame, fieldWindSpeed: str, fieldActivePower: str, bins) -> pd.DataFrame:
+    #     """
+    #     计算设备的功率曲线。
+    #     """
+    #     powerCut = group.groupby(pd.cut(group[fieldWindSpeed], bins, labels=np.arange(0, 25.5, 0.5))).agg({
+    #         fieldActivePower: 'median',
+    #         fieldWindSpeed: ['median', 'count']
+    #     })
+    #     wind_count = powerCut[fieldWindSpeed]['count'].tolist()
+    #     line = powerCut[fieldActivePower]['median'].round(decimals=2).tolist()
+    #     act_line = pd.DataFrame([powerCut.index, wind_count, line]).T
+    #     act_line.columns = [Field_WindSpeed,
+    #                         'EffectiveQuantity', Field_ActiverPower]
+    #     return act_line
+
+    def buildPowerCurveData(self, group: pd.DataFrame, fieldWindSpeed: str, fieldActivePower: str, bins) -> pd.DataFrame:
         """
-        计算设备的功率曲线。
+        Compute the binned power curve of one device.
+
+        Rows of ``group`` are bucketed by wind speed with ``pd.cut`` and the
+        median active power of each bucket becomes one curve point. Buckets
+        without samples are filled so the curve stays continuous: gaps after
+        the first populated bucket are linearly interpolated, trailing gaps
+        hold the last known power, and leading gaps become 0.
+
+        Args:
+            group: Samples to aggregate; must contain the ``fieldWindSpeed``
+                and ``fieldActivePower`` columns.
+            fieldWindSpeed: Name of the wind-speed column.
+            fieldActivePower: Name of the active-power column.
+            bins: Bin edges handed to ``pd.cut``; they must produce exactly as
+                many intervals as ``np.arange(0, 25.5, 0.5)`` has labels (51).
+
+        Returns:
+            DataFrame with one row per bucket and the columns
+            ``Field_WindSpeed`` (bucket label), ``'EffectiveQuantity'``
+            (sample count of the bucket) and ``Field_ActiverPower`` (median
+            power rounded to 2 decimals, or the filled value for an empty
+            bucket).
         """
+        # Bucket by wind speed on a fixed grid. observed=False is spelled out
+        # because the gap filling below relies on empty buckets still
+        # producing a (NaN) row instead of being dropped.
-        powerCut = group.groupby(pd.cut(group[fieldWindSpeed], bins, labels=np.arange(0, 25.5, 0.5))).agg({
+        powerCut = group.groupby(
+            pd.cut(group[fieldWindSpeed], bins, labels=np.arange(0, 25.5, 0.5)),
+            observed=False).agg({
             fieldActivePower: 'median',
             fieldWindSpeed: ['median', 'count']
         })
+
         wind_count = powerCut[fieldWindSpeed]['count'].tolist()
-        line = powerCut[fieldActivePower]['median'].round(decimals=2).tolist()
+
+        # Median power per bucket; NaN where the bucket has no samples.
+        power_series = powerCut[fieldActivePower]['median']
+
+        # Interior gaps: linear interpolation, e.g. [1000, NaN, 1200] ->
+        # [1000, 1100, 1200]. Forward-only, so empty low-wind buckets at the
+        # start are not invented from later values.
+        power_series = power_series.interpolate(method='linear', limit_direction='forward')
+
+        # Trailing gaps (e.g. nothing left above 20 m/s after filtering): hold
+        # the last observed power, which is assumed to be rated power.
+        power_series = power_series.ffill()
+
+        # Leading gaps: no samples at the lowest wind speeds, report 0.
+        power_series = power_series.fillna(0)
+
+        line = power_series.round(decimals=2).tolist()
+
+        # Assemble one row per bucket: label, sample count, power.
         act_line = pd.DataFrame([powerCut.index, wind_count, line]).T
         act_line.columns = [Field_WindSpeed,
                             'EffectiveQuantity', Field_ActiverPower]
         return act_line
 
+
     def dataReprocess(self, dataFrameMerge: pd.DataFrame, binsWindSpeed) -> pd.DataFrame:
         # 初始化结果DataFrame
         dataFrames = []

+ 13 - 0
dataAnalysisBusiness/algorithm/temperatureEnvironmentAnalyst.py

@@ -88,6 +88,19 @@ class TemperatureEnvironmentAnalyst(AnalystWithGoodBadLimitPoint):
 
     # def draw(self, dataFrame: pd.DataFrame, outputAnalysisDir, conf: Contract, charset=charset_unify):
     def draw(self, dataFrame: pd.DataFrame, outputAnalysisDir, conf: Contract, turbineModelInfo: pd.Series):
+        # +1. 去除经纬度或温度为空的数据
+        dataFrame = dataFrame.dropna(subset=[Field_NameOfTurbine, Field_Longitude, Field_Latitude, Field_EnvTemp])
+
+        # +2. 筛选合法的经纬度范围 (纬度 [-90, 90], 经度 [-180, 180]); geopy 对纬度非常敏感,超出范围会直接报错
+        valid_lat = (dataFrame[Field_Latitude] >= -90) & (dataFrame[Field_Latitude] <= 90)
+        valid_lon = (dataFrame[Field_Longitude] >= -180) & (dataFrame[Field_Longitude] <= 180)
+        dataFrame = dataFrame[valid_lat & valid_lon]
+
+        # +3. 如果筛选后没有数据,打印日志并返回空 DataFrame,避免后续报错
+        if dataFrame.empty:
+            print(f"Warning: {self.typeAnalyst()} filtered no data (invalid coordinates or missing values).")
+            return pd.DataFrame()
+
         # 处理数据
         dataFrame['new'] = dataFrame.loc[:, [Field_NameOfTurbine,
                                              Field_Longitude, Field_Latitude, Field_EnvTemp]].apply(tuple, axis=1)

+ 38 - 5
dataAnalysisBusiness/algorithm/temperatureLargeComponentsAnalyst.py

@@ -127,7 +127,7 @@ class TemperatureLargeComponentsAnalyst(AnalystWithGoodBadLimitPoint):
         """
         大部件温度传感器分析
         """
-        y_name = '温(℃)'
+        y_name = '温(℃)'
 
         outputDir = os.path.join(outputAnalysisDir, "GeneratorTemperature")
         dir.create_directory(outputDir)
@@ -139,7 +139,15 @@ class TemperatureLargeComponentsAnalyst(AnalystWithGoodBadLimitPoint):
         for column in temperatureCols:
             if not column in dataFrameMerge.columns:
                 continue
+            
+            # 判断是否是机舱温度本身
+            is_nac_temp = (column == Field_NacTemp)
+            
             columnZH = TemperatureColumns.get(column)
+
+            if not is_nac_temp:
+                columnZH = f"{columnZH}温差"
+
             outputPath = os.path.join(outputAnalysisDir, column)
             dir.create_directory(outputPath)
 
@@ -155,6 +163,16 @@ class TemperatureLargeComponentsAnalyst(AnalystWithGoodBadLimitPoint):
             for idx, (name, group) in enumerate(grouped):
                 currTurbineInfo_group = self.common.getTurbineInfo(
                     conf.dataContract.dataFilter.powerFarmID, name, self.turbineInfo)
+                
+                # Series plotted for this turbine in the overview figure.
+                x_data = group[Field_PowerFloor]
+                y_data = group[column]
+
+                # For every column except the nacelle temperature itself, plot
+                # the difference (component temperature - nacelle temperature)
+                # whenever the nacelle temperature column is present.
+                if not is_nac_temp and Field_NacTemp in group.columns:
+                    y_data = y_data - group[Field_NacTemp]
+
                 fig.add_trace(go.Scatter(
-                    x=group[Field_PowerFloor],
-                    y=group[column],
+                    x=x_data,
+                    y=y_data,
@@ -277,9 +295,17 @@ class TemperatureLargeComponentsAnalyst(AnalystWithGoodBadLimitPoint):
                     if other_name != name:
                         tempTurbineInfo = self.common.getTurbineInfo(
                             conf.dataContract.dataFilter.powerFarmID, other_name, self.turbineInfo)
+                        
+                        # --- 数据计算 ---
+                        ox_data = other_group[Field_PowerFloor]
+                        oy_data = other_group[column]
+                        if not is_nac_temp and Field_NacTemp in other_group.columns:
+                            oy_data = oy_data - other_group[Field_NacTemp]
+                        # ---------------
+                        
                         single_fig.add_trace(go.Scatter(
-                            x=other_group[Field_PowerFloor],
-                            y=other_group[column],
+                            x=ox_data,
+                            y=oy_data,
                             mode='lines',
                             name=tempTurbineInfo[Field_NameOfTurbine],
                             line=dict(color='lightgrey', width=1),
@@ -295,9 +321,16 @@ class TemperatureLargeComponentsAnalyst(AnalystWithGoodBadLimitPoint):
                         turbine_data_list_each.append(turbine_data_other_each)
 
                 # Add the turbine of interest in dark blue
+                # --- 数据计算 ---
+                gx_data = group[Field_PowerFloor]
+                gy_data = group[column]
+                if not is_nac_temp and Field_NacTemp in group.columns:
+                    gy_data = gy_data - group[Field_NacTemp]
+                # ---------------
+
                 single_fig.add_trace(go.Scatter(
-                    x=group[Field_PowerFloor],
-                    y=group[column],
+                    x=gx_data,
+                    y=gy_data,
                     mode='lines',
                     # Make it slightly thicker for visibility
                     line=dict(color='darkblue', width=2),

+ 11 - 0
dataAnalysisBusiness/algorithm/yawErrorDensityAnalyst.py

@@ -73,6 +73,12 @@ class YawErrorDensityAnalyst(AnalystWithGoodBadLimitPoint):
 
             df = self.calculateYawError(group)
             df.dropna(inplace=True)
+            
+            # 如果返回的是空的,跳过
+            if df.empty:
+                print(f"Warning: Turbine {name[0]} has no valid data after screening, skip the analysis.")
+                continue
+            
             counts = df['density'].value_counts()
             count_0 = counts.get(0, 0)  # 获取 density 为 0 的数量,如果没有 0 则返回 0
             count_1 = counts.get(1, 0)  # 获取 density 为 1 的数量,如果没有 1 则返回 0
@@ -238,6 +244,11 @@ class YawErrorDensityAnalyst(AnalystWithGoodBadLimitPoint):
             subset=[Field_NameOfTurbine, Field_YawError, Field_ActiverPower,Field_WindSpeed])
 
         filtered_dataFrame = dataFrame[(dataFrame[Field_YawError].abs() <= 30)&(dataFrame[Field_WindSpeed] >= 0)&(dataFrame[Field_WindSpeed] <= 25)]
+        
+        # 如果筛选结果为空,直接返回空 DataFrame,不再进行后续计算,防止 binned_statistic_2d 报错
+        if filtered_dataFrame.empty:
+            return pd.DataFrame()
+        
         x=filtered_dataFrame[Field_YawError]
         y=filtered_dataFrame[Field_WindSpeed]
         # data = np.column_stack((x, y))  # 合并为两列数组