7 時間前 · 40a23e5630
--- a/api_health.py
+++ b/api_health.py
@@ -6,8 +6,10 @@ import logging
 
				 from database import DataFetcher
			
 
				 from health_evalution_class import HealthAssessor
			
 
				 import pandas as pd
			
 
				-
			
 
				-app = FastAPI()
			
 
				+import math
			
 
				+import numpy as np
			
 
				+# app = FastAPI()
			
 
				+app = FastAPI(root_path="/api/health")
			
 
				 
			
 
				 # 配置日志
			
 
				 logging.basicConfig(level=logging.INFO)
			
@@ -135,13 +137,15 @@ async def assess_windfarm(request: AssessmentRequest):
 
				     if not results:
			
 
				         logger.warning(f"风场{request.windcode}在{request.month}无有效评估结果，返回空列表")
			
 
				         return []
			
 
				-    
			
 
				+
			
 
				+    cleaned_results = [clean_nans(result.dict()) for result in results]   
			
 
				+
			
 
				     # 防御性类型检查（生产环境可移除）
			
 
				-    if not isinstance(results, list):
			
 
				+    if not isinstance(cleaned_results, list):
			
 
				         logger.error(f"结果类型错误，期望list但得到{type(results)}，强制转换为列表")
			
 
				-        results = [results] if results is not None else []
			
 
				+        cleaned_results = [cleaned_results] if cleaned_results is not None else []
			
 
				     
			
 
				-    return results
			
 
				+    return cleaned_results
			
 
				 
			
 
				 
			
 
				 def _format_result(assessment):
			
@@ -156,4 +160,17 @@ def _format_result(assessment):
 
				             for k, v in assessment['subsystems'].items()
			
 
				         },
			
 
				         assessed_subsystems=assessment.get('assessed_subsystems', [])
			
 
				-    )
			
 
				+    )
			
 
				+
			
 
				+def clean_nans(obj):
			
 
				+    """Recursively replace non-finite floats in a nested structure with -1.0.
			
 
				+
			
 
				+    Walks dicts, lists and tuples. Any float (Python or NumPy) that is NaN,
			
 
				+    +inf or -inf becomes -1.0, the sentinel used here for an invalid value.
			
 
				+    Every other object is returned unchanged.
			
 
				+
			
 
				+    Args:
			
 
				+        obj: Arbitrarily nested dict/list/tuple structure, or a scalar.
			
 
				+
			
 
				+    Returns:
			
 
				+        A cleaned copy for containers, -1.0 for a non-finite float,
			
 
				+        otherwise ``obj`` itself.
			
 
				+    """
			
 
				+    if isinstance(obj, dict):
			
 
				+        return {k: clean_nans(v) for k, v in obj.items()}
			
 
				+    if isinstance(obj, list):
			
 
				+        return [clean_nans(item) for item in obj]
			
 
				+    if isinstance(obj, tuple):
			
 
				+        return tuple(clean_nans(item) for item in obj)
			
 
				+    # np.floating also covers float32/float16, which do not subclass float.
			
 
				+    if isinstance(obj, (float, np.floating)) and not math.isfinite(obj):
			
 
				+        return -1.0
			
 
				+    return obj
			
--- a/health_evalution_class.py
+++ b/health_evalution_class.py
@@ -52,12 +52,12 @@ class HealthAssessor:
 
				                 self.healthyResidual = None
			
 
				 
			
 
				             def calcSimilarity(self, x, y):
			
 
				-                """优化后的相似度计算"""
			
 
				+                """Return 1 / (1 + Euclidean distance between x and y); identical inputs give 1."""
			
 
				                 diff = np.array(x) - np.array(y)
			
 
				                 return 1/(1 + np.sqrt(np.sum(diff**2)))
			
 
				 
			
 
				             def genDLMatrix(self, trainDataset, dataSize4D=60, dataSize4L=5):
			
 
				-                """优化矩阵生成过程"""
			
 
				+                """矩阵生成过程"""
			
 
				                 m, n = trainDataset.shape
			
 
				                 
			
 
				                 # 快速选择极值点
			
@@ -85,7 +85,7 @@ class HealthAssessor:
 
				                 return 0
			
 
				 
			
 
				             def calcResidualByLocallyWeightedLR(self, newStates):
			
 
				-                """优化残差计算"""
			
 
				+                """残差计算"""
			
 
				                 if len(newStates.shape) == 1:
			
 
				                     newStates = newStates.reshape(-1, 1)
			
 
				                     
			
@@ -101,55 +101,72 @@ class HealthAssessor:
 
				                 return est_X - newStates
			
 
				 
			
 
				             def calcSPRT(self, newsStates, feature_weight, alpha=0.1, beta=0.1, decisionGroup=1):
			
 
				-                """优化SPRT计算"""
			
 
				-                stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
			
 
				-                weightedStateResidual = np.dot(stateResidual, feature_weight)
			
 
				-                weightedHealthyResidual = np.dot(self.healthyResidual, feature_weight)
			
 
				+                """Score new states with an SPRT-style statistic on weighted residuals.
			
 
				+
			
 
				+                Residuals of ``newsStates`` and the stored healthy residuals are both
			
 
				+                projected onto ``feature_weight``. A rolling mean of the new residuals
			
 
				+                over ``decisionGroup`` samples is compared with the healthy mean and
			
 
				+                standard deviation, clipped to the thresholds derived from ``alpha``
			
 
				+                and ``beta``, and mapped to ``100 - si * 100``.
			
 
				+
			
 
				+                Returns:
			
 
				+                    A list of scores (100 when the rolling mean equals the healthy
			
 
				+                    mean). ``[50]`` is returned when there are fewer samples than
			
 
				+                    ``decisionGroup``, and ``[50] * len(newsStates)`` when any
			
 
				+                    exception is raised during the computation.
			
 
				+                """
			
 
				+                try:
			
 
				+                    stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
			
 
				+                    weightedStateResidual = np.dot(stateResidual, feature_weight)
			
 
				+                    weightedHealthyResidual = np.dot(self.healthyResidual, feature_weight)
			
 
				 
			
 
				-                mu0 = np.mean(weightedHealthyResidual)
			
 
				-                sigma0 = np.std(weightedHealthyResidual)
			
 
				-                
			
 
				-                # 向量化计算
			
 
				-                n = len(newsStates)
			
 
				-                if n < decisionGroup:
			
 
				-                    return [50]  # 中性值
			
 
				-                
			
 
				-                rolling_mean = np.convolve(weightedStateResidual, np.ones(decisionGroup)/decisionGroup, 'valid')
			
 
				-                si = (rolling_mean - mu0) * (rolling_mean + mu0 - 2*mu0) / (2*sigma0**2)
			
 
				-                
			
 
				-                lowThres = np.log(beta/(1-alpha))
			
 
				-                highThres = np.log((1-beta)/alpha)
			
 
				-                
			
 
				-                si = np.clip(si, lowThres, highThres)
			
 
				-                si = np.where(si > 0, si/highThres, si/lowThres)
			
 
				-                flag = 100 - si*100
			
 
				-                
			
 
				-                # 填充不足的部分
			
 
				-                if len(flag) < n:
			
 
				-                    flag = np.pad(flag, (0, n-len(flag)), mode='edge')
			
 
				-                
			
 
				-                return flag.tolist()
			
 
				+                    mu0 = np.mean(weightedHealthyResidual)
			
 
				+                    sigma0 = np.std(weightedHealthyResidual)
			
 
				+                    # Guard against a (near-)zero standard deviation
			
 
				+                    if sigma0 < 1e-5:
			
 
				+                        sigma0 = 1.0  # fall back to a safe value so the division below is defined
			
 
				+                    
			
 
				+                    # Vectorised computation
			
 
				+                    n = len(newsStates)
			
 
				+                    if n < decisionGroup:
			
 
				+                        return [50]  # neutral score
			
 
				+                    
			
 
				+                    rolling_mean = np.convolve(weightedStateResidual, np.ones(decisionGroup)/decisionGroup, 'valid')
			
 
				+                    # NOTE(review): (rolling_mean + mu0 - 2*mu0) simplifies to (rolling_mean - mu0),
			
 
				+                    # so si is the squared deviation over 2*sigma0**2 - confirm this is the intended ratio.
			
 
				+                    si = (rolling_mean - mu0) * (rolling_mean + mu0 - 2*mu0) / (2*sigma0**2)
			
 
				+                    
			
 
				+                    lowThres = np.log(beta/(1-alpha))
			
 
				+                    highThres = np.log((1-beta)/alpha)
			
 
				+                    
			
 
				+                    si = np.clip(si, lowThres, highThres)
			
 
				+                    # Divide by the threshold on the matching side of zero
			
 
				+                    si = np.where(si > 0, si/highThres, si/lowThres)
			
 
				+                    flag = 100 - si*100
			
 
				+                    
			
 
				+                    # Pad the tail with the last value so there is one score per input row
			
 
				+                    if len(flag) < n:
			
 
				+                        flag = np.pad(flag, (0, n-len(flag)), mode='edge')
			
 
				+                    
			
 
				+                    return flag.tolist()
			
 
				+                except Exception as e:
			
 
				+                    print(f"SPRT计算错误: {str(e)}")
			
 
				+                    return [50] * len(newsStates)  # neutral score for every input row
			
 
				 
			
 
				             def CRITIC_prepare(self, data, flag=1):
			
 
				-                """标准化处理"""
			
 
				+                """Min-max normalise every column ahead of CRITIC weighting.
			
 
				+
			
 
				+                Columns whose name contains 'temperature' are treated as negative
			
 
				+                indicators and scaled as (max - x) / (max - min); all other columns
			
 
				+                are scaled as (x - min) / (max - min). Columns holding a single
			
 
				+                distinct value are set to the neutral value 0.5 and are skipped by
			
 
				+                the min-max step, so that value is not overwritten with 0.
			
 
				+
			
 
				+                Args:
			
 
				+                    data: pandas DataFrame whose columns can all be cast to float.
			
 
				+                    flag: Unused; kept so existing callers keep working.
			
 
				+
			
 
				+                Returns:
			
 
				+                    The float-cast DataFrame produced by ``astype`` with every
			
 
				+                    column normalised.
			
 
				+                """
			
 
				                 data = data.astype(float)
			
 
				                 numeric_cols = data.select_dtypes(include=[np.number]).columns
			
 
				-                #需要确认哪些指标是正向标准化 哪些是负向标准化
			
 
				-                negative_cols = [col for col in numeric_cols 
			
 
				-                               if any(kw in col for kw in ['temperature'])]
			
 
				-                positive_cols = list(set(numeric_cols) - set(negative_cols))
			
 
				                 
			
 
				-                # 负向标准化
			
 
				-                if negative_cols:
			
 
				-                    max_val = data[negative_cols].max()
			
 
				-                    min_val = data[negative_cols].min()
			
 
				-                    data[negative_cols] = (max_val - data[negative_cols]) / (max_val - min_val).replace(0, 1e-5)
			
 
				+                # Constant columns have no spread: pin them to the neutral value
			
 
				+                # 0.5 and remember them so the min-max step leaves them alone.
			
 
				+                constant_cols = set()
			
 
				+                for col in numeric_cols:
			
 
				+                    if data[col].nunique() == 1:
			
 
				+                        data[col] = 0.5
			
 
				+                        constant_cols.add(col)
			
 
				+
			
 
				+                # NOTE(review): which indicators are negative is still to be
			
 
				+                # confirmed; only 'temperature' columns are treated as such.
			
 
				+                for col in numeric_cols:
			
 
				+                    if col in constant_cols:
			
 
				+                        continue
			
 
				+                    col_min = data[col].min()
			
 
				+                    col_max = data[col].max()
			
 
				+                    range_val = col_max - col_min
			
 
				+                    if range_val < 1e-5:  # avoid dividing by (near) zero
			
 
				+                        range_val = 1.0
			
 
				+                    if 'temperature' in col:
			
 
				+                        # Negative indicator: larger raw value gives a smaller score
			
 
				+                        data[col] = (col_max - data[col]) / range_val
			
 
				+                    else:
			
 
				+                        # Positive indicator: larger raw value gives a larger score
			
 
				+                        data[col] = (data[col] - col_min) / range_val
			
 
				                 
			
 
				-                # 正向标准化
			
 
				-                if positive_cols:
			
 
				-                    max_val = data[positive_cols].max()
			
 
				-                    min_val = data[positive_cols].min()
			
 
				-                    data[positive_cols] = (data[positive_cols] - min_val) / (max_val - min_val).replace(0, 1e-5)
			
 
				                 
			
 
				                 return data
			
 
				 
			
@@ -213,7 +230,17 @@ class HealthAssessor:
 
				             # 功能3：删除features内容
			
 
				             if 'features' in assessment:
			
 
				                 del assessment['features']
			
 
				-                
			
 
				+
			
 
				+            # 最终清理：确保没有NaN值
			
 
				+            for sys, result in results["subsystems"].items():
			
 
				+                if isinstance(result['health_score'], float) and np.isnan(result['health_score']):
			
 
				+                    result['health_score'] = -1
			
 
				+                    result['message'] = (result.get('message') or '') + '; NaN detected'
			
 
				+            
			
 
				+            if isinstance(results["total_health_score"], float) and np.isnan(results["total_health_score"]):
			
 
				+                results["total_health_score"] = -1               
			
 
				+
			
 
				+
			
 
				             results["subsystems"][subsystem] = assessment
			
 
				 
			
 
				         # 计算整机健康度（使用新字段名）
			
@@ -229,7 +256,7 @@ class HealthAssessor:
 
				                 health_scores = [results["subsystems"][sys]['health_score'] for sys in valid_subsystems]
			
 
				                 results["total_health_score"] = float(np.dot(health_scores, weights))
			
 
				                 results["assessed_subsystems"] = valid_subsystems
			
 
				-
			
 
				+        print(results)
			
 
				         return results
			
 
				 
			
 
				 
			
@@ -293,7 +320,7 @@ class HealthAssessor:
 
				             for f in config['fixed']:
			
 
				                 if f in data.columns and data[f].notna().mean() > 0.1:
			
 
				                     available_features.append(f)
			
 
				-        print(f"匹配到的固定特征: {available_features}")
			
 
				+       # print(f"匹配到的固定特征: {available_features}")
			
 
				         # 关键词特征检查
			
 
				         if 'keywords' in config:
			
 
				             for rule in config['keywords']:
			
@@ -305,11 +332,11 @@ class HealthAssessor:
 
				                 ]
			
 
				                 if len(matched) >= rule.get('min_count', 1):
			
 
				                     available_features.extend(matched)
			
 
				-        print(f"匹配到的关键词特征: {available_features}")  
			
 
				+      #  print(f"匹配到的关键词特征: {available_features}")  
			
 
				         return list(set(available_features))
			
 
				 
			
 
				     def _assess_subsystem(self, data: pd.DataFrame) -> Dict:
			
 
				-        """评估子系统（与源代码逻辑完全一致）"""
			
 
				+        """评估子系统"""
			
 
				         # 数据清洗
			
 
				         clean_data = data.dropna()
			
 
				         if len(clean_data) < 20:  # 数据量不足
			
@@ -336,7 +363,7 @@ class HealthAssessor:
 
				     @lru_cache(maxsize=10)
			
 
				     def _get_mset_model(self, train_data: tuple):
			
 
				         """缓存MSET模型"""
			
 
				-        # 注意：由于lru_cache需要可哈希参数，这里使用元组
			
 
				+
			
 
				         arr = np.array(train_data)
			
 
				         model = self._create_mset_core()
			
 
				         model.genDLMatrix(arr)
			
@@ -344,17 +371,29 @@ class HealthAssessor:
 
				 
			
 
				     def _run_mset_assessment(self, data: np.ndarray, weights: np.ndarray) -> float:
			
 
				-        """执行MSET评估"""
			
 
				+        """Run the MSET assessment and return the mean SPRT score.
			
 
				+
			
 
				+        The first half of ``data`` builds (or fetches from cache) the MSET
			
 
				+        model; the second half is scored with ``calcSPRT``.
			
 
				+
			
 
				+        Args:
			
 
				+            data: 2-D array of samples; each row is converted to a tuple.
			
 
				+            weights: Feature weights passed to ``calcSPRT``; replaced by
			
 
				+                equal weights if any entry is NaN or infinite.
			
 
				+
			
 
				+        Returns:
			
 
				+            Mean of the finite SPRT flags, or the neutral value 50.0 when
			
 
				+            no finite flag exists or the assessment raises.
			
 
				+        """
			
 
				+        # Fall back to equal weights when any weight is NaN or infinite
			
 
				+        if np.isnan(weights).any() or np.isinf(weights).any():
			
 
				+            weights = np.ones_like(weights) / len(weights)
			
 
				+
			
 
				-        # 分割训练集和测试集
			
 
				+        # First half trains the model, second half is scored
			
 
				         split_idx = len(data) // 2
			
 
				         train_data = data[:split_idx]
			
 
				         test_data = data[split_idx:]
			
 
				-        
			
 
				+
			
 
				-        # 使用缓存模型
			
 
				-        model = self._get_mset_model(tuple(map(tuple, train_data)))  # 转换为可哈希的元组
			
 
				-        
			
 
				-        # 计算SPRT标志
			
 
				-        flags = model.calcSPRT(test_data, weights)
			
 
				-        return np.mean(flags)
			
 
				+        try:
			
 
				+            # Nested tuples make the training data hashable for the cached model lookup
			
 
				+            model = self._get_mset_model(tuple(map(tuple, train_data)))
			
 
				+            flags = np.asarray(model.calcSPRT(test_data, weights), dtype=float)
			
 
				+
			
 
				+            # Drop NaN and +/-inf so a single bad flag cannot poison the mean
			
 
				+            valid_flags = flags[np.isfinite(flags)]
			
 
				+            if valid_flags.size == 0:
			
 
				+                return 50.0  # neutral default
			
 
				+
			
 
				+            return float(valid_flags.mean())
			
 
				+        except Exception as e:
			
 
				+            print(f"MSET评估失败: {str(e)}")
			
 
				+            return 50.0  # neutral default
			
 
				     
			
 
				     def _get_subsystem_weights(self, subsystems: List[str]) -> np.ndarray:
			
 
				         """生成等权重的子系统权重向量"""