@@ -0,0 +1,429 @@
+import numpy as np
+import pandas as pd
+from sklearn.neighbors import BallTree
+from typing import Dict, List, Optional, Union
+from functools import lru_cache
+
+class HealthAssessor:
+    def __init__(self):
+        self.subsystem_config = {
+            # Generator
+            'generator': {
+                # Doubly-fed (DFIG)
+                'dfig': {
+                    'fixed': ['generator_winding1_temperature', 'generator_winding2_temperature',
+                              'generator_winding3_temperature', 'generatordrive_end_bearing_temperature',
+                              'generatornon_drive_end_bearing_temperature'],
+                },
+                # Direct-drive
+                'direct': {
+                    'fixed': ['generator_winding1_temperature', 'generator_winding2_temperature',
+                              'generator_winding3_temperature', 'main_bearing_temperature'],
+                }
+            },
+            # Nacelle system
+            'nacelle': {
+                'fixed': ['front_back_vibration_of_the_cabin', 'side_to_side_vibration_of_the_cabin',
+                          'cabin_position', 'cabin_temperature'],
+            },
+            # Grid environment
+            'grid': {
+                'fixed': ['reactive_power', 'active_power', 'grid_a_phase_current',
+                          'grid_b_phase_current', 'grid_c_phase_current'],
+            },
+            # Drive train
+            'drive_train': {
+                'fixed': ['main_bearing_temperature'],
+                'keywords': [
+                    {'include': ['gearbox', 'temperature'], 'exclude': [], 'min_count': 2},
+                ]
+            }
+        }
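+        # Configuration notes: each subsystem entry lists 'fixed' column names matched
+        # exactly against the input data, plus optional 'keywords' rules of the form
+        # {'include': [...], 'exclude': [...], 'min_count': n}; a column qualifies when
+        # its lower-cased name contains every include keyword and no exclude keyword,
+        # and a rule only contributes features when at least min_count columns match.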
+
+        # MSET implementation embedded from the source code
+        self.mset = self._create_mset_core()
+
+    def _create_mset_core(self):
+        """Create the MSET core computation module."""
+        class MSETCore:
+            def __init__(self):
+                self.matrixD = None
+                self.normalDataBallTree = None
+                self.healthyResidual = None
+
+            def calcSimilarity(self, x, y):
+                """Optimized similarity calculation."""
+                diff = np.array(x) - np.array(y)
+                return 1/(1 + np.sqrt(np.sum(diff**2)))
+
+            def genDLMatrix(self, trainDataset, dataSize4D=60, dataSize4L=5):
+                """Optimized memory-matrix generation."""
+                m, n = trainDataset.shape
+
+                # Quickly select the extreme points
+                min_indices = np.argmin(trainDataset, axis=0)
+                max_indices = np.argmax(trainDataset, axis=0)
+                unique_indices = np.unique(np.concatenate([min_indices, max_indices]))
+                self.matrixD = trainDataset[unique_indices].copy()
+
+                # Quickly fill in the remaining points
+                remaining_indices = np.setdiff1d(np.arange(m), unique_indices)
+                np.random.shuffle(remaining_indices)
+                needed = max(0, dataSize4D - len(unique_indices))
+                if needed > 0:
+                    self.matrixD = np.vstack([self.matrixD, trainDataset[remaining_indices[:needed]]])
+
+                # Use the same BallTree parameters as the source code
+                self.normalDataBallTree = BallTree(
+                    self.matrixD,
+                    leaf_size=4,
+                    metric=lambda i, j: 1 - self.calcSimilarity(i, j)  # custom similarity
+                )
+
+                # Compute residuals over the full training set
+                self.healthyResidual = self.calcResidualByLocallyWeightedLR(trainDataset)
+                return 0
+
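+            # Sketch of what genDLMatrix builds: the memory matrix D keeps, for each
+            # feature, the rows holding its minimum and maximum value, then tops up with
+            # randomly chosen remaining rows until roughly dataSize4D rows are collected
+            # (dataSize4L is accepted but not used in this implementation). A BallTree
+            # over D, with distance defined as 1 - similarity, is the nearest-neighbour
+            # index used by the locally weighted estimates below.
+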
+            def calcResidualByLocallyWeightedLR(self, newStates):
+                """Optimized residual calculation."""
+                if len(newStates.shape) == 1:
+                    newStates = newStates.reshape(-1, 1)
+
+                dist, iList = self.normalDataBallTree.query(
+                    newStates,
+                    k=min(10, len(self.matrixD)),
+                    return_distance=True
+                )
+                weights = 1/(dist + 1e-5)
+                weights /= weights.sum(axis=1)[:, np.newaxis]
+
+                # Weighted average over each row's own nearest memory vectors
+                est_X = np.sum(weights[:, :, np.newaxis] * self.matrixD[iList], axis=1)
+                return est_X - newStates
+
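+            # Estimation step in brief: for each query state x, the k nearest memory
+            # vectors d_1..d_k are retrieved from the BallTree, weighted by
+            # w_i = 1/(dist_i + 1e-5) (normalised to sum to 1), and the MSET estimate is
+            # x_hat = sum_i w_i * d_i; the returned residual is x_hat - x.
+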
+            def calcSPRT(self, newStates, feature_weight, alpha=0.1, beta=0.1, decisionGroup=1):
+                """Optimized SPRT calculation (consistent with the source implementation)."""
+                stateResidual = self.calcResidualByLocallyWeightedLR(newStates)
+                weightedStateResidual = np.dot(stateResidual, feature_weight)
+                weightedHealthyResidual = np.dot(self.healthyResidual, feature_weight)
+
+                mu0 = np.mean(weightedHealthyResidual)
+                sigma0 = np.std(weightedHealthyResidual)
+
+                # Vectorized computation
+                n = len(newStates)
+                if n < decisionGroup:
+                    return [50]  # neutral value
+
+                rolling_mean = np.convolve(weightedStateResidual, np.ones(decisionGroup)/decisionGroup, 'valid')
+                si = (rolling_mean - mu0) * (rolling_mean + mu0 - 2*mu0) / (2*sigma0**2)
+
+                lowThres = np.log(beta/(1-alpha))
+                highThres = np.log((1-beta)/alpha)
+
+                si = np.clip(si, lowThres, highThres)
+                si = np.where(si > 0, si/highThres, si/lowThres)
+                flag = 100 - si*100
+
+                # Pad the missing portion
+                if len(flag) < n:
+                    flag = np.pad(flag, (0, n-len(flag)), mode='edge')
+
+                return flag.tolist()
+
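+            # SPRT details as implemented above: with mu0/sigma0 estimated from the
+            # weighted healthy residual, each rolling mean m yields the statistic
+            #   s = (m - mu0) * (m + mu0 - 2*mu0) / (2 * sigma0**2),
+            # which is clipped to the Wald thresholds ln(beta/(1-alpha)) and
+            # ln((1-beta)/alpha), normalised by the threshold it was clipped against,
+            # and mapped to a 0-100 flag via flag = 100 - 100*s (edge-padded to length n).
+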
+            def CRITIC_prepare(self, data, flag=1):
+                """Normalization."""
+                data = data.astype(float)
+                numeric_cols = data.select_dtypes(include=[np.number]).columns
+                # Need to confirm which indicators are normalized positively and which negatively
+                negative_cols = [col for col in numeric_cols
+                                 if any(kw in col for kw in ['temperature'])]
+                positive_cols = list(set(numeric_cols) - set(negative_cols))
+
+                # Negative (cost-type) normalization
+                if negative_cols:
+                    max_val = data[negative_cols].max()
+                    min_val = data[negative_cols].min()
+                    data[negative_cols] = (max_val - data[negative_cols]) / (max_val - min_val).replace(0, 1e-5)
+
+                # Positive (benefit-type) normalization
+                if positive_cols:
+                    max_val = data[positive_cols].max()
+                    min_val = data[positive_cols].min()
+                    data[positive_cols] = (data[positive_cols] - min_val) / (max_val - min_val).replace(0, 1e-5)
+
+                return data
+
+            def CRITIC(self, data):
+                """CRITIC weight calculation."""
+                data_norm = self.CRITIC_prepare(data.copy())
+                std = data_norm.std(ddof=0).clip(lower=0.01)
+                corr = np.abs(np.corrcoef(data_norm.T))
+                np.fill_diagonal(corr, 0)
+                conflict = np.sum(1 - corr, axis=1)
+                info = std * conflict
+                weights = info / info.sum()
+                return pd.Series(weights, index=data.columns)
+
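+            # CRITIC weighting as computed above: with columns scaled to [0, 1], each
+            # feature j gets information content C_j = sigma_j * sum_k (1 - |r_jk|)
+            # (its standard deviation times its conflict with the other features, with
+            # the correlation diagonal zeroed), and the weight is w_j = C_j / sum_j C_j.
+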
+            def ahp(self, matrix):
+                """AHP weight calculation."""
+                eigenvalue, eigenvector = np.linalg.eig(matrix)
+                max_idx = np.argmax(eigenvalue)
+                weight = eigenvector[:, max_idx].real
+                return weight / weight.sum(), eigenvalue[max_idx].real
+
+        return MSETCore()
+
+    def assess_turbine(self, engine_code, data, mill_type, wind_turbine_name):
+        """Assess a single wind turbine."""
+        results = {
+            "engine_code": engine_code,
+            "wind_turbine_name": wind_turbine_name,
+            "mill_type": mill_type,
+            "total_health_score": None,
+            "subsystems": {},
+            "assessed_subsystems": []
+        }
+
+        # Assess each subsystem
+        subsystems_to_assess = [
+            ('generator', self.subsystem_config['generator'][mill_type], 1),
+            ('nacelle', self.subsystem_config['nacelle'], 1),
+            ('grid', self.subsystem_config['grid'], 1),
+            ('drive_train', self.subsystem_config['drive_train'] if mill_type == 'dfig' else None, 1)
+        ]
+
+        for subsystem, config, min_features in subsystems_to_assess:
+            if config is None:
+                continue
+
+            features = self._get_subsystem_features(config, data)
+
+            # Feature 1: output a result whether or not enough features are available
+            if len(features) >= min_features:
+                assessment = self._assess_subsystem(data[features])
+            else:
+                assessment = {
+                    'health_score': -1,  # insufficient features, rendered as '-' downstream
+                    'weights': {},
+                    'message': f'Insufficient features (required {min_features}, got {len(features)})'
+                }
+
+            # Feature 3: drop the 'features' field
+            if 'features' in assessment:
+                del assessment['features']
+
+            results["subsystems"][subsystem] = assessment
+
+        # Compute the overall turbine health (using the new field name)
+        if results["subsystems"]:
+            # Only include subsystems whose health score is a valid number
+            valid_subsystems = [
+                k for k, v in results["subsystems"].items()
+                if isinstance(v['health_score'], (int, float)) and v['health_score'] >= 0
+            ]
+
+            if valid_subsystems:
+                weights = self._get_subsystem_weights(valid_subsystems)
+                health_scores = [results["subsystems"][sys]['health_score'] for sys in valid_subsystems]
+                results["total_health_score"] = float(np.dot(health_scores, weights))
+                results["assessed_subsystems"] = valid_subsystems
+
+        return results
+
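+    # Shape of the dictionary returned by assess_turbine (values are illustrative only):
+    # {
+    #     "engine_code": ..., "wind_turbine_name": ..., "mill_type": "dfig",
+    #     "total_health_score": 87.3,
+    #     "subsystems": {
+    #         "generator": {"health_score": 90.1, "weights": {...}},
+    #         "grid": {"health_score": -1, "weights": {},
+    #                  "message": "Insufficient features (required 1, got 0)"},
+    #         ...
+    #     },
+    #     "assessed_subsystems": ["generator", ...]
+    # }
+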
+    def _get_all_possible_features(self, assessor, mill_type, available_columns):
+        """
+        Get all possible feature columns (based on the columns actually present).
+
+        Args:
+            assessor: HealthAssessor instance
+            mill_type: turbine type
+            available_columns: list of column names actually present in the database
+        """
+        features = []
+        available_columns_lower = [col.lower() for col in available_columns]  # case-insensitive matching
+
+        for subsys_name, subsys_config in assessor.subsystem_config.items():
+            # Resolve the subsystem configuration
+            if subsys_name == 'generator':
+                config = subsys_config.get(mill_type, {})
+            elif subsys_name == 'drive_train' and mill_type != 'dfig':
+                continue
+            else:
+                config = subsys_config
+
+            # Handle fixed features
+            if 'fixed' in config:
+                for f in config['fixed']:
+                    if f in available_columns:
+                        features.append(f)
+
+            # Handle keyword features
+            if 'keywords' in config:
+                for rule in config['keywords']:
+                    matched = []
+                    include_kws = [kw.lower() for kw in rule['include']]
+                    exclude_kws = [ex.lower() for ex in rule.get('exclude', [])]
+
+                    for col in available_columns:
+                        col_lower = col.lower()
+                        # Check include keywords
+                        include_ok = all(kw in col_lower for kw in include_kws)
+                        # Check exclude keywords
+                        exclude_ok = not any(ex in col_lower for ex in exclude_kws)
+
+                        if include_ok and exclude_ok:
+                            matched.append(col)
+
+                    if len(matched) >= rule.get('min_count', 1):
+                        features.extend(matched)
+
+        return list(set(features))  # deduplicate
+
+    def _get_subsystem_features(self, config: Dict, data: pd.DataFrame) -> List[str]:
+        """Final feature-selection method."""
+        available_features = []
+
+        # Fixed-feature check (requires at least 10% non-null values)
+        if 'fixed' in config:
+            for f in config['fixed']:
+                if f in data.columns and data[f].notna().mean() > 0.1:
+                    available_features.append(f)
+        print(f"Matched fixed features: {available_features}")
+
+        # Keyword-feature check
+        if 'keywords' in config:
+            for rule in config['keywords']:
+                matched = [
+                    col for col in data.columns
+                    if all(kw.lower() in col.lower() for kw in rule['include'])
+                    and not any(ex.lower() in col.lower() for ex in rule.get('exclude', []))
+                    and data[col].notna().mean() > 0.1  # data validity check
+                ]
+                if len(matched) >= rule.get('min_count', 1):
+                    available_features.extend(matched)
+        print(f"Matched keyword features: {available_features}")
+
+        return list(set(available_features))
+
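+    # Keyword-rule example: with {'include': ['gearbox', 'temperature'], 'exclude': [],
+    # 'min_count': 2}, every column whose lower-cased name contains both "gearbox" and
+    # "temperature" (and is more than 10% non-null) matches, and the matches are only
+    # added when at least two such columns exist, so a single matching sensor is ignored.
+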
+    def _assess_subsystem(self, data: pd.DataFrame) -> Dict:
+        """Assess a subsystem (logic consistent with the source code)."""
+        # Data cleaning
+        clean_data = data.dropna()
+        if len(clean_data) < 20:  # insufficient data
+            return {'health_score': -1, 'weights': {}, 'features': list(data.columns), 'message': 'Insufficient data'}
+
+        try:
+            # Normalize
+            normalized_data = self.mset.CRITIC_prepare(clean_data)
+
+            # Compute weights
+            weights = self.mset.CRITIC(normalized_data)
+
+            # MSET assessment
+            health_score = self._run_mset_assessment(normalized_data.values, weights.values)
+
+            return {
+                'health_score': float(health_score),
+                'weights': weights.to_dict(),
+                'features': list(data.columns)
+            }
+        except Exception as e:
+            return {'health_score': -1, 'weights': {}, 'features': list(data.columns), 'message': str(e)}
+
+    @lru_cache(maxsize=10)
+    def _get_mset_model(self, train_data: tuple):
+        """Cache the MSET model."""
+        # Note: lru_cache requires hashable arguments, hence the tuple of tuples
+        arr = np.array(train_data)
+        model = self._create_mset_core()
+        model.genDLMatrix(arr)
+        return model
+
+    def _run_mset_assessment(self, data: np.ndarray, weights: np.ndarray) -> float:
+        """Run the MSET assessment."""
+        # Split into training and test sets
+        split_idx = len(data) // 2
+        train_data = data[:split_idx]
+        test_data = data[split_idx:]
+
+        # Use the cached model
+        model = self._get_mset_model(tuple(map(tuple, train_data)))  # convert to hashable tuples
+
+        # Compute SPRT flags; the health score is their mean over the test half
+        flags = model.calcSPRT(test_data, weights)
+        return np.mean(flags)
+
+    def _get_subsystem_weights(self, subsystems: List[str]) -> np.ndarray:
+        """Generate an equal-weight vector over the subsystems."""
+        n = len(subsystems)
+        if n == 0:
+            return np.array([])
+
+        # Simply return the equal-weight vector
+        return np.ones(n) / n
+
+    # Alternative (currently unused): AHP-based subsystem weights instead of equal weights.
+    # def _get_subsystem_weights(self, subsystems: List[str]) -> np.ndarray:
+    #     """Dynamically build the subsystem weight matrix."""
+    #     n = len(subsystems)
+    #     if n == 0:
+    #         return np.array([])
+
+    #     # Define the subsystem priority order
+    #     priority_order = ['generator', 'drive_train', 'nacelle', 'grid']
+    #     priority = {sys: idx for idx, sys in enumerate(priority_order) if sys in subsystems}
+
+    #     # Build the pairwise comparison matrix
+    #     matrix = np.ones((n, n))
+    #     for i in range(n):
+    #         for j in range(n):
+    #             if subsystems[i] in priority and subsystems[j] in priority:
+    #                 if priority[subsystems[i]] < priority[subsystems[j]]:
+    #                     matrix[i, j] = 3
+    #                 elif priority[subsystems[i]] > priority[subsystems[j]]:
+    #                     matrix[i, j] = 1/3
+
+    #     # Compute weights via AHP
+    #     weights, _ = self.mset.ahp(matrix)
+    #     return weights
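+
+# Minimal usage sketch (assumptions: `df` is a pandas DataFrame of SCADA samples whose
+# column names follow subsystem_config, with enough non-null rows that the 20-row
+# minimum in _assess_subsystem and the train/test split are satisfied; "A01" and
+# "WT-01" are placeholder identifiers):
+#
+#     assessor = HealthAssessor()
+#     result = assessor.assess_turbine(engine_code="A01", data=df,
+#                                      mill_type="dfig", wind_turbine_name="WT-01")
+#     print(result["total_health_score"], result["assessed_subsystems"])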