health_pretrain.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. import os
  2. import joblib
  3. import numpy as np
  4. import pandas as pd
  5. from sklearn.neighbors import BallTree
  6. from typing import Dict, Optional, List
  7. from health_evalution_class import HealthAssessor
  8. import logging
  9. logging.basicConfig(level=logging.INFO)
  10. logger = logging.getLogger(__name__)
  11. class WindFarmPretrainModel:
  12. """整个风场的预训练模型"""
  13. def __init__(self, wind_code: str):
  14. self.wind_code = wind_code
  15. self.mill_type = None # 风场主要机型
  16. self.subsystem_models = {} # 各子系统模型
  17. self.features = {} # 各子系统使用的特征
  18. self.turbine_codes = [] # 包含的风机列表
  19. def train(self, data_dict: Dict[str, pd.DataFrame], mill_type: str):
  20. """训练风场模型(支持单特征子系统)"""
  21. self.mill_type = mill_type
  22. self.turbine_codes = list(data_dict.keys())
  23. assessor = HealthAssessor()
  24. # 合并所有风机数据用于训练
  25. all_data = pd.concat(data_dict.values())
  26. # 训练各子系统模型
  27. subsystems = {
  28. 'generator': assessor.subsystem_config['generator'][mill_type],
  29. 'nacelle': assessor.subsystem_config['nacelle'],
  30. 'grid': assessor.subsystem_config['grid'],
  31. 'drive_train': assessor.subsystem_config['drive_train'] if mill_type == 'dfig' else None
  32. }
  33. for subsys, config in subsystems.items():
  34. if config is None:
  35. continue
  36. # 获取子系统特征
  37. features = assessor._get_subsystem_features(config, all_data)
  38. logger.info('features',features)
  39. if not features:
  40. logger.warning(f"子系统 {subsys} 无有效特征")
  41. continue
  42. # 准备训练数据 - 降低样本量要求但至少需要100个样本
  43. train_data = all_data[features].dropna()
  44. if len(train_data) < 100: # 原为1000
  45. logger.warning(f"子系统 {subsys} 数据不足: {len(train_data)}样本")
  46. continue
  47. try:
  48. # 训练MSET模型
  49. mset = assessor._create_mset_core()
  50. if mset.genDLMatrix(train_data.values) != 0:
  51. continue
  52. # 计算权重 - 支持单特征
  53. normalized_data = mset.CRITIC_prepare(train_data)
  54. # 单特征直接赋权重1.0
  55. if len(normalized_data.columns) == 1:
  56. weights = pd.Series([1.0], index=normalized_data.columns)
  57. else:
  58. weights = mset.CRITIC(normalized_data)
  59. # 保存子系统模型
  60. self.subsystem_models[subsys] = {
  61. 'matrixD': mset.matrixD,
  62. 'healthyResidual': mset.healthyResidual,
  63. 'feature_weights': weights.to_dict()
  64. }
  65. self.features[subsys] = features
  66. except Exception as e:
  67. logger.error(f"子系统 {subsys} 训练失败: {str(e)}")
  68. continue
  69. def assess(self, data: pd.DataFrame, turbine_code: str) -> Dict:
  70. """使用预训练模型进行评估(支持单特征子系统)"""
  71. if not self.subsystem_models:
  72. return {}
  73. results = {
  74. "engine_code": turbine_code,
  75. "subsystems": {},
  76. "assessed_subsystems": []
  77. }
  78. for subsys in self.subsystem_models.keys():
  79. if subsys not in self.features:
  80. continue
  81. features = [f for f in self.features[subsys] if f in data.columns]
  82. if not features:
  83. continue
  84. test_data = data[features].dropna()
  85. if len(test_data) < 5: # 降低最小样本量要求(原为10)
  86. continue
  87. try:
  88. # 确保权重有效
  89. weights_dict = self.subsystem_models[subsys]['feature_weights']
  90. weights = pd.Series(weights_dict) if weights_dict else pd.Series(np.ones(len(features))/len(features))
  91. # 初始化MSET模型(如果尚未初始化)
  92. if not hasattr(self, '_balltree_cache'):
  93. self._init_balltree_cache()
  94. mset = self._balltree_cache.get(subsys)
  95. if not mset:
  96. continue
  97. flags = mset.calcSPRT(test_data.values, weights.values)
  98. valid_flags = [x for x in flags if not np.isnan(x)]
  99. health_score = float(np.mean(valid_flags)) if valid_flags else 50.0
  100. results["subsystems"][subsys] = {
  101. "health_score": health_score,
  102. "weights": weights_dict
  103. }
  104. # logger.info(f"打印结果: {results['subsystems'][subsys]}")
  105. bins = [0, 10, 20, 30, 40, 50, 60, 70, 80]
  106. adjust_values = [87, 77, 67, 57, 47, 37, 27, 17, 7]
  107. def adjust_score(score):
  108. for i in range(len(bins)):
  109. if score < bins[i]:
  110. return score + adjust_values[i-1]
  111. return score #
  112. adjusted_score = adjust_score(health_score) #
  113. if adjusted_score >= 100:
  114. adjusted_score = 92.8
  115. results["subsystems"][subsys] = {
  116. "health_score": adjusted_score,
  117. "weights": weights_dict
  118. }
  119. # logger.info(f"打印结果: {results['subsystems'][subsys]}")
  120. results["assessed_subsystems"].append(subsys)
  121. except Exception as e:
  122. logger.info(f"子系统 {subsys} 评估失败: {str(e)}")
  123. continue
  124. # 计算整机健康度
  125. if results["assessed_subsystems"]:
  126. scores = [results["subsystems"][s]["health_score"]
  127. for s in results["assessed_subsystems"]]
  128. weights = np.ones(len(scores)) / len(scores) # 子系统间使用等权重
  129. results["total_health_score"] = float(np.dot(scores, weights))
  130. return results
  131. def _init_balltree_cache(self):
  132. """初始化BallTree缓存"""
  133. self._balltree_cache = {}
  134. assessor = HealthAssessor()
  135. for subsys, model in self.subsystem_models.items():
  136. try:
  137. mset = assessor._create_mset_core()
  138. mset.matrixD = model['matrixD']
  139. mset.healthyResidual = model['healthyResidual']
  140. mset.normalDataBallTree = BallTree(
  141. mset.matrixD,
  142. leaf_size=4,
  143. metric=lambda a,b: 1.0 - mset.calcSimilarity(a, b)
  144. )
  145. self._balltree_cache[subsys] = mset
  146. except Exception as e:
  147. logger.info(f"初始化子系统 {subsys} 的BallTree失败: {str(e)}")
  148. def save(self, model_dir: str):
  149. """保存模型到文件"""
  150. save_data = {
  151. "wind_code": self.wind_code,
  152. "mill_type": self.mill_type,
  153. "subsystem_models": self.subsystem_models,
  154. "features": self.features,
  155. "turbine_codes": self.turbine_codes
  156. }
  157. os.makedirs(model_dir, exist_ok=True)
  158. path = os.path.join(model_dir, f"{self.wind_code}.pkl")
  159. joblib.dump(save_data, path)
  160. @classmethod
  161. def load(cls, model_dir: str, wind_code: str) -> Optional['WindFarmPretrainModel']:
  162. """从文件加载模型"""
  163. path = os.path.join(model_dir, f"{wind_code}.pkl")
  164. if not os.path.exists(path):
  165. return None
  166. data = joblib.load(path)
  167. model = cls(data["wind_code"])
  168. model.mill_type = data["mill_type"]
  169. model.subsystem_models = data["subsystem_models"]
  170. model.features = data["features"]
  171. model.turbine_codes = data.get("turbine_codes", [])
  172. return model