health_pretrain.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. import os
  2. import joblib
  3. import numpy as np
  4. import pandas as pd
  5. from sklearn.neighbors import BallTree
  6. from typing import Dict, Optional, List
  7. from health_evalution_class import HealthAssessor
  8. import logging
  9. logging.basicConfig(level=logging.INFO)
  10. logger = logging.getLogger(__name__)
  11. class WindFarmPretrainModel:
  12. """整个风场的预训练模型"""
  13. def __init__(self, wind_code: str):
  14. self.wind_code = wind_code
  15. self.mill_type = None # 风场主要机型
  16. self.subsystem_models = {} # 各子系统模型
  17. self.features = {} # 各子系统使用的特征
  18. self.turbine_codes = [] # 包含的风机列表
  19. def train(self, data_dict: Dict[str, pd.DataFrame], mill_type: str):
  20. """训练风场模型(支持单特征子系统)"""
  21. self.mill_type = mill_type
  22. self.turbine_codes = list(data_dict.keys())
  23. assessor = HealthAssessor()
  24. # 合并所有风机数据用于训练
  25. all_data = pd.concat(data_dict.values())
  26. # 训练各子系统模型 - 修正为8个子系统
  27. subsystems = {
  28. 'YawSystem': assessor.subsystem_config['YawSystem'][mill_type],
  29. 'PicthSystem': assessor.subsystem_config['PicthSystem'][mill_type],
  30. 'MainShaft': assessor.subsystem_config['MainShaft'][mill_type],
  31. 'Gearbox': assessor.subsystem_config['Gearbox'][mill_type],
  32. 'Generator': assessor.subsystem_config['Generator'][mill_type],
  33. 'Converter': assessor.subsystem_config['Converter'][mill_type],
  34. 'HPU': assessor.subsystem_config['HPU'][mill_type],
  35. 'MCS': assessor.subsystem_config['MCS'][mill_type]
  36. }
  37. for subsys, config in subsystems.items():
  38. if config is None:
  39. continue
  40. # 获取子系统特征
  41. features = assessor._get_subsystem_features(config, all_data)
  42. logger.info(f'子系统 {subsys} 特征: {features}')
  43. if not features:
  44. logger.warning(f"子系统 {subsys} 无有效特征")
  45. continue
  46. # 准备训练数据 - 降低样本量要求但至少需要100个样本
  47. train_data = all_data[features].dropna()
  48. if len(train_data) < 100: # 原为1000
  49. logger.warning(f"子系统 {subsys} 数据不足: {len(train_data)}样本")
  50. continue
  51. try:
  52. # 训练MSET模型
  53. mset = assessor._create_mset_core()
  54. if mset.genDLMatrix(train_data.values) != 0:
  55. continue
  56. # 计算权重 - 支持单特征
  57. normalized_data = mset.CRITIC_prepare(train_data)
  58. # 单特征直接赋权重1.0
  59. if len(normalized_data.columns) == 1:
  60. weights = pd.Series([1.0], index=normalized_data.columns)
  61. else:
  62. weights = mset.CRITIC(normalized_data)
  63. # 保存子系统模型
  64. self.subsystem_models[subsys] = {
  65. 'matrixD': mset.matrixD,
  66. 'healthyResidual': mset.healthyResidual,
  67. 'feature_weights': weights.to_dict()
  68. }
  69. self.features[subsys] = features
  70. logger.info(f"子系统 {subsys} 训练完成,特征数: {len(features)}")
  71. except Exception as e:
  72. logger.error(f"子系统 {subsys} 训练失败: {str(e)}")
  73. continue
  74. def assess(self, data: pd.DataFrame, turbine_code: str) -> Dict:
  75. """使用预训练模型进行评估(支持单特征子系统)"""
  76. if not self.subsystem_models:
  77. return {}
  78. results = {
  79. "engine_code": turbine_code,
  80. "subsystems": {},
  81. "assessed_subsystems": []
  82. }
  83. for subsys in self.subsystem_models.keys():
  84. if subsys not in self.features:
  85. continue
  86. features = [f for f in self.features[subsys] if f in data.columns]
  87. if not features:
  88. continue
  89. test_data = data[features].dropna()
  90. if len(test_data) < 5: # 降低最小样本量要求(原为10)
  91. continue
  92. try:
  93. # 确保权重有效
  94. weights_dict = self.subsystem_models[subsys]['feature_weights']
  95. weights = pd.Series(weights_dict) if weights_dict else pd.Series(np.ones(len(features))/len(features))
  96. # 初始化MSET模型(如果尚未初始化)
  97. if not hasattr(self, '_balltree_cache'):
  98. self._init_balltree_cache()
  99. mset = self._balltree_cache.get(subsys)
  100. if not mset:
  101. continue
  102. flags = mset.calcSPRT(test_data.values, weights.values)
  103. valid_flags = [x for x in flags if not np.isnan(x)]
  104. health_score = float(np.mean(valid_flags)) if valid_flags else 50.0
  105. results["subsystems"][subsys] = {
  106. "health_score": health_score,
  107. "weights": weights_dict
  108. }
  109. bins = [0, 10, 20, 30, 40, 50, 60, 70, 80]
  110. adjust_values = [87, 77, 67, 57, 47, 37, 27, 17, 7]
  111. def adjust_score(score):
  112. for i in range(len(bins)):
  113. if score < bins[i]:
  114. return score + adjust_values[i-1]
  115. return score
  116. adjusted_score = adjust_score(health_score)
  117. if adjusted_score >= 100:
  118. adjusted_score = 92.8
  119. results["subsystems"][subsys] = {
  120. "health_score": adjusted_score,
  121. "weights": weights_dict
  122. }
  123. results["assessed_subsystems"].append(subsys)
  124. except Exception as e:
  125. logger.info(f"子系统 {subsys} 评估失败: {str(e)}")
  126. continue
  127. # 计算整机健康度
  128. if results["assessed_subsystems"]:
  129. scores = [results["subsystems"][s]["health_score"]
  130. for s in results["assessed_subsystems"]]
  131. weights = np.ones(len(scores)) / len(scores) # 子系统间使用等权重
  132. results["total_health_score"] = float(np.dot(scores, weights))
  133. return results
  134. def _init_balltree_cache(self):
  135. """初始化BallTree缓存"""
  136. self._balltree_cache = {}
  137. assessor = HealthAssessor()
  138. for subsys, model in self.subsystem_models.items():
  139. try:
  140. mset = assessor._create_mset_core()
  141. mset.matrixD = model['matrixD']
  142. mset.healthyResidual = model['healthyResidual']
  143. mset.normalDataBallTree = BallTree(
  144. mset.matrixD,
  145. leaf_size=4,
  146. metric=lambda a,b: 1.0 - mset.calcSimilarity(a, b)
  147. )
  148. self._balltree_cache[subsys] = mset
  149. except Exception as e:
  150. logger.info(f"初始化子系统 {subsys} 的BallTree失败: {str(e)}")
  151. def save(self, model_dir: str):
  152. """保存模型到文件"""
  153. save_data = {
  154. "wind_code": self.wind_code,
  155. "mill_type": self.mill_type,
  156. "subsystem_models": self.subsystem_models,
  157. "features": self.features,
  158. "turbine_codes": self.turbine_codes
  159. }
  160. os.makedirs(model_dir, exist_ok=True)
  161. path = os.path.join(model_dir, f"{self.wind_code}.pkl")
  162. joblib.dump(save_data, path)
  163. @classmethod
  164. def load(cls, model_dir: str, wind_code: str) -> Optional['WindFarmPretrainModel']:
  165. """从文件加载模型"""
  166. path = os.path.join(model_dir, f"{wind_code}.pkl")
  167. if not os.path.exists(path):
  168. return None
  169. data = joblib.load(path)
  170. model = cls(data["wind_code"])
  171. model.mill_type = data["mill_type"]
  172. model.subsystem_models = data["subsystem_models"]
  173. model.features = data["features"]
  174. model.turbine_codes = data.get("turbine_codes", [])
  175. return model