# health_evalution_class.py

import numpy as np
import pandas as pd
from sklearn.neighbors import BallTree
from typing import Dict, List, Optional, Union
from functools import lru_cache


class HealthAssessor:
    def __init__(self):
        self.subsystem_config = {
            # Generator
            'generator': {
                # Doubly-fed induction generator (DFIG)
                'dfig': {
                    'fixed': ['generator_winding1_temperature', 'generator_winding2_temperature',
                              'generator_winding3_temperature', 'generatordrive_end_bearing_temperature',
                              'generatornon_drive_end_bearing_temperature'],
                },
                # Direct drive
                'direct': {
                    'fixed': ['generator_winding1_temperature', 'generator_winding2_temperature',
                              'generator_winding3_temperature', 'main_bearing_temperature'],
                }
            },
            # Nacelle system
            'nacelle': {
                'fixed': ['front_back_vibration_of_the_cabin', 'side_to_side_vibration_of_the_cabin',
                          'cabin_position', 'cabin_temperature'],
            },
            # Grid environment
            'grid': {
                'fixed': ['reactive_power', 'active_power', 'grid_a_phase_current',
                          'grid_b_phase_current', 'grid_c_phase_current'],
            },
            # Drive train
            'drive_train': {
                'fixed': ['main_bearing_temperature'],
                'keywords': [
                    {'include': ['gearbox', 'temperature'], 'exclude': [], 'min_count': 2},
                ]
            }
        }
        # MSET implementation embedded from the original source
        self.mset = self._create_mset_core()

    def _create_mset_core(self):
        """Create the MSET core computation module."""
        class MSETCore:
            def __init__(self):
                self.matrixD = None
                self.normalDataBallTree = None
                self.healthyResidual = None

            def calcSimilarity(self, x, y):
                """Similarity between two states: 1 / (1 + Euclidean distance)."""
                diff = np.array(x) - np.array(y)
                return 1 / (1 + np.sqrt(np.sum(diff ** 2)))

            def genDLMatrix(self, trainDataset, dataSize4D=60, dataSize4L=5):
                """Build the memory matrix D and its BallTree index."""
                m, n = trainDataset.shape
                # Quickly pick the per-feature extreme points
                min_indices = np.argmin(trainDataset, axis=0)
                max_indices = np.argmax(trainDataset, axis=0)
                unique_indices = np.unique(np.concatenate([min_indices, max_indices]))
                self.matrixD = trainDataset[unique_indices].copy()
                # Quickly fill in the remaining points
                remaining_indices = np.setdiff1d(np.arange(m), unique_indices)
                np.random.shuffle(remaining_indices)
                needed = max(0, dataSize4D - len(unique_indices))
                if needed > 0:
                    self.matrixD = np.vstack([self.matrixD, trainDataset[remaining_indices[:needed]]])
                # BallTree parameters kept consistent with the original source
                self.normalDataBallTree = BallTree(
                    self.matrixD,
                    leaf_size=4,
                    metric=lambda i, j: 1 - self.calcSimilarity(i, j)  # custom similarity-based metric
                )
                # Healthy residuals computed over the full training set
                self.healthyResidual = self.calcResidualByLocallyWeightedLR(trainDataset)
                return 0

            def calcResidualByLocallyWeightedLR(self, newStates):
                """Residual calculation via locally weighted reconstruction."""
                if len(newStates.shape) == 1:
                    newStates = newStates.reshape(-1, 1)
                dist, iList = self.normalDataBallTree.query(
                    newStates,
                    k=min(10, len(self.matrixD)),
                    return_distance=True
                )
                weights = 1 / (dist + 1e-5)
                weights /= weights.sum(axis=1)[:, np.newaxis]
                # Reconstruct each state from its own neighbour set (index with the full
                # iList array so every query row uses its own neighbours)
                est_X = np.sum(weights[:, :, np.newaxis] * self.matrixD[iList], axis=1)
                return est_X - newStates

            def calcSPRT(self, newStates, feature_weight, alpha=0.1, beta=0.1, decisionGroup=1):
                """SPRT (sequential probability ratio test) health flag calculation."""
                try:
                    stateResidual = self.calcResidualByLocallyWeightedLR(newStates)
                    weightedStateResidual = np.dot(stateResidual, feature_weight)
                    weightedHealthyResidual = np.dot(self.healthyResidual, feature_weight)
                    mu0 = np.mean(weightedHealthyResidual)
                    sigma0 = np.std(weightedHealthyResidual)
                    # Guard against a zero standard deviation
                    if sigma0 < 1e-5:
                        sigma0 = 1.0  # fall back to a safe value
                    # Vectorised computation
                    n = len(newStates)
                    if n < decisionGroup:
                        return [50]  # neutral value
                    rolling_mean = np.convolve(weightedStateResidual, np.ones(decisionGroup) / decisionGroup, 'valid')
                    # Note: (x - mu0) * (x + mu0 - 2*mu0) reduces to (x - mu0) ** 2
                    si = (rolling_mean - mu0) * (rolling_mean + mu0 - 2 * mu0) / (2 * sigma0 ** 2)
                    lowThres = np.log(beta / (1 - alpha))
                    highThres = np.log((1 - beta) / alpha)
                    si = np.clip(si, lowThres, highThres)
                    si = np.where(si > 0, si / highThres, si / lowThres)
                    flag = 100 - si * 100
                    # Pad the tail so the output matches the input length
                    if len(flag) < n:
                        flag = np.pad(flag, (0, n - len(flag)), mode='edge')
                    return flag.tolist()
                except Exception as e:
                    print(f"SPRT calculation error: {str(e)}")
                    return [50] * len(newStates)  # return neutral values

            def CRITIC_prepare(self, data, flag=1):
                """Min-max normalisation used as preprocessing for CRITIC."""
                data = data.astype(float)
                numeric_cols = data.select_dtypes(include=[np.number]).columns
                # Handle all-zero or constant columns
                for col in numeric_cols:
                    if data[col].nunique() == 1:  # every value identical
                        data[col] = 0.5  # set to a neutral value
                        continue
                # Negative-direction normalisation (temperature-like indicators)
                negative_cols = [col for col in numeric_cols if 'temperature' in col]
                for col in negative_cols:
                    col_min = data[col].min()
                    col_max = data[col].max()
                    range_val = col_max - col_min
                    if range_val < 1e-5:  # avoid division by zero
                        range_val = 1.0
                    data[col] = (col_max - data[col]) / range_val
                # Positive-direction normalisation (all other indicators)
                positive_cols = list(set(numeric_cols) - set(negative_cols))
                for col in positive_cols:
                    col_min = data[col].min()
                    col_max = data[col].max()
                    range_val = col_max - col_min
                    if range_val < 1e-5:  # avoid division by zero
                        range_val = 1.0
                    data[col] = (data[col] - col_min) / range_val
                return data

            def CRITIC(self, data):
                """CRITIC objective weight calculation."""
                data_norm = self.CRITIC_prepare(data.copy())
                std = data_norm.std(ddof=0).clip(lower=0.01)
                corr = np.abs(np.corrcoef(data_norm.T))
                np.fill_diagonal(corr, 0)
                conflict = np.sum(1 - corr, axis=1)
                info = std * conflict
                weights = info / info.sum()
                return pd.Series(weights, index=data.columns)

            def ahp(self, matrix):
                """AHP weight calculation from a pairwise comparison matrix."""
                eigenvalue, eigenvector = np.linalg.eig(matrix)
                max_idx = np.argmax(eigenvalue)
                weight = eigenvector[:, max_idx].real
                return weight / weight.sum(), eigenvalue[max_idx].real

        return MSETCore()

    def assess_turbine(self, engine_code, data, mill_type, wind_turbine_name):
        """Assess a single wind turbine."""
        results = {
            "engine_code": engine_code,
            "wind_turbine_name": wind_turbine_name,
            "mill_type": mill_type,
            "total_health_score": None,
            "subsystems": {},
            "assessed_subsystems": []
        }
        # Assess each subsystem
        subsystems_to_assess = [
            ('generator', self.subsystem_config['generator'][mill_type], 1),
            ('nacelle', self.subsystem_config['nacelle'], 1),
            ('grid', self.subsystem_config['grid'], 1),
            ('drive_train', self.subsystem_config['drive_train'] if mill_type == 'dfig' else None, 1)
        ]
        for subsystem, config, min_features in subsystems_to_assess:
            if config is None:
                continue
            features = self._get_subsystem_features(config, data)
            # Feature 1: always emit a result, even when too few features are available
            if len(features) >= min_features:
                assessment = self._assess_subsystem(data[features])
            else:
                assessment = {
                    'health_score': -1,  # reported as '-' when features are insufficient
                    'weights': {},
                    'message': f'Insufficient features (required {min_features}, got {len(features)})'
                }
            # Feature 3: drop the features list from the per-subsystem output
            if 'features' in assessment:
                del assessment['features']
            results["subsystems"][subsystem] = assessment
        # Overall turbine health score (using the new field names)
        if results["subsystems"]:
            # Only subsystems with a numeric, non-negative health score are counted
            valid_subsystems = [
                k for k, v in results["subsystems"].items()
                if isinstance(v['health_score'], (int, float)) and v['health_score'] >= 0
            ]
            if valid_subsystems:
                weights = self._get_subsystem_weights(valid_subsystems)
                health_scores = [results["subsystems"][sys]['health_score'] for sys in valid_subsystems]
                results["total_health_score"] = float(np.dot(health_scores, weights))
                results["assessed_subsystems"] = valid_subsystems
        # Final cleanup: make sure no NaN values leak into the output
        for sys, result in results["subsystems"].items():
            if isinstance(result['health_score'], float) and np.isnan(result['health_score']):
                result['health_score'] = -1
                result['message'] = (result.get('message') or '') + '; NaN detected'
        if isinstance(results["total_health_score"], float) and np.isnan(results["total_health_score"]):
            results["total_health_score"] = -1
        print(results)
        return results

    def _get_all_possible_features(self, assessor, mill_type, available_columns):
        """
        Get every possible feature column, based on the columns that actually exist.

        Args:
            assessor: HealthAssessor instance
            mill_type: turbine type ('dfig' or 'direct')
            available_columns: list of column names present in the database
        """
        features = []
        available_columns_lower = [col.lower() for col in available_columns]  # for case-insensitive matching
        for subsys_name, subsys_config in assessor.subsystem_config.items():
            # Resolve the subsystem configuration
            if subsys_name == 'generator':
                config = subsys_config.get(mill_type, {})
            elif subsys_name == 'drive_train' and mill_type != 'dfig':
                continue
            else:
                config = subsys_config
            # Fixed features
            if 'fixed' in config:
                for f in config['fixed']:
                    if f in available_columns:
                        features.append(f)
            # Keyword-based features
            if 'keywords' in config:
                for rule in config['keywords']:
                    matched = []
                    include_kws = [kw.lower() for kw in rule['include']]
                    exclude_kws = [ex.lower() for ex in rule.get('exclude', [])]
                    for col in available_columns:
                        col_lower = col.lower()
                        # All required keywords must be present
                        include_ok = all(kw in col_lower for kw in include_kws)
                        # No excluded keyword may be present
                        exclude_ok = not any(ex in col_lower for ex in exclude_kws)
                        if include_ok and exclude_ok:
                            matched.append(col)
                    if len(matched) >= rule.get('min_count', 1):
                        features.extend(matched)
        return list(set(features))  # deduplicate

    def _get_subsystem_features(self, config: Dict, data: pd.DataFrame) -> List[str]:
        """Final version of the feature-selection method."""
        available_features = []
        # Fixed-feature check (requires at least 10% non-null values)
        if 'fixed' in config:
            for f in config['fixed']:
                if f in data.columns and data[f].notna().mean() > 0.1:
                    available_features.append(f)
        # print(f"Matched fixed features: {available_features}")
        # Keyword-feature check
        if 'keywords' in config:
            for rule in config['keywords']:
                matched = [
                    col for col in data.columns
                    if all(kw.lower() in col.lower() for kw in rule['include'])
                    and not any(ex.lower() in col.lower() for ex in rule.get('exclude', []))
                    and data[col].notna().mean() > 0.1  # data-validity check
                ]
                if len(matched) >= rule.get('min_count', 1):
                    available_features.extend(matched)
        # print(f"Matched keyword features: {available_features}")
        return list(set(available_features))

    def _assess_subsystem(self, data: pd.DataFrame) -> Dict:
        """Assess a single subsystem."""
        # Data cleaning
        clean_data = data.dropna()
        if len(clean_data) < 20:  # not enough data
            return {'health_score': -1, 'weights': {}, 'features': list(data.columns), 'message': 'Insufficient data'}
        try:
            # Normalisation
            normalized_data = self.mset.CRITIC_prepare(clean_data)
            # Weight calculation
            weights = self.mset.CRITIC(normalized_data)
            # MSET assessment
            health_score = self._run_mset_assessment(normalized_data.values, weights.values)
            return {
                'health_score': float(health_score),
                'weights': weights.to_dict(),
                'features': list(data.columns)
            }
        except Exception as e:
            return {'health_score': -1, 'weights': {}, 'features': list(data.columns), 'message': str(e)}

    @lru_cache(maxsize=10)
    def _get_mset_model(self, train_data: tuple):
        """Build and cache an MSET model for a given training set."""
        arr = np.array(train_data)
        model = self._create_mset_core()
        model.genDLMatrix(arr)
        return model

    def _run_mset_assessment(self, data: np.ndarray, weights: np.ndarray) -> float:
        """Run the MSET assessment."""
        # Validate the weights
        if np.isnan(weights).any() or np.isinf(weights).any():
            weights = np.ones_like(weights) / len(weights)  # fall back to equal weights
        # Split into training and test sets
        split_idx = len(data) // 2
        train_data = data[:split_idx]
        test_data = data[split_idx:]
        # Use the cached model
        try:
            model = self._get_mset_model(tuple(map(tuple, train_data)))
            flags = model.calcSPRT(test_data, weights)
            # Filter out NaN values and average
            valid_flags = [x for x in flags if not np.isnan(x)]
            if not valid_flags:
                return 50.0  # default neutral value
            return float(np.mean(valid_flags))
        except Exception as e:
            print(f"MSET assessment failed: {str(e)}")
            return 50.0  # default neutral value

    def _get_subsystem_weights(self, subsystems: List[str]) -> np.ndarray:
        """Generate an equal-weight vector for the subsystems."""
        n = len(subsystems)
        if n == 0:
            return np.array([])
        # Simply return equal weights
        return np.ones(n) / n

    # def _get_subsystem_weights(self, subsystems: List[str]) -> np.ndarray:
    #     """Dynamically build the subsystem weight matrix."""
    #     n = len(subsystems)
    #     if n == 0:
    #         return np.array([])
    #     # Subsystem priority order
    #     priority_order = ['generator', 'drive_train', 'nacelle', 'grid']
    #     priority = {sys: idx for idx, sys in enumerate(priority_order) if sys in subsystems}
    #     # Build the pairwise comparison matrix
    #     matrix = np.ones((n, n))
    #     for i in range(n):
    #         for j in range(n):
    #             if subsystems[i] in priority and subsystems[j] in priority:
    #                 if priority[subsystems[i]] < priority[subsystems[j]]:
    #                     matrix[i, j] = 3
    #                 elif priority[subsystems[i]] > priority[subsystems[j]]:
    #                     matrix[i, j] = 1/3
    #     # Compute weights via AHP
    #     weights, _ = self.mset.ahp(matrix)
    #     return weights
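

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original class).
# It builds a synthetic DataFrame whose column names follow the 'dfig'
# configuration above; the gearbox temperature columns and all numeric values
# are hypothetical, chosen only so the drive_train keyword rule matches. A real
# deployment would pass SCADA data loaded elsewhere.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    n = 200
    demo_columns = {
        # generator (dfig fixed features)
        'generator_winding1_temperature': rng.normal(60, 2, n),
        'generator_winding2_temperature': rng.normal(60, 2, n),
        'generator_winding3_temperature': rng.normal(60, 2, n),
        'generatordrive_end_bearing_temperature': rng.normal(45, 1.5, n),
        'generatornon_drive_end_bearing_temperature': rng.normal(45, 1.5, n),
        # nacelle
        'front_back_vibration_of_the_cabin': rng.normal(0, 0.1, n),
        'side_to_side_vibration_of_the_cabin': rng.normal(0, 0.1, n),
        'cabin_position': rng.normal(180, 5, n),
        'cabin_temperature': rng.normal(25, 1, n),
        # grid
        'reactive_power': rng.normal(100, 10, n),
        'active_power': rng.normal(1500, 50, n),
        'grid_a_phase_current': rng.normal(400, 10, n),
        'grid_b_phase_current': rng.normal(400, 10, n),
        'grid_c_phase_current': rng.normal(400, 10, n),
        # drive train (fixed feature plus hypothetical gearbox temperature columns)
        'main_bearing_temperature': rng.normal(40, 1, n),
        'gearbox_oil_temperature': rng.normal(55, 2, n),
        'gearbox_bearing_temperature': rng.normal(58, 2, n),
    }
    demo_data = pd.DataFrame(demo_columns)
    assessor = HealthAssessor()
    result = assessor.assess_turbine('T0001', demo_data, 'dfig', 'demo_turbine')
    print(f"total_health_score: {result['total_health_score']}")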