# mset_service.py
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd
from sklearn.neighbors import BallTree
  6. class MSETService:
  7. def __init__(self):
  8. self.matrixD = None
  9. self.matrixL = None
  10. self.healthyResidual = None
  11. self.normalDataBallTree = None
  12. self.DDSimilarity = None
  13. self.project_root = Path(__file__).resolve().parent.parent.parent
  14. @staticmethod
  15. def generate_test_data(num_samples: int = 1000) -> Dict[str, pd.DataFrame]:
  16. """生成测试数据"""
  17. np.random.seed(42)
  18. gen_data = np.random.normal(80, 5, (num_samples, 4))
  19. nacelle_data = np.random.normal(0.5, 0.1, (num_samples, 4))
  20. converter_data = np.column_stack([
  21. np.random.normal(40, 5, num_samples),
  22. np.random.normal(2000, 300, num_samples)
  23. ])
  24. grid_data = np.column_stack([
  25. np.random.normal(500, 100, num_samples),
  26. np.random.normal(2000, 300, num_samples),
  27. np.random.normal(1000, 200, (num_samples, 3))
  28. ])
  29. return {
  30. 'generator': pd.DataFrame(gen_data, columns=['U_temp', 'V_temp', 'W_temp', 'bearing_temp']),
  31. 'nacelle': pd.DataFrame(nacelle_data, columns=['vibration_front', 'vibration_side', 'position', 'temperature']),
  32. 'converter': pd.DataFrame(converter_data, columns=['coolant_temp', 'active_power']),
  33. 'grid': pd.DataFrame(grid_data, columns=['reactive_power', 'active_power', 'current_A', 'current_B', 'current_C'])
  34. }
  35. def calc_similarity(self, x: np.ndarray, y: np.ndarray, method: str = 'euc') -> float:
  36. if len(x) != len(y):
  37. return 0.0
  38. if method == 'cbd':
  39. return np.mean([1 / (1 + np.abs(p - q)) for p, q in zip(x, y)])
  40. else:
  41. return 1 / (1 + np.sqrt(np.sum((p - q)**2 for p, q in zip(x, y))))
  42. def train_model(self, train_data: np.ndarray, dataSize4D: int = 100, dataSize4L: int = 50) -> int:
  43. m, n = train_data.shape
  44. if m < dataSize4D + dataSize4L:
  45. print('训练数据集太小,无法生成矩阵D和L')
  46. return -1
  47. self.matrixD = []
  48. selectIndex4D = []
  49. for i in range(n):
  50. feature_i = train_data[:, i]
  51. minIndex = np.argmin(feature_i)
  52. maxIndex = np.argmax(feature_i)
  53. self.matrixD.extend([train_data[minIndex], train_data[maxIndex]])
  54. selectIndex4D.extend([minIndex, maxIndex])
  55. while len(selectIndex4D) < dataSize4D:
  56. freeStateList = list(set(range(len(train_data))) - set(selectIndex4D))
  57. if not freeStateList: break
  58. distList = [np.mean([1 - self.calc_similarity(train_data[i], x) for x in self.matrixD])
  59. for i in freeStateList]
  60. selectId = freeStateList[np.argmax(distList)]
  61. self.matrixD.append(train_data[selectId])
  62. selectIndex4D.append(selectId)
  63. self.matrixD = np.array(self.matrixD[:dataSize4D])
  64. self.matrixL = train_data
  65. self.normalDataBallTree = BallTree(
  66. self.matrixD, leaf_size=4,
  67. metric=lambda i,j: 1 - self.calc_similarity(i,j)
  68. )
  69. lamdaRatio = 1e-3
  70. m_d = len(self.matrixD)
  71. self.DDSimilarity = np.array([
  72. [1 - self.calc_similarity(x, y) for x in self.matrixD] for y in self.matrixD
  73. ]) + lamdaRatio * np.eye(m_d)
  74. self.DDSimilarity = np.linalg.inv(self.DDSimilarity)
  75. self.healthyResidual = self._calc_residuals(self.matrixL)
  76. return 0
  77. def _calc_residuals(self, states: np.ndarray) -> np.ndarray:
  78. m, n = states.shape
  79. est_X = []
  80. for x in states:
  81. dist, iList = self.normalDataBallTree.query([x], 20, return_distance=True)
  82. weight = 1 / (dist[0] + 1e-1)
  83. weight = weight / np.sum(weight)
  84. eState = np.sum([w * self.matrixD[i] for w, i in zip(weight, iList[0])], axis=0)
  85. est_X.append(eState)
  86. return np.array(est_X) - states
  87. def _critic_prepare(self, data: pd.DataFrame, flag: int = 1) -> pd.DataFrame:
  88. data_columns = data.columns.values
  89. maxnum = np.max(data, axis=0)
  90. minnum = np.min(data, axis=0)
  91. Y = (data - minnum) / (maxnum - minnum) if flag == 0 else (maxnum - data) / (maxnum - minnum)
  92. Y0 = Y.values # 转换为NumPy数组
  93. Y0[np.where(Y0 == 0)] = 0.00001
  94. return pd.DataFrame(Y0, columns=data_columns)
  95. def _critic(self, data: pd.DataFrame) -> np.ndarray:
  96. """确保返回NumPy数组"""
  97. n, m = data.shape
  98. s = np.std(data, axis=0)
  99. r = np.corrcoef(data, rowvar=False)
  100. a = np.sum(1 - r, axis=1)
  101. c = s * a
  102. weights = c / np.sum(c)
  103. return weights # 直接返回NumPy数组
  104. def evaluate_subsystem_health(self, data: pd.DataFrame) -> Tuple[np.ndarray, float]:
  105. data = data.apply(pd.to_numeric, errors='coerce').dropna()
  106. if data.empty: return np.array([]), 0.0
  107. W_prepare_data = self._critic_prepare(data)
  108. weights = self._critic(W_prepare_data) # 获取权重
  109. # 关键修改:显式转换为NumPy数组
  110. weights = np.array(weights)
  111. data_values = data.values
  112. m, n = data_values.shape
  113. flag_Spart_data = []
  114. for i in range(n):
  115. data_i = data_values[:, i].reshape(-1, 1)
  116. train_data = data_i[:len(data_i)//2]
  117. test_data = data_i[len(data_i)//2+1:]
  118. if len(train_data) < 10 or len(test_data) < 5:
  119. continue
  120. self.train_model(train_data, 60, 5)
  121. feature_weight = np.array([1.0])
  122. flag_data = self._sprt(test_data, feature_weight, decisionGroup=5)
  123. flag_Spart_data.append(flag_data)
  124. if not flag_Spart_data: return weights, 0.0
  125. flag_Spart_data = np.array(flag_Spart_data).T
  126. weights = weights.reshape(-1, 1) # 现在可以安全调用reshape
  127. score = np.dot(flag_Spart_data, weights).mean()
  128. return weights.flatten(), float(score)
  129. def _sprt(self, newStates: np.ndarray, feature_weight: np.ndarray,
  130. alpha: float = 0.1, beta: float = 0.1, decisionGroup: int = 5) -> List[float]:
  131. feature_weight = feature_weight.flatten()
  132. stateResidual = self._calc_residuals(newStates)
  133. if stateResidual.shape[1] != len(feature_weight):
  134. feature_weight = np.array([1.0])
  135. weightedStateResidual = [np.dot(x, feature_weight) for x in stateResidual]
  136. weightedHealthyResidual = [np.dot(x, feature_weight) for x in self.healthyResidual]
  137. mu0 = np.mean(weightedHealthyResidual)
  138. sigma0 = np.std(weightedHealthyResidual)
  139. if sigma0 < 1e-10: sigma0 = 1e-10
  140. lowThres = np.log(beta / (1 - alpha))
  141. highThres = np.log((1 - beta) / alpha)
  142. flag = []
  143. for i in range(len(newStates) - decisionGroup + 1):
  144. mu1 = np.mean(weightedStateResidual[i:i+decisionGroup])
  145. si = np.sum(weightedStateResidual[i:i+decisionGroup]) * (mu1 - mu0) / sigma0**2 - \
  146. decisionGroup * (mu1**2 - mu0**2) / (2 * sigma0**2)
  147. si = np.clip(si, lowThres, highThres)
  148. si = 100 - (si / highThres if si > 0 else si / lowThres) * 100
  149. flag.append(si)
  150. return flag
  151. def evaluate_turbine_health(self, subsystems_data: Dict[str, pd.DataFrame]) -> Tuple[float, Dict[str, float]]:
  152. subsystems = {
  153. 'generator': [10, 11, 12, 17],
  154. 'nacelle': [23, 24, 41, 42],
  155. 'converter': [18, 19],
  156. 'grid': [32, 38, 64, 65, 66]
  157. }
  158. matrix_subsys = np.array([
  159. [1, 2, 3, 4],
  160. [1/2, 1, 2, 3],
  161. [1/3, 1/2, 1, 2],
  162. [1/4, 1/3, 1/2, 1],
  163. ])
  164. health_scores = {}
  165. for name, cols in subsystems.items():
  166. if name in subsystems_data:
  167. data = subsystems_data[name]
  168. w, score = self.evaluate_subsystem_health(data)
  169. health_scores[name] = score
  170. subsystem_names = list(health_scores.keys())
  171. if not subsystem_names: return 0.0, {}
  172. m = len(subsystem_names)
  173. ahp_matrix = matrix_subsys[:m, :m]
  174. eigenvalue, eigenvector = np.linalg.eig(ahp_matrix)
  175. max_idx = np.argmax(eigenvalue)
  176. subsystem_weights = eigenvector[:, max_idx].real
  177. subsystem_weights = subsystem_weights / np.sum(subsystem_weights)
  178. overall_score = np.sum([
  179. health_scores[name] * subsystem_weights[i]
  180. for i, name in enumerate(subsystem_names)
  181. ])
  182. return float(overall_score), health_scores