# Temp_Diag.py — MSET + SPRT temperature trend / threshold analysis (6.5 KB)
  1. # Temp_Diag.py
  2. import numpy as np
  3. import pandas as pd
  4. from sklearn.neighbors import BallTree
  5. from sqlalchemy import create_engine, text
  6. import math, joblib, os
  7. class MSET_Temp:
  8. """
  9. MSET + SPRT 温度趋势/阈值分析类。
  10. - 离线:用全场数据训练 genDLMatrix → save_model
  11. - 在线:根据前端传来的 windTurbineNumberList & 时间区间,_get_data_by_filter 拿数据 → predict_SPRT
  12. """
  13. def __init__(self,
  14. windCode: str,
  15. windTurbineNumberList: list[str],
  16. startTime: str,
  17. endTime: str):
  18. self.windCode = windCode.strip()
  19. self.windTurbineNumberList = windTurbineNumberList or []
  20. self.startTime = startTime
  21. self.endTime = endTime
  22. # 离线或加载后会赋值
  23. self.matrixD = None
  24. self.matrixL = None
  25. self.healthyResidual = None
  26. self.normalDataBallTree = None
  27. # SPRT 参数(离线训练时赋值)
  28. self.feature_weight: np.ndarray | None = None
  29. self.alpha: float = 0.1
  30. self.beta: float = 0.1
  31. def _get_data_by_filter(self) -> pd.DataFrame:
  32. """
  33. 在线推理用:按前端给的风机列表 & 时间范围拉数据,
  34. 如果风机列表为空,则只按时间拉全场数据。
  35. """
  36. table = f"{self.windCode}_minute"
  37. engine = create_engine(
  38. "mysql+pymysql://root:admin123456@106.120.102.238:10336/energy_data_prod"
  39. )
  40. if self.windTurbineNumberList:
  41. turbines = ",".join(f"'{wt.strip()}'" for wt in self.windTurbineNumberList)
  42. where = f"wind_turbine_number IN ({turbines}) AND time_stamp BETWEEN :start AND :end"
  43. else:
  44. where = "time_stamp BETWEEN :start AND :end"
  45. sql = text(f"""
  46. SELECT *
  47. FROM {table}
  48. WHERE {where}
  49. ORDER BY time_stamp ASC
  50. """)
  51. df = pd.read_sql(sql, engine,
  52. params={"start": self.startTime, "end": self.endTime})
  53. return df
  54. def calcSimilarity(self, x: np.ndarray, y: np.ndarray, m: str = 'euc') -> float:
  55. if len(x) != len(y):
  56. return 0.0
  57. if m == 'cbd':
  58. return float(np.mean([1.0/(1.0+abs(p-q)) for p,q in zip(x,y)]))
  59. diffsq = np.sum((x-y)**2)
  60. return float(1.0/(1.0+math.sqrt(diffsq)))
  61. def genDLMatrix(self, trainDataset: np.ndarray,
  62. dataSize4D=100, dataSize4L=50) -> int:
  63. """
  64. 离线训练用:构造 matrixD、matrixL、健康残差、BallTree
  65. """
  66. m, n = trainDataset.shape
  67. if m < dataSize4D + dataSize4L:
  68. return -1
  69. # Step1: 每维最小/最大入 D
  70. D_idx = []
  71. D = []
  72. for i in range(n):
  73. col = trainDataset[:, i]
  74. imin, imax = np.argmin(col), np.argmax(col)
  75. for idx in (imin, imax):
  76. D.append(trainDataset[idx].tolist())
  77. D_idx.append(idx)
  78. # Step2: 迭代挑偏样本到 dataSize4D
  79. while len(D_idx) < dataSize4D:
  80. free = list(set(range(m)) - set(D_idx))
  81. scores = []
  82. for idx in free:
  83. dists = [1-self.calcSimilarity(trainDataset[idx], d) for d in D]
  84. scores.append((np.mean(dists), idx))
  85. _, pick = max(scores)
  86. D.append(trainDataset[pick].tolist()); D_idx.append(pick)
  87. self.matrixD = np.array(D)
  88. # BallTree + matrixL + healthyResidual
  89. self.normalDataBallTree = BallTree(
  90. self.matrixD,
  91. leaf_size=4,
  92. metric=lambda a, b: 1.0 - self.calcSimilarity(a, b)
  93. )
  94. self.matrixL = trainDataset.copy()
  95. self.healthyResidual = self._calcResidual(self.matrixL)
  96. return 0
  97. def _calcResidual(self, states: np.ndarray) -> np.ndarray:
  98. ests = []
  99. for x in states:
  100. dist, idxs = self.normalDataBallTree.query([x], k=20, return_distance=True)
  101. w = 1.0/(dist[0]+1e-1)
  102. w = w/ w.sum()
  103. ests.append(np.sum([wi*self.matrixD[j] for wi,j in zip(w, idxs[0])],axis=0))
  104. est = np.array(ests).reshape(len(ests), -1)
  105. return est - states
  106. def calcSPRT(self,
  107. newsStates: np.ndarray,
  108. feature_weight: np.ndarray,
  109. alpha: float = 0.1,
  110. beta: float = 0.1,
  111. decisionGroup: int = 5) -> list[float]:
  112. # 1) 残差+加权
  113. resN = self._calcResidual(newsStates)
  114. wN = [np.dot(r, feature_weight) for r in resN]
  115. wH = [np.dot(r, feature_weight) for r in self.healthyResidual]
  116. mu0, sigma0 = np.mean(wH), np.std(wH)
  117. low = math.log(beta/(1-alpha))
  118. high = math.log((1-beta)/alpha)
  119. flags = []
  120. for i in range(len(wN)-decisionGroup+1):
  121. seg = wN[i:i+decisionGroup]; mu1 = np.mean(seg)
  122. si = (sum(seg)*(mu1-mu0)/sigma0**2
  123. - decisionGroup*((mu1**2-mu0**2)/(2*sigma0**2)))
  124. si = max(min(si, high), low)
  125. flags.append(si/high if si>0 else si/low)
  126. return flags
  127. def predict_SPRT(self,
  128. newsStates: np.ndarray,
  129. decisionGroup: int = 5) -> list[float]:
  130. """在线推理:用已加载的 matrixD、healthyResidual、feature_weight、alpha、beta"""
  131. return self.calcSPRT(
  132. newsStates,
  133. self.feature_weight,
  134. alpha=self.alpha,
  135. beta=self.beta,
  136. decisionGroup=decisionGroup
  137. )
  138. def save_model(self, path: str):
  139. """离线训练后持久化:matrixD, healthyResidual, feature_weight, alpha, beta"""
  140. os.makedirs(os.path.dirname(path), exist_ok=True)
  141. joblib.dump({
  142. 'matrixD': self.matrixD,
  143. 'healthyResidual': self.healthyResidual,
  144. 'feature_weight': self.feature_weight,
  145. 'alpha': self.alpha,
  146. 'beta': self.beta,
  147. }, path)
  148. @classmethod
  149. def load_model(cls, path: str) -> 'MSET_Temp':
  150. """在线启动时反序列化并重建 BallTree"""
  151. data = joblib.load(path)
  152. inst = cls('', [], '', '')
  153. inst.matrixD = data['matrixD']
  154. inst.healthyResidual = data['healthyResidual']
  155. inst.feature_weight = data['feature_weight']
  156. inst.alpha = data['alpha']
  157. inst.beta = data['beta']
  158. inst.normalDataBallTree = BallTree(
  159. inst.matrixD,
  160. leaf_size=4,
  161. metric=lambda a, b: 1.0 - inst.calcSimilarity(a, b)
  162. )
  163. return inst