MSET_Temp.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. import math
  2. import os
  3. import joblib
  4. import numpy as np
  5. import pandas as pd
  6. from sqlalchemy import text
  7. from sklearn.neighbors import BallTree
  8. from app.config import dataBase
  9. from app.database import get_engine
  10. class MSET_Temp:
  11. """
  12. MSET + SPRT 温度分析类:
  13. - 离线训练:genDLMatrix → save_model
  14. - 在线推理:load_model → predict_SPRT
  15. """
  16. def __init__(self,
  17. windCode: str,
  18. windTurbineNumberList: list[str],
  19. startTime: str,
  20. endTime: str):
  21. self.windCode = windCode.strip()
  22. self.windTurbineNumberList = windTurbineNumberList or []
  23. self.startTime = startTime
  24. self.endTime = endTime
  25. # 离线训练/加载后赋值
  26. self.matrixD = None
  27. self.healthyResidual = None
  28. self.normalDataBallTree = None
  29. # SPRT 参数(离线训练时设置)
  30. self.feature_weight: np.ndarray | None = None
  31. self.alpha: float = 0.1
  32. self.beta: float = 0.1
  33. def _get_data_by_filter(self) -> pd.DataFrame:
  34. """
  35. 在线推理专用:根据 self.windTurbineNumberList & 时间拉数据;
  36. 如果列表为空,则拉全场数据。
  37. """
  38. table = f"{self.windCode}_minute"
  39. engine = get_engine(dataBase.DATA_DB)
  40. if self.windTurbineNumberList:
  41. turbines = ",".join(f"'{t}'" for t in self.windTurbineNumberList)
  42. cond = f"wind_turbine_number IN ({turbines}) AND time_stamp BETWEEN :start AND :end"
  43. else:
  44. cond = "time_stamp BETWEEN :start AND :end"
  45. sql = text(f""" SELECT * FROM {table} WHERE {cond} ORDER BY time_stamp ASC """)
  46. return pd.read_sql(sql, engine, params={"start": self.startTime, "end": self.endTime})
  47. def calcSimilarity(self, x: np.ndarray, y: np.ndarray, m: str = 'euc') -> float:
  48. if len(x) != len(y):
  49. return 0.0
  50. if m == 'cbd':
  51. return float(np.mean([1.0 / (1.0 + abs(p - q)) for p, q in zip(x, y)]))
  52. diffsq = np.sum((x - y) ** 2)
  53. return float(1.0 / (1.0 + math.sqrt(diffsq)))
  54. def genDLMatrix(self, trainDataset: np.ndarray, dataSize4D=100, dataSize4L=50) -> int:
  55. """
  56. 离线训练:构造 matrixD/matrixL/healthyResidual/BallTree
  57. """
  58. m, n = trainDataset.shape
  59. if m < dataSize4D + dataSize4L:
  60. return -1
  61. # Step1:每维最小/最大入 D
  62. D_idx, D = [], []
  63. for i in range(n):
  64. col = trainDataset[:, i]
  65. for idx in (np.argmin(col), np.argmax(col)):
  66. D.append(trainDataset[idx].tolist())
  67. D_idx.append(idx)
  68. # Step2:挑样本至 dataSize4D
  69. while len(D_idx) < dataSize4D:
  70. free = list(set(range(m)) - set(D_idx))
  71. scores = [(np.mean([1 - self.calcSimilarity(trainDataset[i], d) for d in D]), i)
  72. for i in free]
  73. _, pick = max(scores)
  74. D.append(trainDataset[pick].tolist())
  75. D_idx.append(pick)
  76. self.matrixD = np.array(D)
  77. # BallTree + healthyResidual
  78. self.normalDataBallTree = BallTree(
  79. self.matrixD,
  80. leaf_size=4,
  81. metric=lambda a, b: 1.0 - self.calcSimilarity(a, b)
  82. )
  83. # healthyResidual
  84. ests = []
  85. for x in trainDataset:
  86. dist, idxs = self.normalDataBallTree.query([x], k=20, return_distance=True)
  87. w = 1.0 / (dist[0] + 1e-1)
  88. w /= w.sum()
  89. ests.append(np.sum([wi * self.matrixD[j] for wi, j in zip(w, idxs[0])], axis=0))
  90. self.healthyResidual = np.array(ests) - trainDataset
  91. return 0
  92. def calcSPRT(self,
  93. newsStates: np.ndarray,
  94. feature_weight: np.ndarray,
  95. alpha: float = 0.1,
  96. beta: float = 0.1,
  97. decisionGroup: int = 5) -> list[float]:
  98. """
  99. Wald-SPRT 得分
  100. """
  101. # 新状态残差
  102. ests = []
  103. for x in newsStates:
  104. dist, idxs = self.normalDataBallTree.query([x], k=20, return_distance=True)
  105. w = 1.0 / (dist[0] + 1e-1);
  106. w /= w.sum()
  107. ests.append(np.sum([wi * self.matrixD[j] for wi, j in zip(w, idxs[0])], axis=0))
  108. resN = np.array(ests) - newsStates
  109. # 加权
  110. wN = [np.dot(r, feature_weight) for r in resN]
  111. wH = [np.dot(r, feature_weight) for r in self.healthyResidual]
  112. mu0, sigma0 = np.mean(wH), np.std(wH)
  113. low = math.log(beta / (1 - alpha));
  114. high = math.log((1 - beta) / alpha)
  115. flags = []
  116. for i in range(len(wN) - decisionGroup + 1):
  117. seg = wN[i:i + decisionGroup];
  118. mu1 = np.mean(seg)
  119. si = (sum(seg) * (mu1 - mu0) / sigma0 ** 2
  120. - decisionGroup * ((mu1 ** 2 - mu0 ** 2) / (2 * sigma0 ** 2)))
  121. si = max(min(si, high), low)
  122. flags.append(si / high if si > 0 else si / low)
  123. return flags
  124. def predict_SPRT(self,
  125. newsStates: np.ndarray,
  126. decisionGroup: int = 5) -> list[float]:
  127. """
  128. 在线推理:用离线保存的 matrixD/healthyResidual/feature_weight/alpha/beta
  129. """
  130. return self.calcSPRT(
  131. newsStates,
  132. self.feature_weight,
  133. alpha=self.alpha,
  134. beta=self.beta,
  135. decisionGroup=decisionGroup
  136. )
  137. def save_model(self, path: str):
  138. """
  139. Save matrixD, healthyResidual, feature_weight, alpha, beta
  140. """
  141. os.makedirs(os.path.dirname(path), exist_ok=True)
  142. joblib.dump({
  143. 'matrixD': self.matrixD,
  144. 'healthyResidual': self.healthyResidual,
  145. 'feature_weight': self.feature_weight,
  146. 'alpha': self.alpha,
  147. 'beta': self.beta,
  148. }, path)
  149. @classmethod
  150. def load_model(cls, path: str) -> 'MSET_Temp':
  151. """
  152. Load + rebuild BallTree
  153. """
  154. data = joblib.load(path)
  155. inst = cls('', [], '', '')
  156. inst.matrixD = data['matrixD']
  157. inst.healthyResidual = data['healthyResidual']
  158. inst.feature_weight = data['feature_weight']
  159. inst.alpha = data['alpha']
  160. inst.beta = data['beta']
  161. inst.normalDataBallTree = BallTree(
  162. inst.matrixD,
  163. leaf_size=4,
  164. metric=lambda a, b: 1.0 - inst.calcSimilarity(a, b)
  165. )
  166. return inst