MSET_Temp.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. import math
  2. import os
  3. import joblib
  4. import numpy as np
  5. import pandas as pd
  6. from sqlalchemy import text
  7. from sklearn.neighbors import BallTree
  8. from app.config import dataBase
  9. from app.database import get_engine
  10. class MSET_Temp:
  11. """
  12. MSET + SPRT 温度分析类:
  13. - 离线训练:genDLMatrix → save_model
  14. - 在线推理:load_model → predict_SPRT
  15. """
  16. def __init__(self,
  17. windCode: str,
  18. windTurbineNumberList: list[str],
  19. startTime: str,
  20. endTime: str):
  21. self.windCode = windCode.strip()
  22. self.windTurbineNumberList = windTurbineNumberList or []
  23. self.startTime = startTime
  24. self.endTime = endTime
  25. # 离线训练/加载后赋值
  26. self.matrixD = None
  27. self.healthyResidual = None
  28. self.normalDataBallTree = None
  29. # SPRT 参数(离线训练时设置)
  30. self.feature_weight: np.ndarray | None = None
  31. self.alpha: float = 0.1
  32. self.beta: float = 0.1
  33. def _get_data_by_filter(self) -> pd.DataFrame:
  34. """
  35. 在线推理专用:根据 self.windTurbineNumberList & 时间拉数据;
  36. 如果列表为空,则拉全场数据。
  37. """
  38. # 特殊风场表名映射
  39. special_wind_farms = {
  40. "WOF093400005": f"`{self.windCode}-WOB000001_minute`" # 加上反引号
  41. }
  42. # 根据风场编号获取表名,特殊风场用反引号,其他风场不加反引号
  43. table = special_wind_farms.get(self.windCode, f"{self.windCode}_minute")
  44. engine = get_engine(dataBase.DATA_DB)
  45. if self.windTurbineNumberList:
  46. turbines = ",".join(f"'{t}'" for t in self.windTurbineNumberList)
  47. cond = f"wind_turbine_number IN ({turbines}) AND time_stamp BETWEEN :start AND :end"
  48. else:
  49. cond = "time_stamp BETWEEN :start AND :end"
  50. sql = text(f""" SELECT * FROM {table} WHERE {cond} ORDER BY time_stamp ASC """)
  51. return pd.read_sql(sql, engine, params={"start": self.startTime, "end": self.endTime})
  52. def calcSimilarity(self, x: np.ndarray, y: np.ndarray, m: str = 'euc') -> float:
  53. if len(x) != len(y):
  54. return 0.0
  55. if m == 'cbd':
  56. return float(np.mean([1.0 / (1.0 + abs(p - q)) for p, q in zip(x, y)]))
  57. diffsq = np.sum((x - y) ** 2)
  58. return float(1.0 / (1.0 + math.sqrt(diffsq)))
  59. def genDLMatrix(self, trainDataset: np.ndarray, dataSize4D=100, dataSize4L=50) -> int:
  60. """
  61. 离线训练:构造 matrixD/matrixL/healthyResidual/BallTree
  62. """
  63. m, n = trainDataset.shape
  64. if m < dataSize4D + dataSize4L:
  65. return -1
  66. # Step1:每维最小/最大入 D
  67. D_idx, D = [], []
  68. for i in range(n):
  69. col = trainDataset[:, i]
  70. for idx in (np.argmin(col), np.argmax(col)):
  71. D.append(trainDataset[idx].tolist())
  72. D_idx.append(idx)
  73. # Step2:挑样本至 dataSize4D
  74. while len(D_idx) < dataSize4D:
  75. free = list(set(range(m)) - set(D_idx))
  76. scores = [(np.mean([1 - self.calcSimilarity(trainDataset[i], d) for d in D]), i)
  77. for i in free]
  78. _, pick = max(scores)
  79. D.append(trainDataset[pick].tolist())
  80. D_idx.append(pick)
  81. self.matrixD = np.array(D)
  82. # BallTree + healthyResidual
  83. self.normalDataBallTree = BallTree(
  84. self.matrixD,
  85. leaf_size=4,
  86. metric=lambda a, b: 1.0 - self.calcSimilarity(a, b)
  87. )
  88. # healthyResidual
  89. ests = []
  90. for x in trainDataset:
  91. dist, idxs = self.normalDataBallTree.query([x], k=20, return_distance=True)
  92. w = 1.0 / (dist[0] + 1e-1)
  93. w /= w.sum()
  94. ests.append(np.sum([wi * self.matrixD[j] for wi, j in zip(w, idxs[0])], axis=0))
  95. self.healthyResidual = np.array(ests) - trainDataset
  96. return 0
  97. def calcSPRT(self,
  98. newsStates: np.ndarray,
  99. feature_weight: np.ndarray,
  100. alpha: float = 0.1,
  101. beta: float = 0.1,
  102. decisionGroup: int = 5) -> list[float]:
  103. """
  104. Wald-SPRT 得分
  105. """
  106. # 新状态残差
  107. ests = []
  108. for x in newsStates:
  109. dist, idxs = self.normalDataBallTree.query([x], k=20, return_distance=True)
  110. w = 1.0 / (dist[0] + 1e-1);
  111. w /= w.sum()
  112. ests.append(np.sum([wi * self.matrixD[j] for wi, j in zip(w, idxs[0])], axis=0))
  113. resN = np.array(ests) - newsStates
  114. # 加权
  115. wN = [np.dot(r, feature_weight) for r in resN]
  116. wH = [np.dot(r, feature_weight) for r in self.healthyResidual]
  117. mu0, sigma0 = np.mean(wH), np.std(wH)
  118. low = math.log(beta / (1 - alpha));
  119. high = math.log((1 - beta) / alpha)
  120. flags = []
  121. for i in range(len(wN) - decisionGroup + 1):
  122. seg = wN[i:i + decisionGroup];
  123. mu1 = np.mean(seg)
  124. si = (sum(seg) * (mu1 - mu0) / sigma0 ** 2
  125. - decisionGroup * ((mu1 ** 2 - mu0 ** 2) / (2 * sigma0 ** 2)))
  126. si = max(min(si, high), low)
  127. flags.append(si / high if si > 0 else si / low)
  128. return flags
  129. def predict_SPRT(self,
  130. newsStates: np.ndarray,
  131. decisionGroup: int = 5) -> list[float]:
  132. """
  133. 在线推理:用离线保存的 matrixD/healthyResidual/feature_weight/alpha/beta
  134. """
  135. return self.calcSPRT(
  136. newsStates,
  137. self.feature_weight,
  138. alpha=self.alpha,
  139. beta=self.beta,
  140. decisionGroup=decisionGroup
  141. )
  142. def save_model(self, path: str):
  143. """
  144. Save matrixD, healthyResidual, feature_weight, alpha, beta
  145. """
  146. os.makedirs(os.path.dirname(path), exist_ok=True)
  147. joblib.dump({
  148. 'matrixD': self.matrixD,
  149. 'healthyResidual': self.healthyResidual,
  150. 'feature_weight': self.feature_weight,
  151. 'alpha': self.alpha,
  152. 'beta': self.beta,
  153. }, path)
  154. @classmethod
  155. def load_model(cls, path: str) -> 'MSET_Temp':
  156. """
  157. Load + rebuild BallTree
  158. """
  159. data = joblib.load(path)
  160. inst = cls('', [], '', '')
  161. inst.matrixD = data['matrixD']
  162. inst.healthyResidual = data['healthyResidual']
  163. inst.feature_weight = data['feature_weight']
  164. inst.alpha = data['alpha']
  165. inst.beta = data['beta']
  166. inst.normalDataBallTree = BallTree(
  167. inst.matrixD,
  168. leaf_size=4,
  169. metric=lambda a, b: 1.0 - inst.calcSimilarity(a, b)
  170. )
  171. return inst