# Temp_Diag.py
  1. import numpy as np
  2. import pandas as pd
  3. from sklearn.neighbors import BallTree
  4. from sqlalchemy import create_engine, text
  5. import math, joblib, os
  6. class MSET_Temp:
  7. """
  8. MSET + SPRT 温度分析类:
  9. - 离线训练:genDLMatrix → save_model
  10. - 在线推理:load_model → predict_SPRT
  11. """
  12. def __init__(self,
  13. windCode: str,
  14. windTurbineNumberList: list[str],
  15. startTime: str,
  16. endTime: str):
  17. self.windCode = windCode.strip()
  18. self.windTurbineNumberList = windTurbineNumberList or []
  19. self.startTime = startTime
  20. self.endTime = endTime
  21. # 离线训练/加载后赋值
  22. self.matrixD = None
  23. self.healthyResidual = None
  24. self.normalDataBallTree = None
  25. # SPRT 参数(离线训练时设置)
  26. self.feature_weight: np.ndarray | None = None
  27. self.alpha: float = 0.1
  28. self.beta: float = 0.1
  29. def _get_data_by_filter(self) -> pd.DataFrame:
  30. """
  31. 在线推理专用:根据 self.windTurbineNumberList & 时间拉数据;
  32. 如果列表为空,则拉全场数据。
  33. """
  34. # # 特殊风场表名映射
  35. # special_wind_farms = {
  36. # "WOF093400005": f"`{self.windCode}-WOB000001_minute`" # 加上反引号
  37. # }
  38. # # 根据风场编号获取表名,特殊风场用反引号,其他风场不加反引号
  39. # table = special_wind_farms.get(self.windCode, f"{self.windCode}_minute")
  40. table = f"{self.windCode}_minute"
  41. engine = create_engine(
  42. #"mysql+pymysql://root:admin123456@106.120.102.238:10336/energy_data_prod"
  43. "mysql+pymysql://root:admin123456@192.168.50.235:30306/energy_data_prod"
  44. )
  45. if self.windTurbineNumberList:
  46. turbines = ",".join(f"'{t}'" for t in self.windTurbineNumberList)
  47. cond = f"wind_turbine_number IN ({turbines}) AND time_stamp BETWEEN :start AND :end"
  48. else:
  49. cond = "time_stamp BETWEEN :start AND :end"
  50. sql = text(f"""
  51. SELECT *
  52. FROM {table}
  53. WHERE {cond}
  54. ORDER BY time_stamp ASC
  55. """)
  56. return pd.read_sql(sql, engine, params={"start": self.startTime, "end": self.endTime})
  57. def calcSimilarity(self, x: np.ndarray, y: np.ndarray, m: str = 'euc') -> float:
  58. if len(x) != len(y):
  59. return 0.0
  60. if m == 'cbd':
  61. return float(np.mean([1.0/(1.0+abs(p-q)) for p,q in zip(x,y)]))
  62. diffsq = np.sum((x-y)**2)
  63. return float(1.0/(1.0+math.sqrt(diffsq)))
  64. def genDLMatrix(self, trainDataset: np.ndarray,
  65. dataSize4D=100, dataSize4L=50) -> int:
  66. """
  67. 离线训练:构造 matrixD/matrixL/healthyResidual/BallTree
  68. """
  69. m, n = trainDataset.shape
  70. if m < dataSize4D + dataSize4L:
  71. return -1
  72. # Step1:每维最小/最大入 D
  73. D_idx, D = [], []
  74. for i in range(n):
  75. col = trainDataset[:, i]
  76. for idx in (np.argmin(col), np.argmax(col)):
  77. D.append(trainDataset[idx].tolist())
  78. D_idx.append(idx)
  79. # Step2:挑样本至 dataSize4D
  80. while len(D_idx) < dataSize4D:
  81. free = list(set(range(m)) - set(D_idx))
  82. scores = [(np.mean([1-self.calcSimilarity(trainDataset[i], d) for d in D]), i)
  83. for i in free]
  84. _, pick = max(scores)
  85. D.append(trainDataset[pick].tolist())
  86. D_idx.append(pick)
  87. self.matrixD = np.array(D)
  88. # BallTree + healthyResidual
  89. self.normalDataBallTree = BallTree(
  90. self.matrixD,
  91. leaf_size=4,
  92. metric=lambda a,b: 1.0 - self.calcSimilarity(a, b)
  93. )
  94. # healthyResidual
  95. ests = []
  96. for x in trainDataset:
  97. dist, idxs = self.normalDataBallTree.query([x], k=20, return_distance=True)
  98. w = 1.0/(dist[0]+1e-1)
  99. w /= w.sum()
  100. ests.append(np.sum([wi*self.matrixD[j] for wi,j in zip(w,idxs[0])], axis=0))
  101. self.healthyResidual = np.array(ests) - trainDataset
  102. return 0
  103. def calcSPRT(self,
  104. newsStates: np.ndarray,
  105. feature_weight: np.ndarray,
  106. alpha: float = 0.1,
  107. beta: float = 0.1,
  108. decisionGroup: int = 5) -> list[float]:
  109. """
  110. Wald-SPRT 得分
  111. """
  112. # 新状态残差
  113. ests = []
  114. for x in newsStates:
  115. dist, idxs = self.normalDataBallTree.query([x], k=20, return_distance=True)
  116. w = 1.0/(dist[0]+1e-1); w/=w.sum()
  117. ests.append(np.sum([wi*self.matrixD[j] for wi,j in zip(w,idxs[0])], axis=0))
  118. resN = np.array(ests) - newsStates
  119. # 加权
  120. wN = [np.dot(r, feature_weight) for r in resN]
  121. wH = [np.dot(r, feature_weight) for r in self.healthyResidual]
  122. mu0, sigma0 = np.mean(wH), np.std(wH)
  123. low = math.log(beta/(1-alpha)); high = math.log((1-beta)/alpha)
  124. flags = []
  125. for i in range(len(wN)-decisionGroup+1):
  126. seg = wN[i:i+decisionGroup]; mu1=np.mean(seg)
  127. si = (sum(seg)*(mu1-mu0)/sigma0**2
  128. - decisionGroup*((mu1**2-mu0**2)/(2*sigma0**2)))
  129. si = max(min(si, high), low)
  130. flags.append(si/high if si>0 else si/low)
  131. return flags
  132. def predict_SPRT(self,
  133. newsStates: np.ndarray,
  134. decisionGroup: int = 5) -> list[float]:
  135. """
  136. 在线推理:用离线保存的 matrixD/healthyResidual/feature_weight/alpha/beta
  137. """
  138. return self.calcSPRT(
  139. newsStates,
  140. self.feature_weight,
  141. alpha=self.alpha,
  142. beta=self.beta,
  143. decisionGroup=decisionGroup
  144. )
  145. def save_model(self, path: str):
  146. """
  147. Save matrixD, healthyResidual, feature_weight, alpha, beta
  148. """
  149. os.makedirs(os.path.dirname(path), exist_ok=True)
  150. joblib.dump({
  151. 'matrixD': self.matrixD,
  152. 'healthyResidual': self.healthyResidual,
  153. 'feature_weight': self.feature_weight,
  154. 'alpha': self.alpha,
  155. 'beta': self.beta,
  156. }, path)
  157. @classmethod
  158. def load_model(cls, path: str) -> 'MSET_Temp':
  159. """
  160. Load + rebuild BallTree
  161. """
  162. data = joblib.load(path)
  163. inst = cls('', [], '', '')
  164. inst.matrixD = data['matrixD']
  165. inst.healthyResidual = data['healthyResidual']
  166. inst.feature_weight = data['feature_weight']
  167. inst.alpha = data['alpha']
  168. inst.beta = data['beta']
  169. inst.normalDataBallTree = BallTree(
  170. inst.matrixD,
  171. leaf_size=4,
  172. metric=lambda a,b: 1.0 - inst.calcSimilarity(a, b)
  173. )
  174. return inst