# Temp_Diag.py

import numpy as np
import pandas as pd
from sklearn.neighbors import BallTree
from sqlalchemy import create_engine, text
import math, joblib, os
from fastapi.responses import JSONResponse
from typing import Dict

class MSET_Temp:
    """
    MSET + SPRT temperature analysis.

    - Offline training: genDLMatrix -> save_model
    - Online inference: load_model -> predict_SPRT
    """

    def __init__(self,
                 windCode: str,
                 windTurbineNumberList: list[str],
                 startTime: str,
                 endTime: str):
        self.windCode = windCode.strip()
        self.windTurbineNumberList = windTurbineNumberList or []
        self.startTime = startTime
        self.endTime = endTime
        # Populated by offline training or load_model
        self.matrixD = None
        self.healthyResidual = None
        self.normalDataBallTree = None
        # SPRT parameters (set during offline training)
        self.feature_weight: np.ndarray | None = None
        self.alpha: float = 0.1
        self.beta: float = 0.1

    def _get_data_by_filter(self) -> pd.DataFrame:
        """
        Online inference only: pull minute data for self.windTurbineNumberList
        within [startTime, endTime]; if the list is empty, pull the whole farm.
        """
        table = f"{self.windCode}_minute"
        engine = create_engine(
            # "mysql+pymysql://root:admin123456@106.120.102.238:10336/energy_data_prod"
            "mysql+pymysql://root:admin123456@192.168.50.235:30306/energy_data_prod"
        )
        if self.windTurbineNumberList:
            turbines = ",".join(f"'{t}'" for t in self.windTurbineNumberList)
            cond = f"wind_turbine_number IN ({turbines}) AND time_stamp BETWEEN :start AND :end"
        else:
            cond = "time_stamp BETWEEN :start AND :end"
        sql = text(f"""
            SELECT *
            FROM {table}
            WHERE {cond}
            ORDER BY time_stamp ASC
        """)
        return pd.read_sql(sql, engine, params={"start": self.startTime, "end": self.endTime})

    def calcSimilarity(self, x: np.ndarray, y: np.ndarray, m: str = 'euc') -> float:
        """Similarity in (0, 1]: 'cbd' uses a city-block form, anything else Euclidean."""
        if len(x) != len(y):
            return 0.0
        if m == 'cbd':
            return float(np.mean([1.0 / (1.0 + abs(p - q)) for p, q in zip(x, y)]))
        diffsq = np.sum((x - y) ** 2)
        return float(1.0 / (1.0 + math.sqrt(diffsq)))

    def genDLMatrix(self, trainDataset: np.ndarray,
                    dataSize4D=100, dataSize4L=50) -> int:
        """
        Offline training: build matrixD, the healthy residuals and the BallTree.
        Returns 0 on success, -1 if the training set is too small.
        """
        m, n = trainDataset.shape
        if m < dataSize4D + dataSize4L:
            return -1
        # Step 1: add each feature's min/max sample to D
        D_idx, D = [], []
        for i in range(n):
            col = trainDataset[:, i]
            for idx in (np.argmin(col), np.argmax(col)):
                D.append(trainDataset[idx].tolist())
                D_idx.append(idx)
        # Step 2: greedily add the most dissimilar samples until D reaches dataSize4D
        while len(D_idx) < dataSize4D:
            free = list(set(range(m)) - set(D_idx))
            scores = [(np.mean([1 - self.calcSimilarity(trainDataset[i], d) for d in D]), i)
                      for i in free]
            _, pick = max(scores)
            D.append(trainDataset[pick].tolist())
            D_idx.append(pick)
        self.matrixD = np.array(D)
        # BallTree over D, using 1 - similarity as the distance metric
        self.normalDataBallTree = BallTree(
            self.matrixD,
            leaf_size=4,
            metric=lambda a, b: 1.0 - self.calcSimilarity(a, b)
        )
        # Healthy residuals: MSET estimate minus observation on the training set
        ests = []
        for x in trainDataset:
            dist, idxs = self.normalDataBallTree.query([x], k=20, return_distance=True)
            w = 1.0 / (dist[0] + 1e-1)
            w /= w.sum()
            ests.append(np.sum([wi * self.matrixD[j] for wi, j in zip(w, idxs[0])], axis=0))
        self.healthyResidual = np.array(ests) - trainDataset
        return 0

    def calcSPRT(self,
                 newsStates: np.ndarray,
                 feature_weight: np.ndarray,
                 alpha: float = 0.1,
                 beta: float = 0.1,
                 decisionGroup: int = 5) -> list[float]:
        """
        Wald SPRT scores over a sliding window of decisionGroup samples.
        """
        # Residuals of the new states against their MSET estimates
        ests = []
        for x in newsStates:
            dist, idxs = self.normalDataBallTree.query([x], k=20, return_distance=True)
            w = 1.0 / (dist[0] + 1e-1)
            w /= w.sum()
            ests.append(np.sum([wi * self.matrixD[j] for wi, j in zip(w, idxs[0])], axis=0))
        resN = np.array(ests) - newsStates
        # Project residuals onto the feature weights
        wN = [np.dot(r, feature_weight) for r in resN]
        wH = [np.dot(r, feature_weight) for r in self.healthyResidual]
        mu0, sigma0 = np.mean(wH), np.std(wH)
        low = math.log(beta / (1 - alpha))
        high = math.log((1 - beta) / alpha)
        flags = []
        for i in range(len(wN) - decisionGroup + 1):
            seg = wN[i:i + decisionGroup]
            mu1 = np.mean(seg)
            # Log-likelihood ratio of window mean mu1 vs healthy mean mu0 (shared sigma0),
            # clipped to the SPRT decision thresholds and scaled to a score
            si = (sum(seg) * (mu1 - mu0) / sigma0 ** 2
                  - decisionGroup * ((mu1 ** 2 - mu0 ** 2) / (2 * sigma0 ** 2)))
            si = max(min(si, high), low)
            flags.append(si / high if si > 0 else si / low)
        return flags

    def predict_SPRT(self,
                     newsStates: np.ndarray,
                     decisionGroup: int = 5) -> list[float]:
        """
        Online inference: score newsStates with the matrixD / healthyResidual /
        feature_weight / alpha / beta saved during offline training.
        """
        return self.calcSPRT(
            newsStates,
            self.feature_weight,
            alpha=self.alpha,
            beta=self.beta,
            decisionGroup=decisionGroup
        )

    def save_model(self, path: str):
        """
        Save matrixD, healthyResidual, feature_weight, alpha and beta to path.
        """
        os.makedirs(os.path.dirname(path), exist_ok=True)
        joblib.dump({
            'matrixD': self.matrixD,
            'healthyResidual': self.healthyResidual,
            'feature_weight': self.feature_weight,
            'alpha': self.alpha,
            'beta': self.beta,
        }, path)

    @classmethod
    def load_model(cls, path: str) -> 'MSET_Temp':
        """
        Load a saved model and rebuild the BallTree.
        """
        data = joblib.load(path)
        inst = cls('', [], '', '')
        inst.matrixD = data['matrixD']
        inst.healthyResidual = data['healthyResidual']
        inst.feature_weight = data['feature_weight']
        inst.alpha = data['alpha']
        inst.beta = data['beta']
        inst.normalDataBallTree = BallTree(
            inst.matrixD,
            leaf_size=4,
            metric=lambda a, b: 1.0 - inst.calcSimilarity(a, b)
        )
        return inst

    def query_surrounding_data(self, timestamp: str, minutes_around: int = 250) -> Dict:
        """
        Query minute data around a given time point.

        Args:
            timestamp: center time point, formatted 'yyyy-mm-dd HH:MM:SS'
            minutes_around: how many minutes before and after the center to query

        Returns:
            {
                'record_count': int,
                'records': List[Dict]   # column names mapped to Chinese display names
            }
        """
        # Mapping from column names to Chinese display names (with units)
        cn_map = {
            'wind_turbine_name': '风机名称',
            'time_stamp': '时间',
            'active_power': '有功功率(kW)',
            'rotor_speed': '风轮转速(rpm)',
            'generator_speed': '发电机转速(rpm)',
            'wind_velocity': '风速(m/s)',
            'pitch_angle_blade_1': '桨距角1(°)',
            'pitch_angle_blade_2': '桨距角2(°)',
            'pitch_angle_blade_3': '桨距角3(°)',
            'cabin_position': '机舱位置(°)',
            'true_wind_direction': '绝对风向(°)',
            'yaw_error1': '对风角度(°)',
            'set_value_of_active_power': '有功功率设定值(kW)',
            'gearbox_oil_temperature': '齿轮箱油温(℃)',
            'generatordrive_end_bearing_temperature': '发电机驱动端轴承温度(℃)',
            'generatornon_drive_end_bearing_temperature': '发电机非驱动端轴承温度(℃)',
            'cabin_temperature': '机舱内温度(℃)',
            'twisted_cable_angle': '扭缆角度(°)',
            'outside_cabin_temperature': '环境温度(℃)',
            'main_bearing_temperature': '主轴承轴承温度(℃)',
            'main_bearing_temperature_2': '主轴承轴承温度2(℃)',
            'gearbox_high_speed_shaft_bearing_temperature': '齿轮箱高速轴轴承温度(℃)',
            'gearboxmedium_speed_shaftbearing_temperature': '齿轮箱中速轴轴承温度(℃)',
            'gearbox_low_speed_shaft_bearing_temperature': '齿轮箱低速轴轴承温度(℃)',
            'generator_winding1_temperature': '发电机绕组1温度(℃)',
            'generator_winding2_temperature': '发电机绕组2温度(℃)',
            'generator_winding3_temperature': '发电机绕组3温度(℃)',
            'grid_a_phase_current': '电网A相电流(A)',
            'grid_b_phase_current': '电网B相电流(A)',
            'grid_c_phase_current': '电网C相电流(A)'
        }
        table = f"{self.windCode}_minute"
        engine = create_engine(
            "mysql+pymysql://root:admin123456@192.168.50.235:30306/energy_data_prod"
        )
        # Fetch rows in [timestamp - minutes_around, timestamp + minutes_around]
        sql = text(f"""
            SELECT *
            FROM {table}
            WHERE wind_turbine_number IN ({','.join([f"'{t}'" for t in self.windTurbineNumberList])})
              AND time_stamp BETWEEN
                  DATE_SUB(:timestamp, INTERVAL :minutes MINUTE)
                  AND DATE_ADD(:timestamp, INTERVAL :minutes MINUTE)
            ORDER BY time_stamp ASC
        """)
        df = pd.read_sql(sql, engine, params={
            "timestamp": timestamp,
            "minutes": minutes_around
        })
        # Log how many rows were fetched
        record_count = len(df)
        print(f"Fetched {record_count} rows")
        if df.empty:
            return {
                'record_count': 0,
                'records': []
            }
        # Drop unneeded columns and columns that are entirely empty
        cols_to_drop = [
            'wind_turbine_number', 'reactive_power', 'lab', 'year', 'month', 'day', 'year_month',
            'front_back_vibration_of_the_cabin', 'side_to_side_vibration_of_the_cabin',
            'actual_torque', 'given_torque', 'clockwise_yaw_count', 'counterclockwise_yaw_count',
            'unusable', 'power_curve_available', 'required_gearbox_speed',
            'inverter_speed_master_control', 'wind_turbine_status', 'wind_turbine_status2',
            'turbulence_intensity'
        ]
        cols_to_drop = [col for col in cols_to_drop if col in df.columns]
        df = df.drop(columns=cols_to_drop)
        df = df.dropna(axis=1, how='all')
        # Rename columns to Chinese display names and serialize timestamps as strings
        df['time_stamp'] = df['time_stamp'].astype(str)
        records = df.rename(columns=cn_map).to_dict('records')
        return {
            'record_count': record_count,
            'records': records
        }
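

# ---------------------------------------------------------------------------
# Minimal usage sketch (an illustrative addition, not part of the original
# module): it exercises the offline train -> save -> load -> score flow on
# synthetic data, so it does not touch the MySQL source used by
# _get_data_by_filter / query_surrounding_data. The wind-farm code, turbine
# number, model path, feature sizes and equal feature weights below are
# placeholder assumptions, not values from the original project.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.default_rng(0)

    # Offline training: 120 "healthy" samples x 4 temperature-like features
    train = rng.normal(loc=60.0, scale=2.0, size=(120, 4))
    model = MSET_Temp(windCode="WOF001",
                      windTurbineNumberList=["WT01"],
                      startTime="2024-01-01 00:00:00",
                      endTime="2024-01-02 00:00:00")
    if model.genDLMatrix(train, dataSize4D=40, dataSize4L=10) != 0:
        raise RuntimeError("training set smaller than dataSize4D + dataSize4L")
    # Equal feature weights; a real deployment would pick these per signal
    model.feature_weight = np.ones(train.shape[1]) / train.shape[1]
    model.save_model("models/mset_temp_demo.pkl")

    # Online inference: reload the saved artifacts and score a slightly hotter window
    new_states = rng.normal(loc=63.0, scale=2.0, size=(30, 4))
    loaded = MSET_Temp.load_model("models/mset_temp_demo.pkl")
    scores = loaded.predict_SPRT(new_states, decisionGroup=5)
    print("SPRT scores for the first sliding windows:", scores[:5])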