@@ -1,335 +1,339 @@
-# @Time : 2019-6-19
-# @Author : wilbur.cheng@SKF
-# @FileName: baseMSET.py
-# @Software: a package in skf anomaly detection library to realize multivariate state estimation technique
-# this package has included all the functions used in MSET, similarity calculation, state memory matrix D,
-# normal state matrix L, residual calculation, and the basic SPRT (Sequential Probability Ratio Test)
-# function for binary hypothesis testing.
-#
+# temp_diag.py

import numpy as np
import pandas as pd
-import matplotlib.pyplot as plt
from sklearn.neighbors import BallTree
-import openpyxl
-import time
-#from scikit-learn.neighbors import BallTree
+from sqlalchemy import create_engine, text
+import math

-class MSET():
+class MSET_Temp:
+    """
+    Temperature trend/threshold analysis based on MSET + SPRT.
+    Query conditions are determined by the wind_turbine_number column and the
+    time_stamp range; the SPRT alarm threshold is fixed at 0.99, and calcSPRT
+    outputs scores in [-1, 1].
+    """

-    matrixD = None
-    matrixL = None
-    healthyResidual = None
-
-    normalDataBallTree = None
-
-
-
-    def __init__(self):
-        self.model = None
-
-
-    def calcSimilarity(self, x, y, m = 'euc'):
+    def __init__(self, windCode: str, windTurbineNumberList: list[str], startTime: str, endTime: str):
        """
-        Calcualte the similartity of two feature vector
-        :param x: one of input feature list
-        :param y: one of input feature list
-        :param m: method of the similarity calculation method, default is the Eucilidean distance named 'euc';
-                  a city block distance function is used when m set to "cbd'.
-        :return: the two feature similarity, float, range (0,1)
+        :param windCode: turbine/unit code, used to build the table name, e.g. "WOG01312" → table "WOG01312_minute"
+        :param windTurbineNumberList: list of wind_turbine_number values (turbine IDs) to query
+        :param startTime: start time as a string, format "YYYY-MM-DD HH:MM"
+        :param endTime: end time as a string, format "YYYY-MM-DD HH:MM"
        """
+        self.windCode = windCode.strip()
+        self.windTurbineNumberList = windTurbineNumberList
+        # force-truncate both endpoints to whole seconds
+        self.startTime = self._truncate_to_seconds(startTime)
+        self.endTime = self._truncate_to_seconds(endTime)
+
+        # D/L matrix state
+        self.matrixD = None
+        self.matrixL = None
+        self.healthyResidual = None
+        self.normalDataBallTree = None
+
+    def _truncate_to_seconds(self, dt_str: str) -> str:
+        """
+        Truncate a timestamp string that may be ISO-formatted or carry
+        fractional seconds down to whole seconds, returning
+        "YYYY-MM-DD HH:MM:SS".
+        e.g. "2025-06-01T12:34:56.789Z" → "2025-06-01 12:34:56"
+        """
+        # replace 'T' with a space and strip any trailing "Z"
+        s = dt_str.replace("T", " ").rstrip("Z")
+        # drop fractional seconds if present
+        if "." in s:
+            s = s.split(".")[0]
+        # drop a trailing "+xx:xx" timezone suffix if present
+        if "+" in s:
+            s = s.split("+")[0]
+        return s.strip()
+
+    def _get_data_by_filter(self) -> pd.DataFrame:
+        """
+        Batch-query rows matching the wind_turbine_number list and the
+        time_stamp range, returning one DataFrame sorted by time_stamp
+        ascending.
+        """
+        table_name = f"{self.windCode}_minute"
+        engine = create_engine(
+            "mysql+pymysql://root:admin123456@106.120.102.238:10336/energy_data_prod"
+        )
+
+        # build the SQL fragment for the wind_turbine_number list: ('WT1','WT2',...)
+        turbines = ",".join(f"'{wt.strip()}'" for wt in self.windTurbineNumberList)
+        sql = text(f"""
+            SELECT *
+            FROM {table_name}
+            WHERE wind_turbine_number IN ({turbines})
+              AND time_stamp BETWEEN :start AND :end
+            ORDER BY time_stamp ASC
+        """)
+
+        df = pd.read_sql(sql, engine, params={"start": self.startTime, "end": self.endTime})
+        return df
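+    # Illustrative note (added, not from the original change): for windCode
+    # "WOG01312" and turbines ["WT01", "WT02"] — placeholder values — the
+    # f-string above renders roughly:
+    #   SELECT * FROM WOG01312_minute
+    #   WHERE wind_turbine_number IN ('WT01','WT02')
+    #     AND time_stamp BETWEEN :start AND :end
+    #   ORDER BY time_stamp ASC
+    # Only :start/:end are bound parameters; the turbine list is interpolated.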
+
+    def calcSimilarity(self, x: np.ndarray, y: np.ndarray, m: str = 'euc') -> float:
+        """
+        Compute the similarity of vectors x and y, in the interval (0, 1]:
+          - m='euc' → Euclidean distance
+          - m='cbd' → city-block (Manhattan) distance
+        """
+        if len(x) != len(y):
+            return 0.0

-        if (len(x) != len(y)):
-            return 0
-
-        if (m == 'cbd'):
-            dSimilarity = [1/(1+np.abs(p-q)) for p,q in zip(x,y)]
-            dSimilarity = np.sum(dSimilarity)/len(x)
+        if m == 'cbd':
+            arr = [1.0 / (1.0 + abs(p - q)) for p, q in zip(x, y)]
+            return float(np.sum(arr) / len(arr))
        else:
-            dSimilarity = [np.power(p-q,2) for p, q in zip(x, y)]
-            dSimilarity = 1/(1+np.sqrt(np.sum(dSimilarity)))
-
-        return dSimilarity
-
+            diffsq = [(p - q) ** 2 for p, q in zip(x, y)]
+            return float(1.0 / (1.0 + math.sqrt(np.sum(diffsq))))
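+    # Worked example (illustrative addition): for x=[0, 0] and y=[3, 4],
+    # 'euc' gives 1/(1+5) ≈ 0.167, while 'cbd' averages 1/(1+3) and 1/(1+4)
+    # to 0.225; identical vectors score exactly 1.0 under either metric.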

-    def genDLMatrix(self, trainDataset, dataSize4D=100, dataSize4L=50):
+    def genDLMatrix(self, trainDataset: np.ndarray, dataSize4D=100, dataSize4L=50) -> int:
        """
-        automatically generate the D and L matrix from training data set, assuming the training data set is all normal
-        state data.
-        :param trainDataset: 2D array, [count of data, length of feature]
-        :param dataSize4D: minimized count of state for matrix D
-        :param dataSize4L: minimized count of state for matrix L
-        :return: 0 if successful, -1 if fail
+        Build the D/L matrices from the training set trainDataset:
+          - if the sample count < dataSize4D + dataSize4L, return -1
+          - otherwise construct matrixD and matrixL, derive healthyResidual
+            via locally weighted regression, and return 0
        """
-        [m,n] = np.shape(trainDataset)
-
+        m, n = trainDataset.shape
        if m < dataSize4D + dataSize4L:
-            print('training dataset is too small to generate the matrix D and L')
            return -1

+        # Step 1: add the sample holding the min/max of each feature to matrixD
        self.matrixD = []
        selectIndex4D = []
-        # Step 1: add all the state with minimum or maximum value in each feature into Matrix D
-
-        for i in range(0, n):
-            feature_i = trainDataset[:,i]
-            minIndex = np.argmin(feature_i)
-            maxIndex = np.argmax(feature_i)
-
-            self.matrixD.append(trainDataset[minIndex, :].tolist())
-            selectIndex4D.append(minIndex)
-            self.matrixD.append(trainDataset[maxIndex, :].tolist())
-            selectIndex4D.append(maxIndex)
-
-        # Step 2: iteratively add the state with the maximum average distance to the states in selected matrixD
-        while(1):
-
-            if (len(selectIndex4D) >= dataSize4D):
-                break
-
-            # Get the free state list
-            freeStateList = list(set(np.arange(0, len(trainDataset))) - set(selectIndex4D))
-
-            # Calculate the average dist of each state in free to selected state in matrix D
-            distList = []
-            for i in freeStateList:
-                tmpState = trainDataset[i]
-                tmpDist = [1-self.calcSimilarity(x, tmpState) for x in self.matrixD]
-                distList.append(np.mean(tmpDist))
-
-            # select the free state with largest average distance to states in matrixD, and update matrixD
-            selectId = freeStateList[distList.index(np.max(distList))]
-            self.matrixD.append(trainDataset[selectId, :].tolist())
-            selectIndex4D.append(selectId)
-        #format matrixD from list to array
-        self.matrixD = np.array(self.matrixD)
-        self.normalDataBallTree = BallTree(self.matrixD, leaf_size=4, metric = lambda i,j: 1-self.calcSimilarity(i,j))
+        for i in range(n):
+            col_i = trainDataset[:, i]
+            idx_min = np.argmin(col_i)
+            idx_max = np.argmax(col_i)
+            self.matrixD.append(trainDataset[idx_min, :].tolist())
+            selectIndex4D.append(idx_min)
+            self.matrixD.append(trainDataset[idx_max, :].tolist())
+            selectIndex4D.append(idx_max)
+
+        # Step 2: repeatedly pull in the free sample with the largest average
+        # distance to matrixD, until matrixD holds dataSize4D rows
+        while len(selectIndex4D) < dataSize4D:
+            freeList = list(set(range(len(trainDataset))) - set(selectIndex4D))
+            distAvg = []
+            for idx in freeList:
+                tmp = trainDataset[idx, :]
+                dlist = [1.0 - self.calcSimilarity(x, tmp) for x in self.matrixD]
+                distAvg.append(np.mean(dlist))
+            select_id = freeList[int(np.argmax(distAvg))]
+            self.matrixD.append(trainDataset[select_id, :].tolist())
+            selectIndex4D.append(select_id)

+        self.matrixD = np.array(self.matrixD)

-        # Step 3. select remaining state for matrix L
-        #index4L = list(set(np.arange(0, len(trainDataset))) - set(selectIndex4D))
-        #self.matrixL = trainDataset[index4L, :]
-        # consider the limited dataset, using all the train data to matrix L
-        self.matrixL = trainDataset
+        # build a BallTree over matrixD for the locally weighted regression
+        self.normalDataBallTree = BallTree(
+            self.matrixD,
+            leaf_size=4,
+            metric=lambda a, b: 1.0 - self.calcSimilarity(a, b)
+        )

-        # Calculate the healthy Residual from matrix L
-        lamdaRatio = 1e-3
-        [m, n] = np.shape(self.matrixD)
-        self.DDSimilarity = np.array([[1-self.calcSimilarity(x,y) for x in self.matrixD] for y in self.matrixD] + lamdaRatio*np.eye(n))
-        self.DDSimilarity = 1/self.DDSimilarity
+        # Step 3: use the whole training set as matrixL
+        self.matrixL = trainDataset.copy()

-        #self.healthyResidual = self.calcResidual(self.matrixL)
+        # Step 4: compute the healthy residual via locally weighted regression
        self.healthyResidual = self.calcResidualByLocallyWeightedLR(self.matrixL)
-
-
        return 0
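+    # Illustrative shapes (assumption, not from the original change): for a
+    # trainDataset of shape (100, 1) with dataSize4D=60, matrixD ends up
+    # (60, 1) — the per-feature extremes plus the most spread-out samples —
+    # matrixL is the full (100, 1) training set, and healthyResidual is (100, 1).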

-    def calcResidualByLocallyWeightedLR(self, newStates):
+    def calcResidualByLocallyWeightedLR(self, newStates: np.ndarray) -> np.ndarray:
        """
-        find the K-nearest neighbors for each input state, then calculate the estimated state by locally weighted average.
-        :param newStates: input states list
-        :return: residual R_x
+        For each sample in newStates, run a locally weighted regression over
+        its 20 nearest neighbours in matrixD and compute the residual.
+        Returns a residual matrix of shape [len(newStates), n_features].
        """
-        [m,n] = np.shape(newStates)
-        est_X = []
-        # print(newStates)
+        est_list = []
        for x in newStates:
-            (dist, iList) = self.normalDataBallTree.query([x], 20, return_distance=True)
-            weight = 1/(dist[0]+1e-1)
-            weight = weight/sum(weight)
-            eState = np.sum([w*self.matrixD[i] for w,i in zip(weight, iList[0])])
-            est_X.append(eState)
-
-        est_X = np.reshape(est_X, (len(est_X),1))
-        # print(est_X)
-        # print(newStates)
-        return est_X - newStates
-
-    def calcStateResidual(self, newsStates):
-        stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
-        return stateResidual
-
-    def calcSPRT(self, newsStates, feature_weight, alpha=0.1, beta=0.1, decisionGroup=5):
+            dist, idxs = self.normalDataBallTree.query([x], k=20, return_distance=True)
+            w = 1.0 / (dist[0] + 1e-1)
+            w = w / np.sum(w)
+            est = np.sum([w_i * self.matrixD[j] for w_i, j in zip(w, idxs[0])], axis=0)
+            est_list.append(est)
+        est_arr = np.reshape(np.array(est_list), (len(est_list), -1))
+        return est_arr - newStates
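+    # Weighting sketch (illustrative numbers): neighbour distances
+    # [0.0, 0.1, 0.3] give raw weights 1/(d+0.1) = [10.0, 5.0, 2.5],
+    # normalised to [0.571, 0.286, 0.143], so the closest neighbours
+    # dominate the estimated state.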
+
+    def calcSPRT(
+        self,
+        newStates: np.ndarray,
+        feature_weight: np.ndarray,
+        alpha: float = 0.1,
+        beta: float = 0.1,
+        decisionGroup: int = 5
+    ) -> list[float]:
        """
-        anomaly detection based Wald's SPRT algorithm, refer to A.Wald, Sequential Analysis,Wiley, New York, NY, USA, 1947
-        :param newsStates: input states list
-        :param feature_weight: the important weight for each feature, Normalized and Nonnegative
-        :param alpha: prescribed false alarm rate, 0 < alpha < 1
-        :param beta: prescribed miss alarm rate, 0 < beta < 1
-        :param decisionGroup: length of the test sample when the decision is done
-
-        :return: anomaly flag for each group of states, 1:anomaly, -1:normal, (-1:1): unable to decision
+        Run Wald's SPRT over newStates and return a list of scores of length
+        len(newStates) - decisionGroup + 1, each in [-1, 1]:
+          - closer to 1 → more "anomalous (dangerous)"
+          - closer to -1 → more "normal"
        """
-
-        # Step 1. transfer the raw residual vector to dimension reduced residual using feature weight
-        #stateResidual = self.calcResidual(newsStates)
-        stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
-        # print(stateResidual)
-        weightedStateResidual = [np.dot(x, feature_weight) for x in stateResidual]
-        # print(weightedStateResidual)
+        # 1) compute residuals and apply the feature weights
+        stateRes = self.calcResidualByLocallyWeightedLR(newStates)
+        weightedStateResidual = [np.dot(x, feature_weight) for x in stateRes]
        weightedHealthyResidual = [np.dot(x, feature_weight) for x in self.healthyResidual]

-        '''
-        plt.subplot(211)
-        plt.plot(weightedHealthyResidual)
-        plt.xlabel('Sample index')
-        plt.ylabel('Healthy State Residual')
-        plt.subplot(212)
-        plt.plot(weightedStateResidual)
-        plt.xlabel('Sample index')
-        plt.ylabel('All State Residual')
-        plt.show()
-        '''
-        # Calculate the distribution of health state residual
-        mu0 = np.mean(weightedHealthyResidual)
-        sigma0 = np.std(weightedHealthyResidual)
-
-
-        #sigma1 = np.std(weightedStateResidual)
-
-        lowThres = np.log(beta/(1-alpha))
-        highThres = np.log((1-beta)/alpha)
-
-        flag = []
-        for i in range(0, len(newsStates)-decisionGroup+1):
-            # For each group to calculate the new state residual distribution
-            # Then check the hypothesis testing results
-            mu1 = np.mean(weightedStateResidual[i:i+decisionGroup])
-            si = np.sum(weightedStateResidual[i:i+decisionGroup])*(mu1-mu0)/sigma0**2 - decisionGroup*(mu1**2-mu0**2)/(2*sigma0**2)
-
-            if (si > highThres):
-                si = highThres
-            if (si < lowThres):
-                si = lowThres
-
-            if (si > 0):
-                si = si/highThres
-            if (si < 0):
-                si = si/lowThres
-
-            flag.append(si)
-
-        return flag
-
-if __name__=='__main__':
-
-    start_time = time.time()
-    myMSET = MSET()
-    # 1. Compute each subsystem's health score (subsystems: generator, drivetrain (direct-drive units: no gearbox, no data), nacelle, converter, grid environment, auxiliary systems (no data))
-    # 1.1 Generator health score
-    Title = pd.read_excel(r'34_QITAIHE.xlsx', header=None, nrows=1, usecols=[12, 13, 14])
-    df_D = pd.read_excel(r'34_QITAIHE.xlsx',
-                         usecols=[0,12, 13, 14,], parse_dates=True)  # read temperature metrics: gearbox oil temp (12), generator drive-end bearing temp (13), generator non-drive-end bearing temp (14)
-
-    cols = df_D.columns
-    df_D[cols] = df_D[cols].apply(pd.to_numeric, errors='coerce')
-    df_D = df_D.dropna()  # drop empty rows and non-numeric entries
-
-    x_date = df_D.iloc[len(df_D) // 2 + 1:, 0]  # extract the time series
-    x_date = pd.to_datetime(x_date)
-    df_Temp = df_D.iloc[:, 1:]  # all columns except the time column
-    df_Temp_values = df_Temp.values
-    df_Temp_values = np.array(df_Temp_values)
-    [m, n] = np.shape(df_Temp_values)
-    # Residual = []
-    flag_Spart_data = []
-    for i in range(0, n):
-        df_Temp_i = df_Temp_values[:, i]
-        trainDataSet_data = df_Temp_i[0:len(df_Temp_i) // 2]
-        testDataSet_data = df_Temp_i[len(df_Temp_i) // 2 + 1:]
-        trainDataSet_data = np.reshape(trainDataSet_data, (len(trainDataSet_data), 1))
-        testDataSet_data = np.reshape(testDataSet_data, (len(testDataSet_data), 1))
-
-        myMSET.genDLMatrix(trainDataSet_data, 60, 5)
-        # stateResidual = self.calcStateResidual(testDataSet_data)
-        flag_data = myMSET.calcSPRT(testDataSet_data, np.array(1), decisionGroup=1)
-        # Residual.append(stateResidual)
-        flag_Spart_data.append(flag_data)
-    flag_Spart_data = np.array(flag_Spart_data)
-    flag_Spart_data = flag_Spart_data.T
-    Temp1 = flag_Spart_data[:,0]
-    Temp2 = flag_Spart_data[:,1]
-    Temp3 = flag_Spart_data[:,2]
-    Temp1_lable = Title.iloc[0, 0]
-    Temp2_lable = Title.iloc[0, 1]
-    Temp3_lable = Title.iloc[0, 2]
-
-    print(x_date)
-
-    # alarmedFlag = np.array([[i, Temp1[i]] for i, x in enumerate(Temp1) if x > 0.99])  # pick the points in Flag above 0.99
-
-    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
-    plt.close('all')
-    plt.subplot(311)
-    plt.plot(x_date, Temp1, 'b-o', label=Temp1_lable)
-    plt.ylabel(Temp1_lable)
-    plt.xlabel('时间')
-
-    # plt.scatter(alarmedFlag1[:, 0], alarmedFlag1[:, 2], marker='x', c='r')
-    plt.subplot(312)
-    plt.plot(x_date, Temp2)
-    plt.ylabel(Temp2_lable)
-    plt.xlabel('时间')
-    #plt.scatter(alarmedFlag[:, 0], alarmedFlag[:, 1], marker='x', c='r')
-    plt.subplot(313)
-    plt.plot(x_date, Temp3)
-    plt.ylabel(Temp3_lable)
-    plt.xlabel('时间')
-    #plt.scatter(alarmedFlag[:, 0], alarmedFlag[:, 1], marker='x', c='r')
-    plt.show()
-    print(Title)
-    print(flag_Spart_data)
-    print(np.shape(flag_Spart_data))
-
-
-    end_time = time.time()
-    execution_time = end_time - start_time
-    print(f"Execution time: {execution_time} seconds")
-
-
-
-#
-
-    '''
-    f = open("speed_vib.txt", "r")
-    data1 = f.read()
-    f.close()
-    data1 = data1.split('\n')
-    rpm = [(row.split('\t')[0]).strip() for row in data1]
-    vib = [(row.split('\t')[-1]).strip() for row in data1]
-
-    # print(rpm)
-    rpm = np.array(rpm).astype(np.float64)
-    vib = np.array(vib).astype(np.float64)
-
-    #vib = [(x-np.mean(vib))/np.std(vib) for x in vib]
-    #print(vib)
-    trainDataSet = [vib[i] for i in range(0,100) if vib[i] < 5]
-    trainDataSet = np.reshape(trainDataSet,(len(trainDataSet),1))
-    testDataSet = np.reshape(vib, (len(vib),1))
-    '''
-
-
-    # Title = pd.read_csv(r'F1710001001.csv', header=None, nrows=1, usecols=[36,37,38], encoding='gbk')
-
-    '''
-    alarmedFlag = np.array([[i, flag[i]] for i, x in enumerate(flag) if x > 0.99])  # pick the points in Flag above 0.99
-    plt.close('all')
-    plt.subplot(211)
-    plt.plot(testDataSet)
-    plt.ylabel('Vibration')
-    plt.xlabel('Sample index')
-    plt.subplot(212)
-    plt.plot(flag)
-    plt.ylabel('SPRT results')
-    plt.xlabel('Sample index')
-    plt.scatter(alarmedFlag[:,0], alarmedFlag[:,1], marker='x',c='r')
-
-    plt.show()
-    '''
-
-
-
-
-
+        # 2) distribution statistics of the healthy residual
+        mu0 = float(np.mean(weightedHealthyResidual))
+        sigma0 = float(np.std(weightedHealthyResidual))
+
+        # 3) SPRT decision thresholds
+        lowThres = np.log(beta / (1.0 - alpha))    # < 0
+        highThres = np.log((1.0 - beta) / alpha)   # > 0
+
+        flags: list[float] = []
+        length = len(weightedStateResidual)
+        for i in range(0, length - decisionGroup + 1):
+            segment = weightedStateResidual[i : i + decisionGroup]
+            mu1 = float(np.mean(segment))
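+            # (Descriptive note added for clarity: si below is the Gaussian
+            # log-likelihood ratio for H1 "mean mu1" vs H0 "mean mu0" with
+            # shared variance sigma0**2 — the SPRT statistic of Wald, 1947,
+            # cited in the original docstring.)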
+            si = (
+                np.sum(segment) * (mu1 - mu0) / (sigma0**2)
+                - decisionGroup * ((mu1**2) - (mu0**2)) / (2.0 * (sigma0**2))
+            )
+
+            # clamp si into [lowThres, highThres]
+            si = max(min(si, highThres), lowThres)
+            # normalise the positive/negative branches into [-1, 1]
+            if si > 0:
+                norm_si = float(si / highThres)
+            else:
+                norm_si = float(si / lowThres)
+            flags.append(norm_si)
+
+        return flags
+
+    def check_threshold(self) -> pd.DataFrame:
+        """
+        Threshold analysis (threshold fixed at 0.99). Returns a long-format
+        DataFrame with columns
+        ["time_stamp", "temp_channel", "SPRT_score", "status"], where
+        status = "危险" (danger) if SPRT_score > 0.99 else "正常" (normal).
+        """
+        THRESHOLD = 0.99
+
+        # 1) query the raw data by turbine IDs + time range
+        df_concat = self._get_data_by_filter()
+        if df_concat.empty:
+            return pd.DataFrame(columns=["time_stamp", "temp_channel", "SPRT_score", "status"])
+
+        # 2) keep only the temperature columns that actually exist
+        temp_cols_all = [
+            'main_bearing_temperature',
+            'gearbox_oil_temperature',
+            'generatordrive_end_bearing_temperature',
+            'generatornon_drive_end_bearing_temperature'
+        ]
+        temp_cols = [c for c in temp_cols_all if c in df_concat.columns]
+        if not temp_cols:
+            return pd.DataFrame(columns=["time_stamp", "temp_channel", "SPRT_score", "status"])
+
+        # 3) coerce to numeric & drop NaN rows
+        df_concat[temp_cols] = df_concat[temp_cols].apply(pd.to_numeric, errors='coerce')
+        df_concat = df_concat.dropna(subset=temp_cols + ['time_stamp'])
+        if df_concat.empty:
+            return pd.DataFrame(columns=["time_stamp", "temp_channel", "SPRT_score", "status"])
+
+        # 4) convert time_stamp to datetime
+        df_concat['time_stamp'] = pd.to_datetime(df_concat['time_stamp'])
+        x_date = df_concat['time_stamp']
+
+        # 5) extract the temperature columns into a NumPy array
+        arr = df_concat[temp_cols].values  # shape = [n_records, n_channels]
+        m, n = arr.shape
+        half = m // 2
+
+        all_flags: list[list[float]] = []
+        for i in range(n):
+            channel = arr[:, i]
+            train = channel[:half].reshape(-1, 1)
+            test = channel[half:].reshape(-1, 1)
+
+            # build the D/L matrices from the training half
+            if self.genDLMatrix(train, dataSize4D=60, dataSize4L=5) != 0:
+                # not enough training samples: return an empty table
+                return pd.DataFrame(columns=["time_stamp", "temp_channel", "SPRT_score", "status"])
+
+            feature_w = np.array([1.0])
+            flags = self.calcSPRT(test, feature_w, decisionGroup=1)
+            all_flags.append(flags)
+
+        # 6) assemble a wide table, then melt it into long format
+        flags_arr = np.array(all_flags)  # shape = [n_channels, n_test_samples]
+        num_test = flags_arr.shape[1]
+        ts = x_date.iloc[half : half + num_test].reset_index(drop=True)
+
+        wide = pd.DataFrame({"time_stamp": ts})
+        for idx, col in enumerate(temp_cols):
+            wide[col] = flags_arr[idx, :]
+
+        df_long = wide.melt(
+            id_vars=["time_stamp"],
+            value_vars=temp_cols,
+            var_name="temp_channel",
+            value_name="SPRT_score"
+        )
+        # render time_stamp back to "YYYY-MM-DD HH:MM:SS" strings
+        df_long['time_stamp'] = pd.to_datetime(df_long['time_stamp']).dt.strftime("%Y-%m-%d %H:%M:%S")
+
+        # 7) status column: SPRT_score > 0.99 → "危险" (danger), else "正常" (normal)
+        df_long['status'] = df_long['SPRT_score'].apply(
+            lambda x: "危险" if x > THRESHOLD else "正常"
+        )
+
+        return df_long
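+    # Example of the long-format result (illustrative values only):
+    #             time_stamp              temp_channel  SPRT_score status
+    # 0  2025-06-01 12:00:00  main_bearing_temperature       0.120   正常
+    # 1  2025-06-01 12:01:00  main_bearing_temperature       0.995   危险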
+
+    def get_trend(self) -> dict:
+        """
+        Trend analysis: return the temperature series ordered by time.
+        Return format: {
+            "timestamps": [list of "YYYY-MM-DD HH:MM:SS" strings],
+            "channels": [
+                {"temp_channel": "main_bearing_temperature", "values": [floats]},
+                {"temp_channel": "gearbox_oil_temperature", "values": [...]},
+                ...
+            ],
+            "unit": "°C"
+        }
+        """
+        df = self._get_data_by_filter()
+
+        if df.empty:
+            return {"timestamps": [], "channels": [], "unit": "°C"}
+
+        # all temperature columns worth checking
+        temp_cols_all = [
+            'main_bearing_temperature',
+            'gearbox_oil_temperature',
+            'generatordrive_end_bearing_temperature',
+            'generatornon_drive_end_bearing_temperature'
+        ]
+        # keep only the columns that actually exist
+        temp_cols = [c for c in temp_cols_all if c in df.columns]
+
+        # no temperature columns at all: return empty data
+        if not temp_cols:
+            return {"timestamps": [], "channels": [], "unit": "°C"}
+
+        # coerce to numeric and drop NaN rows
+        df[temp_cols] = df[temp_cols].apply(pd.to_numeric, errors='coerce')
+        df = df.dropna(subset=temp_cols + ['time_stamp'])
+
+        # format time_stamp as "YYYY-MM-DD HH:MM:SS" strings
+        df['time_stamp'] = pd.to_datetime(df['time_stamp']).dt.strftime("%Y-%m-%d %H:%M:%S")
+        df = df.sort_values('time_stamp').reset_index(drop=True)
+
+        # the formatted timestamps double as the shared x-axis
+        timestamps = df['time_stamp'].tolist()
+
+        # for each channel, collect its values row by row
+        channels_data = []
+        for col in temp_cols:
+            channels_data.append({
+                "temp_channel": col,
+                "values": df[col].tolist()
+            })
+
+        return {
+            "timestamps": timestamps,
+            "channels": channels_data,
+            "unit": "°C"
+        }
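+
+
+if __name__ == "__main__":
+    # Minimal usage sketch (illustrative addition, not part of the original
+    # change): assumes the MySQL instance above is reachable and that a table
+    # "<windCode>_minute" exists; "WOG01312" and "WT001" are placeholders.
+    diag = MSET_Temp(
+        windCode="WOG01312",
+        windTurbineNumberList=["WT001"],
+        startTime="2025-06-01 00:00",
+        endTime="2025-06-02 00:00",
+    )
+    print(diag.check_threshold().head())        # long-format SPRT scores + status
+    print(len(diag.get_trend()["timestamps"]))  # size of the trend payload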