@@ -1,335 +1,339 @@
-# @Time : 2019-6-19
-# @Author : wilbur.cheng@SKF
-# @FileName: baseMSET.py
-# @Software: a package in skf anomaly detection library to realize multivariate state estimation technique
-# this package has included all the functions used in MSET, similarity calculation, state memory matrix D,
-# normal state matrix L, residual calculation, and the basic SPRT (Sequential Probability Ratio Test)
-# function for binary hypothesis testing.
-#
+# temp_diag.py

import numpy as np
import pandas as pd
-import matplotlib.pyplot as plt
from sklearn.neighbors import BallTree
-import openpyxl
-import time
-#from scikit-learn.neighbors import BallTree
+from sqlalchemy import create_engine, text
+import math

-class MSET():
+class MSET_Temp:
+    """
+    Temperature trend/threshold analysis based on MSET + SPRT.
+    Query conditions are determined by the wind_turbine_number column and the
+    time_stamp range; the SPRT alarm threshold is fixed at 0.99, and calcSPRT
+    outputs scores in [-1, 1].
+    """

-    matrixD = None
-    matrixL = None
-    healthyResidual = None
-
-    normalDataBallTree = None
-
-
-
-    def __init__(self):
-        self.model = None
-
-
-    def calcSimilarity(self, x, y, m = 'euc'):
+    def __init__(self, windCode: str, windTurbineNumberList: list[str], startTime: str, endTime: str):
        """
-        Calcualte the similartity of two feature vector
-        :param x: one of input feature list
-        :param y: one of input feature list
-        :param m: method of the similarity calculation method, default is the Eucilidean distance named 'euc';
-                  a city block distance function is used when m set to "cbd'.
-        :return: the two feature similarity, float, range (0,1)
+        :param windCode: turbine/unit code, used to build the table name, e.g. "WOG01312" → table "WOG01312_minute"
+        :param windTurbineNumberList: list of wind_turbine_number values (turbine IDs) to query
+        :param startTime: start time as a string, format "YYYY-MM-DD HH:MM"
+        :param endTime: end time as a string, format "YYYY-MM-DD HH:MM"
        """
+        self.windCode = windCode.strip()
+        self.windTurbineNumberList = windTurbineNumberList
+        # force-truncate both endpoints to whole seconds
+        self.startTime = self._truncate_to_seconds(startTime)
+        self.endTime = self._truncate_to_seconds(endTime)
+
+        # D/L matrix state
+        self.matrixD = None
+        self.matrixL = None
+        self.healthyResidual = None
+        self.normalDataBallTree = None
+
+    def _truncate_to_seconds(self, dt_str: str) -> str:
+        """
+        Truncate a timestamp string that may be ISO-formatted or carry
+        fractional seconds down to whole seconds, returning
+        "YYYY-MM-DD HH:MM:SS".
+        e.g. "2025-06-01T12:34:56.789Z" → "2025-06-01 12:34:56"
+        """
+        # replace 'T' with a space and strip any trailing "Z"
+        s = dt_str.replace("T", " ").rstrip("Z")
+        # drop fractional seconds if present
+        if "." in s:
+            s = s.split(".")[0]
+        # drop a trailing "+xx:xx" timezone suffix if present
+        if "+" in s:
+            s = s.split("+")[0]
+        return s.strip()
+
+    def _get_data_by_filter(self) -> pd.DataFrame:
+        """
+        Batch-query rows matching the wind_turbine_number list and the
+        time_stamp range, returning one DataFrame sorted by time_stamp
+        ascending.
+        """
+        table_name = f"{self.windCode}_minute"
+        engine = create_engine(
+            "mysql+pymysql://root:admin123456@106.120.102.238:10336/energy_data_prod"
+        )
+
+        # build the SQL fragment for the wind_turbine_number list: ('WT1','WT2',...)
+        turbines = ",".join(f"'{wt.strip()}'" for wt in self.windTurbineNumberList)
+        sql = text(f"""
+            SELECT *
+            FROM {table_name}
+            WHERE wind_turbine_number IN ({turbines})
+              AND time_stamp BETWEEN :start AND :end
+            ORDER BY time_stamp ASC
+        """)
+
+        df = pd.read_sql(sql, engine, params={"start": self.startTime, "end": self.endTime})
+        return df
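+    # Illustrative note (added, not from the original change): for windCode
+    # "WOG01312" and turbines ["WT01", "WT02"] — placeholder values — the
+    # f-string above renders roughly:
+    #   SELECT * FROM WOG01312_minute
+    #   WHERE wind_turbine_number IN ('WT01','WT02')
+    #     AND time_stamp BETWEEN :start AND :end
+    #   ORDER BY time_stamp ASC
+    # Only :start/:end are bound parameters; the turbine list is interpolated.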
+
+    def calcSimilarity(self, x: np.ndarray, y: np.ndarray, m: str = 'euc') -> float:
+        """
+        Compute the similarity of vectors x and y, in the interval (0, 1]:
+          - m='euc' → Euclidean distance
+          - m='cbd' → city-block (Manhattan) distance
+        """
+        if len(x) != len(y):
+            return 0.0

-        if (len(x) != len(y)):
-            return 0
-
-        if (m == 'cbd'):
-            dSimilarity = [1/(1+np.abs(p-q)) for p,q in zip(x,y)]
-            dSimilarity = np.sum(dSimilarity)/len(x)
+        if m == 'cbd':
+            arr = [1.0 / (1.0 + abs(p - q)) for p, q in zip(x, y)]
+            return float(np.sum(arr) / len(arr))
        else:
-            dSimilarity = [np.power(p-q,2) for p, q in zip(x, y)]
-            dSimilarity = 1/(1+np.sqrt(np.sum(dSimilarity)))
-
-        return dSimilarity
-
+            diffsq = [(p - q) ** 2 for p, q in zip(x, y)]
+            return float(1.0 / (1.0 + math.sqrt(np.sum(diffsq))))
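+    # Worked example (illustrative addition): for x=[0, 0] and y=[3, 4],
+    # 'euc' gives 1/(1+5) ≈ 0.167, while 'cbd' averages 1/(1+3) and 1/(1+4)
+    # to 0.225; identical vectors score exactly 1.0 under either metric.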

-    def genDLMatrix(self, trainDataset, dataSize4D=100, dataSize4L=50):
+    def genDLMatrix(self, trainDataset: np.ndarray, dataSize4D=100, dataSize4L=50) -> int:
        """
-        automatically generate the D and L matrix from training data set, assuming the training data set is all normal
-        state data.
-        :param trainDataset: 2D array, [count of data, length of feature]
-        :param dataSize4D: minimized count of state for matrix D
-        :param dataSize4L: minimized count of state for matrix L
-        :return: 0 if successful, -1 if fail
+        Build the D/L matrices from the training set trainDataset:
+          - if the sample count < dataSize4D + dataSize4L, return -1
+          - otherwise construct matrixD and matrixL, derive healthyResidual
+            via locally weighted regression, and return 0
        """
-        [m,n] = np.shape(trainDataset)
-
+        m, n = trainDataset.shape
        if m < dataSize4D + dataSize4L:
-            print('training dataset is too small to generate the matrix D and L')
            return -1

+        # Step 1: add the sample holding the min/max of each feature to matrixD
        self.matrixD = []
        selectIndex4D = []
-        # Step 1: add all the state with minimum or maximum value in each feature into Matrix D
-
-        for i in range(0, n):
-            feature_i = trainDataset[:,i]
-            minIndex = np.argmin(feature_i)
-            maxIndex = np.argmax(feature_i)
-
-            self.matrixD.append(trainDataset[minIndex, :].tolist())
-            selectIndex4D.append(minIndex)
-            self.matrixD.append(trainDataset[maxIndex, :].tolist())
-            selectIndex4D.append(maxIndex)
-
-        # Step 2: iteratively add the state with the maximum average distance to the states in selected matrixD
-        while(1):
-
-            if (len(selectIndex4D) >= dataSize4D):
-                break
-
-            # Get the free state list
-            freeStateList = list(set(np.arange(0, len(trainDataset))) - set(selectIndex4D))
-
-            # Calculate the average dist of each state in free to selected state in matrix D
-            distList = []
-            for i in freeStateList:
-                tmpState = trainDataset[i]
-                tmpDist = [1-self.calcSimilarity(x, tmpState) for x in self.matrixD]
-                distList.append(np.mean(tmpDist))
-
-            # select the free state with largest average distance to states in matrixD, and update matrixD
-            selectId = freeStateList[distList.index(np.max(distList))]
-            self.matrixD.append(trainDataset[selectId, :].tolist())
-            selectIndex4D.append(selectId)
-        #format matrixD from list to array
-        self.matrixD = np.array(self.matrixD)
-        self.normalDataBallTree = BallTree(self.matrixD, leaf_size=4, metric = lambda i,j: 1-self.calcSimilarity(i,j))
+        for i in range(n):
+            col_i = trainDataset[:, i]
+            idx_min = np.argmin(col_i)
+            idx_max = np.argmax(col_i)
+            self.matrixD.append(trainDataset[idx_min, :].tolist())
+            selectIndex4D.append(idx_min)
+            self.matrixD.append(trainDataset[idx_max, :].tolist())
+            selectIndex4D.append(idx_max)
+
+        # Step 2: repeatedly pull in the free sample with the largest average
+        # distance to matrixD, until matrixD holds dataSize4D rows
+        while len(selectIndex4D) < dataSize4D:
+            freeList = list(set(range(len(trainDataset))) - set(selectIndex4D))
+            distAvg = []
+            for idx in freeList:
+                tmp = trainDataset[idx, :]
+                dlist = [1.0 - self.calcSimilarity(x, tmp) for x in self.matrixD]
+                distAvg.append(np.mean(dlist))
+            select_id = freeList[int(np.argmax(distAvg))]
+            self.matrixD.append(trainDataset[select_id, :].tolist())
+            selectIndex4D.append(select_id)

+        self.matrixD = np.array(self.matrixD)

-        # Step 3. select remaining state for matrix L
-        #index4L = list(set(np.arange(0, len(trainDataset))) - set(selectIndex4D))
-        #self.matrixL = trainDataset[index4L, :]
-        # consider the limited dataset, using all the train data to matrix L
-        self.matrixL = trainDataset
+        # build a BallTree over matrixD for the locally weighted regression
+        self.normalDataBallTree = BallTree(
+            self.matrixD,
+            leaf_size=4,
+            metric=lambda a, b: 1.0 - self.calcSimilarity(a, b)
+        )

-        # Calculate the healthy Residual from matrix L
-        lamdaRatio = 1e-3
-        [m, n] = np.shape(self.matrixD)
-        self.DDSimilarity = np.array([[1-self.calcSimilarity(x,y) for x in self.matrixD] for y in self.matrixD] + lamdaRatio*np.eye(n))
-        self.DDSimilarity = 1/self.DDSimilarity
+        # Step 3: use the whole training set as matrixL
+        self.matrixL = trainDataset.copy()

-        #self.healthyResidual = self.calcResidual(self.matrixL)
+        # Step 4: compute the healthy residual via locally weighted regression
        self.healthyResidual = self.calcResidualByLocallyWeightedLR(self.matrixL)
-
-
        return 0
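+    # Illustrative shapes (assumption, not from the original change): for a
+    # trainDataset of shape (100, 1) with dataSize4D=60, matrixD ends up
+    # (60, 1) — the per-feature extremes plus the most spread-out samples —
+    # matrixL is the full (100, 1) training set, and healthyResidual is (100, 1).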

-    def calcResidualByLocallyWeightedLR(self, newStates):
+    def calcResidualByLocallyWeightedLR(self, newStates: np.ndarray) -> np.ndarray:
        """
-        find the K-nearest neighbors for each input state, then calculate the estimated state by locally weighted average.
-        :param newStates: input states list
-        :return: residual R_x
+        For each sample in newStates, run a locally weighted regression over
+        its 20 nearest neighbours in matrixD and compute the residual.
+        Returns a residual matrix of shape [len(newStates), n_features].
        """
-        [m,n] = np.shape(newStates)
-        est_X = []
-        # print(newStates)
+        est_list = []
        for x in newStates:
-            (dist, iList) = self.normalDataBallTree.query([x], 20, return_distance=True)
-            weight = 1/(dist[0]+1e-1)
-            weight = weight/sum(weight)
-            eState = np.sum([w*self.matrixD[i] for w,i in zip(weight, iList[0])])
-            est_X.append(eState)
-
-        est_X = np.reshape(est_X, (len(est_X),1))
-        # print(est_X)
-        # print(newStates)
-        return est_X - newStates
-
-    def calcStateResidual(self, newsStates):
-        stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
-        return stateResidual
-
-    def calcSPRT(self, newsStates, feature_weight, alpha=0.1, beta=0.1, decisionGroup=5):
+            dist, idxs = self.normalDataBallTree.query([x], k=20, return_distance=True)
+            w = 1.0 / (dist[0] + 1e-1)
+            w = w / np.sum(w)
+            est = np.sum([w_i * self.matrixD[j] for w_i, j in zip(w, idxs[0])], axis=0)
+            est_list.append(est)
+        est_arr = np.reshape(np.array(est_list), (len(est_list), -1))
+        return est_arr - newStates
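+    # Weighting sketch (illustrative numbers): neighbour distances
+    # [0.0, 0.1, 0.3] give raw weights 1/(d+0.1) = [10.0, 5.0, 2.5],
+    # normalised to [0.571, 0.286, 0.143], so the closest neighbours
+    # dominate the estimated state.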
+
+    def calcSPRT(
+        self,
+        newStates: np.ndarray,
+        feature_weight: np.ndarray,
+        alpha: float = 0.1,
+        beta: float = 0.1,
+        decisionGroup: int = 5
+    ) -> list[float]:
        """
-        anomaly detection based Wald's SPRT algorithm, refer to A.Wald, Sequential Analysis,Wiley, New York, NY, USA, 1947
-        :param newsStates: input states list
-        :param feature_weight: the important weight for each feature, Normalized and Nonnegative
-        :param alpha: prescribed false alarm rate, 0 < alpha < 1
-        :param beta: prescribed miss alarm rate, 0 < beta < 1
-        :param decisionGroup: length of the test sample when the decision is done
-
-        :return: anomaly flag for each group of states, 1:anomaly, -1:normal, (-1:1): unable to decision
+        Run Wald's SPRT over newStates and return a list of scores of length
+        len(newStates) - decisionGroup + 1, each in [-1, 1]:
+          - closer to 1 → more "anomalous (dangerous)"
+          - closer to -1 → more "normal"
        """
-
-        # Step 1. transfer the raw residual vector to dimension reduced residual using feature weight
-        #stateResidual = self.calcResidual(newsStates)
-        stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
-        # print(stateResidual)
-        weightedStateResidual = [np.dot(x, feature_weight) for x in stateResidual]
-        # print(weightedStateResidual)
+        # 1) compute residuals and apply the feature weights
+        stateRes = self.calcResidualByLocallyWeightedLR(newStates)
+        weightedStateResidual = [np.dot(x, feature_weight) for x in stateRes]
        weightedHealthyResidual = [np.dot(x, feature_weight) for x in self.healthyResidual]

-        '''
-        plt.subplot(211)
-        plt.plot(weightedHealthyResidual)
-        plt.xlabel('Sample index')
-        plt.ylabel('Healthy State Residual')
-        plt.subplot(212)
-        plt.plot(weightedStateResidual)
-        plt.xlabel('Sample index')
-        plt.ylabel('All State Residual')
-        plt.show()
-        '''
-        # Calculate the distribution of health state residual
-        mu0 = np.mean(weightedHealthyResidual)
-        sigma0 = np.std(weightedHealthyResidual)
-
-
-        #sigma1 = np.std(weightedStateResidual)
-
-        lowThres = np.log(beta/(1-alpha))
-        highThres = np.log((1-beta)/alpha)
-
-        flag = []
-        for i in range(0, len(newsStates)-decisionGroup+1):
-            # For each group to calculate the new state residual distribution
-            # Then check the hypothesis testing results
-            mu1 = np.mean(weightedStateResidual[i:i+decisionGroup])
-            si = np.sum(weightedStateResidual[i:i+decisionGroup])*(mu1-mu0)/sigma0**2 - decisionGroup*(mu1**2-mu0**2)/(2*sigma0**2)
-
-            if (si > highThres):
-                si = highThres
-            if (si < lowThres):
-                si = lowThres
-
-            if (si > 0):
-                si = si/highThres
-            if (si < 0):
-                si = si/lowThres
-
-            flag.append(si)
-
-        return flag
-
-if __name__=='__main__':
-
-    start_time = time.time()
-    myMSET = MSET()
-    # 1. Compute each subsystem's health score (subsystems: generator, drivetrain (direct-drive units: no gearbox, no data), nacelle, converter, grid environment, auxiliary systems (no data))
-    # 1.1 Generator health score
-    Title = pd.read_excel(r'34_QITAIHE.xlsx', header=None, nrows=1, usecols=[12, 13, 14])
-    df_D = pd.read_excel(r'34_QITAIHE.xlsx',
-                         usecols=[0,12, 13, 14,], parse_dates=True)  # read temperature metrics: gearbox oil temp (12), generator drive-end bearing temp (13), generator non-drive-end bearing temp (14)
-
-    cols = df_D.columns
-    df_D[cols] = df_D[cols].apply(pd.to_numeric, errors='coerce')
-    df_D = df_D.dropna()  # drop empty rows and non-numeric entries
-
-    x_date = df_D.iloc[len(df_D) // 2 + 1:, 0]  # extract the time series
-    x_date = pd.to_datetime(x_date)
-    df_Temp = df_D.iloc[:, 1:]  # all columns except the time column
-    df_Temp_values = df_Temp.values
-    df_Temp_values = np.array(df_Temp_values)
-    [m, n] = np.shape(df_Temp_values)
-    # Residual = []
-    flag_Spart_data = []
-    for i in range(0, n):
-        df_Temp_i = df_Temp_values[:, i]
-        trainDataSet_data = df_Temp_i[0:len(df_Temp_i) // 2]
-        testDataSet_data = df_Temp_i[len(df_Temp_i) // 2 + 1:]
-        trainDataSet_data = np.reshape(trainDataSet_data, (len(trainDataSet_data), 1))
-        testDataSet_data = np.reshape(testDataSet_data, (len(testDataSet_data), 1))
-
-        myMSET.genDLMatrix(trainDataSet_data, 60, 5)
-        # stateResidual = self.calcStateResidual(testDataSet_data)
-        flag_data = myMSET.calcSPRT(testDataSet_data, np.array(1), decisionGroup=1)
-        # Residual.append(stateResidual)
-        flag_Spart_data.append(flag_data)
-    flag_Spart_data = np.array(flag_Spart_data)
-    flag_Spart_data = flag_Spart_data.T
-    Temp1 = flag_Spart_data[:,0]
-    Temp2 = flag_Spart_data[:,1]
-    Temp3 = flag_Spart_data[:,2]
-    Temp1_lable = Title.iloc[0, 0]
-    Temp2_lable = Title.iloc[0, 1]
-    Temp3_lable = Title.iloc[0, 2]
-
-    print(x_date)
-
-    # alarmedFlag = np.array([[i, Temp1[i]] for i, x in enumerate(Temp1) if x > 0.99])  # pick the points in Flag above 0.99
-
-    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
-    plt.close('all')
-    plt.subplot(311)
-    plt.plot(x_date, Temp1, 'b-o', label=Temp1_lable)
-    plt.ylabel(Temp1_lable)
-    plt.xlabel('时间')
-
-    # plt.scatter(alarmedFlag1[:, 0], alarmedFlag1[:, 2], marker='x', c='r')
-    plt.subplot(312)
-    plt.plot(x_date, Temp2)
-    plt.ylabel(Temp2_lable)
-    plt.xlabel('时间')
-    #plt.scatter(alarmedFlag[:, 0], alarmedFlag[:, 1], marker='x', c='r')
-    plt.subplot(313)
-    plt.plot(x_date, Temp3)
-    plt.ylabel(Temp3_lable)
-    plt.xlabel('时间')
-    #plt.scatter(alarmedFlag[:, 0], alarmedFlag[:, 1], marker='x', c='r')
-    plt.show()
-    print(Title)
-    print(flag_Spart_data)
-    print(np.shape(flag_Spart_data))
-
-
-    end_time = time.time()
-    execution_time = end_time - start_time
-    print(f"Execution time: {execution_time} seconds")
-
-
-
-#
-
-    '''
-    f = open("speed_vib.txt", "r")
-    data1 = f.read()
-    f.close()
-    data1 = data1.split('\n')
-    rpm = [(row.split('\t')[0]).strip() for row in data1]
-    vib = [(row.split('\t')[-1]).strip() for row in data1]
-
-    # print(rpm)
-    rpm = np.array(rpm).astype(np.float64)
-    vib = np.array(vib).astype(np.float64)
-
-    #vib = [(x-np.mean(vib))/np.std(vib) for x in vib]
-    #print(vib)
-    trainDataSet = [vib[i] for i in range(0,100) if vib[i] < 5]
-    trainDataSet = np.reshape(trainDataSet,(len(trainDataSet),1))
-    testDataSet = np.reshape(vib, (len(vib),1))
-    '''
-
-
-    # Title = pd.read_csv(r'F1710001001.csv', header=None, nrows=1, usecols=[36,37,38], encoding='gbk')
-
-    '''
-    alarmedFlag = np.array([[i, flag[i]] for i, x in enumerate(flag) if x > 0.99])  # pick the points in Flag above 0.99
-    plt.close('all')
-    plt.subplot(211)
-    plt.plot(testDataSet)
-    plt.ylabel('Vibration')
-    plt.xlabel('Sample index')
-    plt.subplot(212)
-    plt.plot(flag)
-    plt.ylabel('SPRT results')
-    plt.xlabel('Sample index')
-    plt.scatter(alarmedFlag[:,0], alarmedFlag[:,1], marker='x',c='r')
-
-    plt.show()
-    '''
-
-
-
-
-
+        # 2) distribution statistics of the healthy residual
+        mu0 = float(np.mean(weightedHealthyResidual))
+        sigma0 = float(np.std(weightedHealthyResidual))
+
+        # 3) SPRT decision thresholds
+        lowThres = np.log(beta / (1.0 - alpha))    # < 0
+        highThres = np.log((1.0 - beta) / alpha)   # > 0
+
+        flags: list[float] = []
+        length = len(weightedStateResidual)
+        for i in range(0, length - decisionGroup + 1):
+            segment = weightedStateResidual[i : i + decisionGroup]
+            mu1 = float(np.mean(segment))
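+            # (Descriptive note added for clarity: si below is the Gaussian
+            # log-likelihood ratio for H1 "mean mu1" vs H0 "mean mu0" with
+            # shared variance sigma0**2 — the SPRT statistic of Wald, 1947,
+            # cited in the original docstring.)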
+            si = (
+                np.sum(segment) * (mu1 - mu0) / (sigma0**2)
+                - decisionGroup * ((mu1**2) - (mu0**2)) / (2.0 * (sigma0**2))
+            )
+
+            # clamp si into [lowThres, highThres]
+            si = max(min(si, highThres), lowThres)
+            # normalise the positive/negative branches into [-1, 1]
+            if si > 0:
+                norm_si = float(si / highThres)
+            else:
+                norm_si = float(si / lowThres)
+            flags.append(norm_si)
+
+        return flags
+
+    def check_threshold(self) -> pd.DataFrame:
+        """
+        Threshold analysis (threshold fixed at 0.99). Returns a long-format
+        DataFrame with columns
+        ["time_stamp", "temp_channel", "SPRT_score", "status"], where
+        status = "危险" (danger) if SPRT_score > 0.99 else "正常" (normal).
+        """
+        THRESHOLD = 0.99
+
+        # 1) query the raw data by turbine IDs + time range
+        df_concat = self._get_data_by_filter()
+        if df_concat.empty:
+            return pd.DataFrame(columns=["time_stamp", "temp_channel", "SPRT_score", "status"])
+
+        # 2) keep only the temperature columns that actually exist
+        temp_cols_all = [
+            'main_bearing_temperature',
+            'gearbox_oil_temperature',
+            'generatordrive_end_bearing_temperature',
+            'generatornon_drive_end_bearing_temperature'
+        ]
+        temp_cols = [c for c in temp_cols_all if c in df_concat.columns]
+        if not temp_cols:
+            return pd.DataFrame(columns=["time_stamp", "temp_channel", "SPRT_score", "status"])
+
+        # 3) coerce to numeric & drop NaN rows
+        df_concat[temp_cols] = df_concat[temp_cols].apply(pd.to_numeric, errors='coerce')
+        df_concat = df_concat.dropna(subset=temp_cols + ['time_stamp'])
+        if df_concat.empty:
+            return pd.DataFrame(columns=["time_stamp", "temp_channel", "SPRT_score", "status"])
+
+        # 4) convert time_stamp to datetime
+        df_concat['time_stamp'] = pd.to_datetime(df_concat['time_stamp'])
+        x_date = df_concat['time_stamp']
+
+        # 5) extract the temperature columns into a NumPy array
+        arr = df_concat[temp_cols].values  # shape = [n_records, n_channels]
+        m, n = arr.shape
+        half = m // 2
+
+        all_flags: list[list[float]] = []
+        for i in range(n):
+            channel = arr[:, i]
+            train = channel[:half].reshape(-1, 1)
+            test = channel[half:].reshape(-1, 1)
+
+            # build the D/L matrices from the training half
+            if self.genDLMatrix(train, dataSize4D=60, dataSize4L=5) != 0:
+                # not enough training samples: return an empty table
+                return pd.DataFrame(columns=["time_stamp", "temp_channel", "SPRT_score", "status"])
+
+            feature_w = np.array([1.0])
+            flags = self.calcSPRT(test, feature_w, decisionGroup=1)
+            all_flags.append(flags)
+
+        # 6) assemble a wide table, then melt it into long format
+        flags_arr = np.array(all_flags)  # shape = [n_channels, n_test_samples]
+        num_test = flags_arr.shape[1]
+        ts = x_date.iloc[half : half + num_test].reset_index(drop=True)
+
+        wide = pd.DataFrame({"time_stamp": ts})
+        for idx, col in enumerate(temp_cols):
+            wide[col] = flags_arr[idx, :]
+
+        df_long = wide.melt(
+            id_vars=["time_stamp"],
+            value_vars=temp_cols,
+            var_name="temp_channel",
+            value_name="SPRT_score"
+        )
+        # render time_stamp back to "YYYY-MM-DD HH:MM:SS" strings
+        df_long['time_stamp'] = pd.to_datetime(df_long['time_stamp']).dt.strftime("%Y-%m-%d %H:%M:%S")
+
+        # 7) status column: SPRT_score > 0.99 → "危险" (danger), else "正常" (normal)
+        df_long['status'] = df_long['SPRT_score'].apply(
+            lambda x: "危险" if x > THRESHOLD else "正常"
+        )
+
+        return df_long
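+    # Example of the long-format result (illustrative values only):
+    #             time_stamp              temp_channel  SPRT_score status
+    # 0  2025-06-01 12:00:00  main_bearing_temperature       0.120   正常
+    # 1  2025-06-01 12:01:00  main_bearing_temperature       0.995   危险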
+
+    def get_trend(self) -> dict:
+        """
+        Trend analysis: return the temperature series ordered by time.
+        Return format: {
+            "timestamps": [list of "YYYY-MM-DD HH:MM:SS" strings],
+            "channels": [
+                {"temp_channel": "main_bearing_temperature", "values": [floats]},
+                {"temp_channel": "gearbox_oil_temperature", "values": [...]},
+                ...
+            ],
+            "unit": "°C"
+        }
+        """
+        df = self._get_data_by_filter()
+
+        if df.empty:
+            return {"timestamps": [], "channels": [], "unit": "°C"}
+
+        # all temperature columns worth checking
+        temp_cols_all = [
+            'main_bearing_temperature',
+            'gearbox_oil_temperature',
+            'generatordrive_end_bearing_temperature',
+            'generatornon_drive_end_bearing_temperature'
+        ]
+        # keep only the columns that actually exist
+        temp_cols = [c for c in temp_cols_all if c in df.columns]
+
+        # no temperature columns at all: return empty data
+        if not temp_cols:
+            return {"timestamps": [], "channels": [], "unit": "°C"}
+
+        # coerce to numeric and drop NaN rows
+        df[temp_cols] = df[temp_cols].apply(pd.to_numeric, errors='coerce')
+        df = df.dropna(subset=temp_cols + ['time_stamp'])
+
+        # format time_stamp as "YYYY-MM-DD HH:MM:SS" strings
+        df['time_stamp'] = pd.to_datetime(df['time_stamp']).dt.strftime("%Y-%m-%d %H:%M:%S")
+        df = df.sort_values('time_stamp').reset_index(drop=True)
+
+        # the formatted timestamps double as the shared x-axis
+        timestamps = df['time_stamp'].tolist()
+
+        # for each channel, collect its values row by row
+        channels_data = []
+        for col in temp_cols:
+            channels_data.append({
+                "temp_channel": col,
+                "values": df[col].tolist()
+            })
+
+        return {
+            "timestamps": timestamps,
+            "channels": channels_data,
+            "unit": "°C"
+        }
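+
+
+if __name__ == "__main__":
+    # Minimal usage sketch (illustrative addition, not part of the original
+    # change): assumes the MySQL instance above is reachable and that a table
+    # "<windCode>_minute" exists; "WOG01312" and "WT001" are placeholders.
+    diag = MSET_Temp(
+        windCode="WOG01312",
+        windTurbineNumberList=["WT001"],
+        startTime="2025-06-01 00:00",
+        endTime="2025-06-02 00:00",
+    )
+    print(diag.check_threshold().head())        # long-format SPRT scores + status
+    print(len(diag.get_trend()["timestamps"]))  # size of the trend payload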