Xmia 3 days ago
parent
commit
7bc10b1011

+ 306 - 302
Temp_Diag.PY

@@ -1,335 +1,339 @@
-# @Time    : 2019-6-19
-# @Author  : wilbur.cheng@SKF
-# @FileName: baseMSET.py
-# @Software: a package in skf anomaly detection library to realize multivariate state estimation technique
-#            this package has included all the functions used in MSET, similarity calculation, state memory matrix D,
-#            normal state matrix L, residual calculation, and the basic SPRT (Sequential Probability Ratio Test)
-#            function for binary hypothesis testing.
-#
+# Temp_Diag.py
 
 import numpy as np
 import pandas as pd
-import matplotlib.pyplot as plt
 from sklearn.neighbors import BallTree
-import openpyxl
-import time
-#from scikit-learn.neighbors import BallTree
+from sqlalchemy import create_engine, text
+import math
 
-class MSET():
+class MSET_Temp:
+    """
+    Temperature trend/threshold analysis based on MSET + SPRT.
+    The query scope is set by the wind_turbine_number column and a time_stamp range;
+    the SPRT threshold is fixed at 0.99, and calcSPRT outputs scores in [-1, 1].
+    """
 
-    matrixD = None
-    matrixL = None
-    healthyResidual = None
-
-    normalDataBallTree = None
-
-
-
-    def __init__(self):
-        self.model = None
-
-
-    def calcSimilarity(self, x, y, m = 'euc'):
+    def __init__(self, windCode: str, windTurbineNumberList: list[str], startTime: str, endTime: str):
         """
-        Calculate the similarity of two feature vectors
-        :param x: one of the input feature lists
-        :param y: one of the input feature lists
-        :param m: similarity calculation method; default is the Euclidean distance, 'euc';
-                    a city-block distance function is used when m is set to 'cbd'.
-        :return: the similarity of the two features, float, range (0,1)
+        :param windCode: turbine-model/unit code used to build the table name, e.g. "WOG01312" → table "WOG01312_minute"
+        :param windTurbineNumberList: list of wind_turbine_number values (turbine IDs) to query
+        :param startTime: start time as a string, format "YYYY-MM-DD HH:MM"
+        :param endTime: end time as a string, format "YYYY-MM-DD HH:MM"
         """
+        self.windCode = windCode.strip()
+        self.windTurbineNumberList = windTurbineNumberList
+        # normalize the timestamps to whole seconds
+        self.startTime = self._truncate_to_seconds(startTime)
+        self.endTime   = self._truncate_to_seconds(endTime)
+
+        # D/L matrices and related state
+        self.matrixD = None
+        self.matrixL = None
+        self.healthyResidual = None
+        self.normalDataBallTree = None
+
+    def _truncate_to_seconds(self, dt_str: str) -> str:
+        """
+        Truncate a user-supplied ISO-style timestamp (possibly containing 'T',
+        milliseconds, a trailing "Z", or a timezone offset) to whole seconds,
+        returning the "YYYY-MM-DD HH:MM:SS" format.
+        e.g. "2025-06-01T12:34:56.789Z" → "2025-06-01 12:34:56"
+        """
+        # replace 'T' with a space and strip a trailing "Z"
+        s = dt_str.replace("T", " ").rstrip("Z")
+        # truncate fractional seconds, if present
+        if "." in s:
+            s = s.split(".")[0]
+        # strip a "+xx:xx" timezone suffix, if present
+        if "+" in s:
+            s = s.split("+")[0]
+        return s.strip()
+
+    def _get_data_by_filter(self) -> pd.DataFrame:
+        """
+        Query all rows matching the wind_turbine_number list within the time_stamp
+        range and return them as one DataFrame, sorted by time_stamp ascending.
+        """
+        table_name = f"{self.windCode}_minute"
+        engine = create_engine(
+            "mysql+pymysql://root:admin123456@106.120.102.238:10336/energy_data_prod"
+        )
+
+        # build the SQL fragment for the wind_turbine_number IN-list: 'WT1','WT2',...
+        turbines = ",".join(f"'{wt.strip()}'" for wt in self.windTurbineNumberList)
+        sql = text(f"""
+            SELECT *
+            FROM {table_name}
+            WHERE wind_turbine_number IN ({turbines})
+              AND time_stamp BETWEEN :start AND :end
+            ORDER BY time_stamp ASC
+        """)
+
+        df = pd.read_sql(sql, engine, params={"start": self.startTime, "end": self.endTime})
+        return df
+
+    def calcSimilarity(self, x: np.ndarray, y: np.ndarray, m: str = 'euc') -> float:
+        """
+        Similarity of vectors x and y, in the (0, 1] range:
+          - m='euc' → Euclidean distance
+          - m='cbd' → city-block (Manhattan) distance
+        """
+        if len(x) != len(y):
+            return 0.0
 
-        if (len(x) != len(y)):
-            return 0
-
-        if (m == 'cbd'):
-            dSimilarity = [1/(1+np.abs(p-q)) for p,q in zip(x,y)]
-            dSimilarity = np.sum(dSimilarity)/len(x)
+        if m == 'cbd':
+            arr = [1.0 / (1.0 + abs(p - q)) for p, q in zip(x, y)]
+            return float(np.sum(arr) / len(arr))
         else:
-            dSimilarity = [np.power(p-q,2) for p, q in zip(x, y)]
-            dSimilarity = 1/(1+np.sqrt(np.sum(dSimilarity)))
-
-        return dSimilarity
-
+            diffsq = [(p - q) ** 2 for p, q in zip(x, y)]
+            return float(1.0 / (1.0 + math.sqrt(np.sum(diffsq))))
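+
+    # Worked example for calcSimilarity (illustrative values, not from the data):
+    #   x = [20.0, 21.0], y = [20.0, 23.0]
+    #   'euc': distance = sqrt(0**2 + 2**2) = 2 → similarity = 1/(1+2) ≈ 0.333
+    #   'cbd': mean(1/(1+0), 1/(1+2)) = (1 + 1/3)/2 ≈ 0.667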
 
-    def genDLMatrix(self, trainDataset, dataSize4D=100, dataSize4L=50):
+    def genDLMatrix(self, trainDataset: np.ndarray, dataSize4D=100, dataSize4L=50) -> int:
         """
-        Automatically generate the D and L matrices from the training data set, assuming the training data are all
-        normal-state data.
-        :param trainDataset: 2D array, [count of data, length of feature]
-        :param dataSize4D: minimum number of states for matrix D
-        :param dataSize4L: minimum number of states for matrix L
-        :return: 0 if successful, -1 if failed
+        Build the D and L matrices from trainDataset:
+          - if the sample count is < dataSize4D + dataSize4L, return -1
+          - otherwise build matrixD and matrixL, derive healthyResidual via
+            locally weighted regression, and return 0
         """
-        [m,n] = np.shape(trainDataset)
-
+        m, n = trainDataset.shape
         if m < dataSize4D + dataSize4L:
-            print('training dataset is too small to generate the matrix D and L')
             return -1
 
+        # Step 1: add each feature's min/max sample to matrixD
         self.matrixD = []
         selectIndex4D = []
-        # Step 1: add all the state with minimum or maximum value in each feature into Matrix D
-
-        for i in range(0, n):
-            feature_i = trainDataset[:,i]
-            minIndex = np.argmin(feature_i)
-            maxIndex = np.argmax(feature_i)
-
-            self.matrixD.append(trainDataset[minIndex, :].tolist())
-            selectIndex4D.append(minIndex)
-            self.matrixD.append(trainDataset[maxIndex, :].tolist())
-            selectIndex4D.append(maxIndex)
-
-        # Step 2: iteratively add the state with the maximum average distance to the states in selected matrixD
-        while(1):
-
-            if (len(selectIndex4D) >= dataSize4D):
-                break
-
-            # Get the free state list
-            freeStateList = list(set(np.arange(0, len(trainDataset))) - set(selectIndex4D))
-
-            # Calculate the average dist of each state in free to selected state in matrix D
-            distList = []
-            for i in freeStateList:
-                tmpState = trainDataset[i]
-                tmpDist = [1-self.calcSimilarity(x, tmpState) for x in self.matrixD]
-                distList.append(np.mean(tmpDist))
-
-            # select the free state with largest average distance to states in matrixD, and update matrixD
-            selectId = freeStateList[distList.index(np.max(distList))]
-            self.matrixD.append(trainDataset[selectId, :].tolist())
-            selectIndex4D.append(selectId)
-        #format matrixD from list to array
-        self.matrixD = np.array(self.matrixD)
-        self.normalDataBallTree = BallTree(self.matrixD, leaf_size=4, metric = lambda i,j: 1-self.calcSimilarity(i,j))
+        for i in range(n):
+            col_i = trainDataset[:, i]
+            idx_min = np.argmin(col_i)
+            idx_max = np.argmax(col_i)
+            self.matrixD.append(trainDataset[idx_min, :].tolist())
+            selectIndex4D.append(idx_min)
+            self.matrixD.append(trainDataset[idx_max, :].tolist())
+            selectIndex4D.append(idx_max)
+
+        # Step 2: greedily add the free sample with the largest average distance to matrixD, until matrixD has dataSize4D rows
+        while len(selectIndex4D) < dataSize4D:
+            freeList = list(set(range(len(trainDataset))) - set(selectIndex4D))
+            distAvg = []
+            for idx in freeList:
+                tmp = trainDataset[idx, :]
+                dlist = [1.0 - self.calcSimilarity(x, tmp) for x in self.matrixD]
+                distAvg.append(np.mean(dlist))
+            select_id = freeList[int(np.argmax(distAvg))]
+            self.matrixD.append(trainDataset[select_id, :].tolist())
+            selectIndex4D.append(select_id)
 
+        self.matrixD = np.array(self.matrixD)
 
-        # Step 3. select remaining state for matrix L
-        #index4L = list(set(np.arange(0, len(trainDataset))) - set(selectIndex4D))
-        #self.matrixL = trainDataset[index4L, :]
-        # consider the limited dataset, using all the train data to matrix L
-        self.matrixL = trainDataset
+        # build a BallTree over matrixD for the locally weighted regression
+        self.normalDataBallTree = BallTree(
+            self.matrixD,
+            leaf_size=4,
+            metric=lambda a, b: 1.0 - self.calcSimilarity(a, b)
+        )
 
-        # Calculate the healthy Residual from matrix L
-        lamdaRatio = 1e-3
-        [m, n] = np.shape(self.matrixD)
-        self.DDSimilarity = np.array([[1-self.calcSimilarity(x,y) for x in self.matrixD] for y in self.matrixD] + lamdaRatio*np.eye(n))
-        self.DDSimilarity = 1/self.DDSimilarity
+        # Step 3: use all training samples as matrixL
+        self.matrixL = trainDataset.copy()
 
-        #self.healthyResidual = self.calcResidual(self.matrixL)
+        # Step 4: compute the healthy residual via locally weighted regression
         self.healthyResidual = self.calcResidualByLocallyWeightedLR(self.matrixL)
-
-
         return 0
 
-    def calcResidualByLocallyWeightedLR(self, newStates):
+    def calcResidualByLocallyWeightedLR(self, newStates: np.ndarray) -> np.ndarray:
         """
-        find the K-nearest neighbors for each input state, then calculate the estimated state by locally weighted average.
-        :param newStates: input states list
-        :return: residual R_x
+        For each sample in newStates, run a locally weighted regression over its
+        20 nearest neighbours in matrixD and compute the residual.
+        Returns a residual matrix of shape [len(newStates), n_features].
         """
-        [m,n] = np.shape(newStates)
-        est_X = []
-        # print(newStates)
+        est_list = []
         for x in newStates:
-            (dist, iList) = self.normalDataBallTree.query([x], 20, return_distance=True)
-            weight = 1/(dist[0]+1e-1)
-            weight = weight/sum(weight)
-            eState = np.sum([w*self.matrixD[i] for w,i in zip(weight, iList[0])])
-            est_X.append(eState)
-
-        est_X = np.reshape(est_X, (len(est_X),1))
-        # print(est_X)
-        # print(newStates)
-        return est_X - newStates
-
-    def calcStateResidual(self, newsStates):
-        stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
-        return stateResidual
-
-    def calcSPRT(self, newsStates, feature_weight, alpha=0.1, beta=0.1, decisionGroup=5):
+            dist, idxs = self.normalDataBallTree.query([x], k=20, return_distance=True)
+            w = 1.0 / (dist[0] + 1e-1)
+            w = w / np.sum(w)
+            est = np.sum([w_i * self.matrixD[j] for w_i, j in zip(w, idxs[0])], axis=0)
+            est_list.append(est)
+        est_arr = np.reshape(np.array(est_list), (len(est_list), -1))
+        return est_arr - newStates
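+
+    # Estimation rule implemented above (a sketch): for a query state x with the
+    # 20 nearest matrixD states D_j at distances d_j,
+    #     w_j = (1/(d_j + 0.1)) / sum_k 1/(d_k + 0.1),   x_hat = sum_j w_j * D_j,
+    # and the returned residual is x_hat - x; the 0.1 offset keeps the weights
+    # finite when d_j = 0.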
+
+    def calcSPRT(
+        self,
+        newStates: np.ndarray,
+        feature_weight: np.ndarray,
+        alpha: float = 0.1,
+        beta: float = 0.1,
+        decisionGroup: int = 5
+    ) -> list[float]:
         """
-        anomaly detection based on Wald's SPRT algorithm; refer to A. Wald, Sequential Analysis, Wiley, New York, NY, USA, 1947
-        :param newsStates: input states list
-        :param feature_weight: the importance weight of each feature, normalized and nonnegative
-        :param alpha: prescribed false-alarm rate, 0 < alpha < 1
-        :param beta: prescribed missed-alarm rate, 0 < beta < 1
-        :param decisionGroup: length of the test sample when the decision is made
-
-        :return: anomaly flag for each group of states, 1: anomaly, -1: normal, (-1,1): unable to decide
+        Run Wald's SPRT on newStates and return a list of scores of length
+        len(newStates) - decisionGroup + 1, each in [-1, 1]:
+          - closer to  1 → more anomalous (dangerous)
+          - closer to -1 → more normal
         """
-
-        # Step 1. transfer the raw residual vector to dimension reduced residual using feature weight
-        #stateResidual = self.calcResidual(newsStates)
-        stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
-        # print(stateResidual)
-        weightedStateResidual = [np.dot(x, feature_weight) for x in stateResidual]
-        # print(weightedStateResidual)
+        # 1) residuals of the test states, reduced to scalars by the feature weights
+        stateRes = self.calcResidualByLocallyWeightedLR(newStates)
+        weightedStateResidual = [np.dot(x, feature_weight) for x in stateRes]
         weightedHealthyResidual = [np.dot(x, feature_weight) for x in self.healthyResidual]
 
-        '''
-        plt.subplot(211)
-        plt.plot(weightedHealthyResidual)
-        plt.xlabel('Sample index')
-        plt.ylabel('Healthy State Residual')
-        plt.subplot(212)
-        plt.plot(weightedStateResidual)
-        plt.xlabel('Sample index')
-        plt.ylabel('All State Residual')
-        plt.show()
-        '''
-        # Calculate the distribution of health state residual
-        mu0 = np.mean(weightedHealthyResidual)
-        sigma0 = np.std(weightedHealthyResidual)
-
-
-        #sigma1 = np.std(weightedStateResidual)
-
-        lowThres = np.log(beta/(1-alpha))
-        highThres = np.log((1-beta)/alpha)
-
-        flag = []
-        for i in range(0, len(newsStates)-decisionGroup+1):
-            # For each group to calculate the new state residual distribution
-            # Then check the  hypothesis testing results
-            mu1 = np.mean(weightedStateResidual[i:i+decisionGroup])
-            si = np.sum(weightedStateResidual[i:i+decisionGroup])*(mu1-mu0)/sigma0**2 - decisionGroup*(mu1**2-mu0**2)/(2*sigma0**2)
-
-            if (si > highThres):
-                si = highThres
-            if (si < lowThres):
-                si = lowThres
-
-            if (si > 0):
-                si = si/highThres
-            if (si < 0):
-                si = si/lowThres
-
-            flag.append(si)
-
-        return flag
-
-if __name__=='__main__':
-
-    start_time = time.time()
-    myMSET = MSET()
-    # 1. Compute each subsystem's health score (subsystems: generator set, drive train (direct-drive units: no gearbox, no data), nacelle, converter, grid environment, auxiliary systems (no data))
-    # 1.1 Generator-set health score
-    Title = pd.read_excel(r'34_QITAIHE.xlsx', header=None, nrows=1, usecols=[12, 13, 14])
-    df_D = pd.read_excel(r'34_QITAIHE.xlsx',
-                           usecols=[0,12, 13, 14,], parse_dates=True)  # temperature metrics: gearbox oil temperature (12), drive-end generator bearing temperature (13), non-drive-end generator bearing temperature (14)
-
-    cols = df_D.columns
-    df_D[cols] = df_D[cols].apply(pd.to_numeric, errors='coerce')
-    df_D = df_D.dropna()  # drop empty rows and non-numeric entries
-
-    x_date = df_D.iloc[len(df_D) // 2 + 1:, 0]  # extract the time series
-    x_date = pd.to_datetime(x_date)
-    df_Temp = df_D.iloc[:, 1:]  # all columns except the time column
-    df_Temp_values = df_Temp.values
-    df_Temp_values = np.array(df_Temp_values)
-    [m, n] = np.shape(df_Temp_values)
-    # Residual = []
-    flag_Spart_data = []
-    for i in range(0, n):
-        df_Temp_i = df_Temp_values[:, i]
-        trainDataSet_data = df_Temp_i[0:len(df_Temp_i) // 2]
-        testDataSet_data = df_Temp_i[len(df_Temp_i) // 2 + 1:]
-        trainDataSet_data = np.reshape(trainDataSet_data, (len(trainDataSet_data), 1))
-        testDataSet_data = np.reshape(testDataSet_data, (len(testDataSet_data), 1))
-
-        myMSET.genDLMatrix(trainDataSet_data, 60, 5)
-        # stateResidual = self.calcStateResidual(testDataSet_data)
-        flag_data = myMSET.calcSPRT(testDataSet_data, np.array(1), decisionGroup=1)
-        # Residual.append(stateResidual)
-        flag_Spart_data.append(flag_data)
-    flag_Spart_data = np.array(flag_Spart_data)
-    flag_Spart_data = flag_Spart_data.T
-    Temp1 = flag_Spart_data[:,0]
-    Temp2 = flag_Spart_data[:,1]
-    Temp3 = flag_Spart_data[:,2]
-    Temp1_lable = Title.iloc[0, 0]
-    Temp2_lable = Title.iloc[0, 1]
-    Temp3_lable = Title.iloc[0, 2]
-
-    print(x_date)
-
-    # alarmedFlag = np.array([[i, Temp1[i]] for i, x in enumerate(Temp1) if x > 0.99])  # pick the points of Temp1 above 0.99
-
-    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
-    plt.close('all')
-    plt.subplot(311)
-    plt.plot(x_date, Temp1, 'b-o', label=Temp1_lable)
-    plt.ylabel(Temp1_lable)
-    plt.xlabel('时间')
-
-    # plt.scatter(alarmedFlag1[:, 0], alarmedFlag1[:, 2], marker='x', c='r')
-    plt.subplot(312)
-    plt.plot(x_date, Temp2)
-    plt.ylabel(Temp2_lable)
-    plt.xlabel('时间')
-    #plt.scatter(alarmedFlag[:, 0], alarmedFlag[:, 1], marker='x', c='r')
-    plt.subplot(313)
-    plt.plot(x_date, Temp3)
-    plt.ylabel(Temp3_lable)
-    plt.xlabel('时间')
-    #plt.scatter(alarmedFlag[:, 0], alarmedFlag[:, 1], marker='x', c='r')
-    plt.show()
-    print(Title)
-    print(flag_Spart_data)
-    print(np.shape(flag_Spart_data))
-
-
-    end_time = time.time()
-    execution_time = end_time - start_time
-    print(f"Execution time: {execution_time} seconds")
-
-
-
-#
-
-    '''
-    f = open("speed_vib.txt", "r")
-    data1 = f.read()
-    f.close()
-    data1 = data1.split('\n')
-    rpm = [(row.split('\t')[0]).strip() for row in data1]
-    vib = [(row.split('\t')[-1]).strip() for row in data1]
-
-    # print(rpm)
-    rpm = np.array(rpm).astype(np.float64)
-    vib = np.array(vib).astype(np.float64)
-
-    #vib = [(x-np.mean(vib))/np.std(vib) for x in vib]
-    #print(vib)
-    trainDataSet = [vib[i] for i in range(0,100) if vib[i] < 5]
-    trainDataSet = np.reshape(trainDataSet,(len(trainDataSet),1))
-    testDataSet = np.reshape(vib, (len(vib),1))
-    '''
-
-
-    # Title = pd.read_csv(r'F1710001001.csv', header=None, nrows=1, usecols=[36,37,38], encoding='gbk')
-
-    '''
-    alarmedFlag = np.array([[i, flag[i]] for i, x in enumerate(flag) if x > 0.99])  # pick the points of flag above 0.99
-    plt.close('all')
-    plt.subplot(211)
-    plt.plot(testDataSet)
-    plt.ylabel('Vibration')
-    plt.xlabel('Sample index')
-    plt.subplot(212)
-    plt.plot(flag)
-    plt.ylabel('SPRT results')
-    plt.xlabel('Sample index')
-    plt.scatter(alarmedFlag[:,0], alarmedFlag[:,1], marker='x',c='r')
-
-    plt.show()
-    '''
-
-
-
-
-
+        # 2) distribution statistics of the healthy residual
+        mu0 = float(np.mean(weightedHealthyResidual))
+        sigma0 = float(np.std(weightedHealthyResidual))
+
+        # 3) SPRT decision thresholds
+        lowThres = np.log(beta / (1.0 - alpha))    # < 0
+        highThres = np.log((1.0 - beta) / alpha)   # > 0
+
+        flags: list[float] = []
+        length = len(weightedStateResidual)
+        for i in range(0, length - decisionGroup + 1):
+            segment = weightedStateResidual[i : i + decisionGroup]
+            mu1 = float(np.mean(segment))
+            si = (
+                np.sum(segment) * (mu1 - mu0) / (sigma0**2)
+                - decisionGroup * ((mu1**2) - (mu0**2)) / (2.0 * (sigma0**2))
+            )
+
+            # clamp si to [lowThres, highThres]
+            si = max(min(si, highThres), lowThres)
+            # normalize by the corresponding bound, keeping the sign:
+            # si = highThres → +1 (anomalous), si = lowThres → -1 (normal);
+            # dividing a negative si by the negative lowThres would flip its sign,
+            # so negate it to keep the documented [-1, 1] convention
+            if si > 0:
+                norm_si = float(si / highThres)
+            else:
+                norm_si = float(-si / lowThres)
+            flags.append(norm_si)
+
+        return flags
+
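+    # The statistic si above is the Gaussian log-likelihood ratio of H1 (mean mu1)
+    # against H0 (mean mu0), with shared variance sigma0**2, over n = decisionGroup
+    # samples:
+    #     si = sum(segment)*(mu1 - mu0)/sigma0**2 - n*(mu1**2 - mu0**2)/(2*sigma0**2)
+    # clamped to Wald's bounds [ln(beta/(1-alpha)), ln((1-beta)/alpha)] and rescaled
+    # into [-1, 1].
+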
+    def check_threshold(self) -> pd.DataFrame:
+        """
+        Threshold analysis (threshold fixed at 0.99). Returns a long-format DataFrame with columns
+          ["time_stamp", "temp_channel", "SPRT_score", "status"],
+        where status = "危险" (danger) if SPRT_score > 0.99 else "正常" (normal).
+        """
+        THRESHOLD = 0.99
+
+        # 1) fetch raw data for the requested turbines and time range
+        df_concat = self._get_data_by_filter()
+        if df_concat.empty:
+            return pd.DataFrame(columns=["time_stamp", "temp_channel", "SPRT_score", "status"])
+
+        # 2) select the temperature columns that exist in this table
+        temp_cols_all = [
+            'main_bearing_temperature',
+            'gearbox_oil_temperature',
+            'generatordrive_end_bearing_temperature',
+            'generatornon_drive_end_bearing_temperature'
+        ]
+        temp_cols = [c for c in temp_cols_all if c in df_concat.columns]
+        if not temp_cols:
+            return pd.DataFrame(columns=["time_stamp", "temp_channel", "SPRT_score", "status"])
+
+        # 3) coerce to numeric and drop NaN rows
+        df_concat[temp_cols] = df_concat[temp_cols].apply(pd.to_numeric, errors='coerce')
+        df_concat = df_concat.dropna(subset=temp_cols + ['time_stamp'])
+        if df_concat.empty:
+            return pd.DataFrame(columns=["time_stamp", "temp_channel", "SPRT_score", "status"])
+
+        # 4) parse time_stamp as datetime
+        df_concat['time_stamp'] = pd.to_datetime(df_concat['time_stamp'])
+        x_date = df_concat['time_stamp']
+
+        # 5) extract the temperature columns into a NumPy array
+        arr = df_concat[temp_cols].values  # shape = [n_records, n_channels]
+        m, n = arr.shape
+        half = m // 2
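+        # NOTE: as in the original script, the first half of each channel is
+        # assumed to be healthy training data; the second half is scored against it.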
+
+        all_flags: list[list[float]] = []
+        for i in range(n):
+            channel = arr[:, i]
+            train = channel[:half].reshape(-1, 1)
+            test  = channel[half:].reshape(-1, 1)
+
+            # build the D/L matrices from the training half
+            if self.genDLMatrix(train, dataSize4D=60, dataSize4L=5) != 0:
+                # not enough training samples → return an empty frame
+                return pd.DataFrame(columns=["time_stamp", "temp_channel", "SPRT_score", "status"])
+
+            feature_w = np.array([1.0])
+            flags = self.calcSPRT(test, feature_w, decisionGroup=1)
+            all_flags.append(flags)
+
+        # 6) assemble a wide table, then melt it into long format
+        flags_arr = np.array(all_flags)  # shape = [n_channels, n_test_samples]
+        num_test = flags_arr.shape[1]
+        ts = x_date.iloc[half : half + num_test].reset_index(drop=True)
+
+        wide = pd.DataFrame({"time_stamp": ts})
+        for idx, col in enumerate(temp_cols):
+            wide[col] = flags_arr[idx, :]
+
+        df_long = wide.melt(
+            id_vars=["time_stamp"],
+            value_vars=temp_cols,
+            var_name="temp_channel",
+            value_name="SPRT_score"
+        )
+        # render time_stamp as "YYYY-MM-DD HH:MM:SS" strings
+        df_long['time_stamp'] = pd.to_datetime(df_long['time_stamp']).dt.strftime("%Y-%m-%d %H:%M:%S")
+
+        # 7) status column: SPRT_score > 0.99 → "危险" (danger), else "正常" (normal)
+        df_long['status'] = df_long['SPRT_score'].apply(
+            lambda x: "危险" if x > THRESHOLD else "正常"
+        )
+
+        return df_long
+
+    def get_trend(self) -> dict:
+        """
+        Trend analysis: return the temperature readings ordered by time.
+        Return format: {
+            "timestamps": [list of "YYYY-MM-DD HH:MM:SS" strings],
+            "channels": [
+                {"temp_channel": "main_bearing_temperature", "values": [list of floats]},
+                {"temp_channel": "gearbox_oil_temperature", "values": [...]},
+                ...
+            ],
+            "unit": "°C"
+        }
+        """
+        df = self._get_data_by_filter()
+
+        if df.empty:
+            return {"timestamps": [], "channels": [], "unit": "°C"}
+
+        # candidate temperature columns to check
+        temp_cols_all = [
+            'main_bearing_temperature',
+            'gearbox_oil_temperature',
+            'generatordrive_end_bearing_temperature',
+            'generatornon_drive_end_bearing_temperature'
+        ]
+        # keep only the columns actually present in this table
+        temp_cols = [c for c in temp_cols_all if c in df.columns]
+
+        # no temperature columns → return an empty payload
+        if not temp_cols:
+            return {"timestamps": [], "channels": [], "unit": "°C"}
+
+        # coerce to numeric and drop NaN rows
+        df[temp_cols] = df[temp_cols].apply(pd.to_numeric, errors='coerce')
+        df = df.dropna(subset=temp_cols + ['time_stamp'])
+
+        # format time_stamp as "YYYY-MM-DD HH:MM:SS" and sort (lexicographic order is chronological for this format)
+        df['time_stamp'] = pd.to_datetime(df['time_stamp']).dt.strftime("%Y-%m-%d %H:%M:%S")
+        df = df.sort_values('time_stamp').reset_index(drop=True)
+
+        # collect the formatted timestamp strings
+        timestamps = df['time_stamp'].tolist()
+
+        # collect each channel's values
+        channels_data = []
+        for col in temp_cols:
+            channels_data.append({
+                "temp_channel": col,
+                "values": df[col].tolist()
+            })
+
+        return {
+            "timestamps": timestamps,
+            "channels": channels_data,
+            "unit": "°C"
+        }
 

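A minimal usage sketch of the class above (a hypothetical session; it assumes the hard-coded DB is reachable and a WOF01000010_minute table exists):

    from Temp_Diag import MSET_Temp

    analyzer = MSET_Temp(
        windCode="WOF01000010",
        windTurbineNumberList=["WOG00542"],
        startTime="2023-01-01 00:00",
        endTime="2023-01-05 12:00",
    )
    scores = analyzer.check_threshold()   # long-format DataFrame of SPRT scores
    trend = analyzer.get_trend()          # dict of timestamps and channel values
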
BIN
__pycache__/Temp_Diag.cpython-39.pyc


BIN
__pycache__/api_tempdiag.cpython-39.pyc


+ 146 - 0
api_tempdiag.py

@@ -0,0 +1,146 @@
+# api_tempdiag.py
+
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import JSONResponse
+from typing import List
+from pydantic import BaseModel, model_validator
+import uvicorn
+
+from Temp_Diag import MSET_Temp  # the MSET_Temp class defined in Temp_Diag.py
+
+app = FastAPI(title="Temperature Monitoring API")
+
+
+class TemperatureInput(BaseModel):
+    windCode: str
+    windTurbineNumberList: List[str]
+    startTime: str  # e.g. "2024-06-08 00:00"
+    endTime: str    # e.g. "2024-06-08 01:00"
+
+    @model_validator(mode='before')
+    def normalize_fields(cls, values):
+        # ensure windTurbineNumberList is a list (accept a bare string too)
+        raw = values.get('windTurbineNumberList')
+        if isinstance(raw, str):
+            values['windTurbineNumberList'] = [raw]
+        return values
+
+# threshold analysis
+@app.post("/temperature/threshold")
+async def route_threshold(input_data: TemperatureInput):
+    """
+    Threshold-analysis endpoint (threshold fixed at 0.99):
+      - input:
+        {
+          "windCode": "WOF01000010",
+          "windTurbineNumberList": ["WOG00542"],
+          "startTime": "2023-01-01 00:00",
+          "endTime": "2023-01-05 12:00"
+        }
+      - output:
+        {
+          "data": {
+            "type": "temperature_threshold",
+            "records": [
+              {
+                "time_stamp": "2024-06-08 00:05:00",
+                "temp_channel": "main_bearing_temperature",
+                "SPRT_score": 0.123,
+                "status": "正常"
+              },
+              ...
+            ]
+          },
+          "code": 200,
+          "message": "success"
+        }
+    """
+    try:
+        analyzer = MSET_Temp(
+            windCode=input_data.windCode,
+            windTurbineNumberList=input_data.windTurbineNumberList,
+            startTime=input_data.startTime,
+            endTime=input_data.endTime
+        )
+        df_result = analyzer.check_threshold()  # DataFrame 长格式
+        records = df_result.to_dict(orient="records")
+        return {
+            "data": {
+                "type": "temperature_threshold",
+                "records": records
+            },
+            "code": 200,
+            "message": "success"
+        }
+    except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={
+                "code": 500,
+                "message": "analysis failed",
+                "detail": str(e)
+            }
+        )
+
+# trend analysis (not wired up yet)
+@app.post("/temperature/trend")
+async def route_trend(input_data: TemperatureInput):
+    """
+    Trend-analysis endpoint:
+      - input:
+        {
+          "windCode": "WOF01000010",
+          "windTurbineNumberList": ["WOG00542"],
+          "startTime": "2023-01-01 00:00",
+          "endTime": "2023-01-05 12:00"
+        }
+      - output:
+        {
+          "data": {
+            "type": "temperature_trend",
+            "timestamps": [ "2024-06-08 00:00:00", ... ],
+            "channels": [
+               { "temp_channel": "main_bearing_temperature", "values": [24.5, 24.7, ...] },
+               ...
+            ],
+            "unit": "°C"
+          },
+          "code": 200,
+          "message": "success"
+        }
+    """
+    try:
+        analyzer = MSET_Temp(
+            windCode=input_data.windCode,
+            windTurbineNumberList=input_data.windTurbineNumberList,
+            startTime=input_data.startTime,
+            endTime=input_data.endTime
+        )
+        result = analyzer.get_trend()
+        return {
+            "data": {
+                "type": "temperature_trend",
+                **result
+            },
+            "code": 200,
+            "message": "success"
+        }
+    except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={
+                "code": 500,
+                "message": "analysis failed",
+                "detail": str(e)
+            }
+        )
+
+
+
+if __name__ == "__main__":
+    uvicorn.run(
+        "main:app",
+        host="0.0.0.0",
+        port=8000,
+        reload=True
+    )
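
A quick way to exercise the threshold endpoint once the server is running (a sketch using the third-party requests package; host, port, and payload values are illustrative):

    import requests

    payload = {
        "windCode": "WOF01000010",
        "windTurbineNumberList": ["WOG00542"],
        "startTime": "2023-01-01 00:00",
        "endTime": "2023-01-05 12:00",
    }
    resp = requests.post("http://127.0.0.1:8000/temperature/threshold", json=payload)
    resp.raise_for_status()
    for rec in resp.json()["data"]["records"][:5]:
        print(rec["time_stamp"], rec["temp_channel"], rec["SPRT_score"], rec["status"])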