Xmia 2 weeks ago
Parent
Commit
9d573816a5
5 changed files with 369 additions and 12 deletions
  1. 357 0
      Temp_Diag.PY
  2. BIN
      __pycache__/api_20241201.cpython-39.pyc
  3. BIN
      __pycache__/cms_class_20241201.cpython-39.pyc
  4. 8 8
      cms_class_20241201.py
  5. 4 4
      connect mysql.py

+ 357 - 0
Temp_Diag.PY

@@ -0,0 +1,357 @@
+# @Time    : 2019-6-19
+# @Author  : wilbur.cheng@SKF
+# @FileName: baseMSET.py
+# @Software: a module in the SKF anomaly detection library implementing the multivariate state
+#            estimation technique (MSET). It contains all the functions used in MSET: similarity
+#            calculation, the state memory matrix D, the normal state matrix L, residual
+#            calculation, and a basic SPRT (Sequential Probability Ratio Test) for binary
+#            hypothesis testing.
+#
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.neighbors import BallTree
+from sqlalchemy import create_engine  # used by _get_by_id
+import openpyxl
+import time
+
+class MSET_Temp():
+
+    matrixD = None
+    matrixL = None
+    healthyResidual = None
+
+    normalDataBallTree = None
+
+
+
+    def __init__(self):
+        self.model = None
+
+    def _get_by_id(self, windcode, ids):
+        """Fetch rows from the per-turbine minute table in MySQL by record id."""
+        df_res = []
+        engine = create_engine('mysql+pymysql://root:admin123456@106.120.102.238:10336/energy_data_prod')
+        table_name = windcode + '_minute'
+        for id in ids:
+            lastday_df_sql = f"SELECT * FROM {table_name} where id = {id}"
+            df = pd.read_sql(lastday_df_sql, engine)
+            df_res.append(df)
+        return df_res
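+    # Usage sketch (hypothetical windcode and ids; assumes the per-turbine
+    # *_minute tables exist in energy_data_prod):
+    #   dfs = MSET_Temp()._get_by_id('WOG01312', [1, 2, 3])
+    #   minute_df = pd.concat(dfs, ignore_index=True)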
+
+    def calcSimilarity(self, x, y, m = 'euc'):
+        """
+        Calculate the similarity of two feature vectors.
+        :param x: one input feature list
+        :param y: the other input feature list
+        :param m: similarity calculation method; default is the Euclidean distance, 'euc';
+                    a city block (Manhattan) distance is used when m is set to 'cbd'.
+        :return: the similarity of the two feature vectors, float, range (0,1]
+        """
+
+        if (len(x) != len(y)):
+            return 0
+
+        if (m == 'cbd'):
+            dSimilarity = [1/(1+np.abs(p-q)) for p,q in zip(x,y)]
+            dSimilarity = np.sum(dSimilarity)/len(x)
+        else:
+            dSimilarity = [np.power(p-q,2) for p, q in zip(x, y)]
+            dSimilarity = 1/(1+np.sqrt(np.sum(dSimilarity)))
+
+        return dSimilarity
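+    # Usage sketch (hypothetical values): identical vectors score 1.0 and the
+    # similarity decays toward 0 as the vectors move apart:
+    #   s_euc = MSET_Temp().calcSimilarity([1.0, 2.0], [1.5, 2.5])           # ~0.586
+    #   s_cbd = MSET_Temp().calcSimilarity([1.0, 2.0], [1.5, 2.5], m='cbd')  # ~0.667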
+
+
+    def genDLMatrix(self, trainDataset, dataSize4D=100, dataSize4L=50):
+        """
+        Automatically generate the D and L matrices from the training data set, assuming the
+        training data is all normal-state data.
+        :param trainDataset: 2D array, [count of data, length of feature]
+        :param dataSize4D: minimum number of states for matrix D
+        :param dataSize4L: minimum number of states for matrix L
+        :return: 0 if successful, -1 on failure
+        """
+        [m,n] = np.shape(trainDataset)
+
+        if m < dataSize4D + dataSize4L:
+            print('training dataset is too small to generate the matrix D and L')
+            return -1
+
+        self.matrixD = []
+        selectIndex4D = []
+        # Step 1: add all the state with minimum or maximum value in each feature into Matrix D
+
+        for i in range(0, n):
+            feature_i = trainDataset[:,i]
+            minIndex = np.argmin(feature_i)
+            maxIndex = np.argmax(feature_i)
+
+            self.matrixD.append(trainDataset[minIndex, :].tolist())
+            selectIndex4D.append(minIndex)
+            self.matrixD.append(trainDataset[maxIndex, :].tolist())
+            selectIndex4D.append(maxIndex)
+
+        # Step 2: iteratively add the state with the maximum average distance to the states in selected matrixD
+        while(1):
+
+            if (len(selectIndex4D) >= dataSize4D):
+                break
+
+            # Get the free state list
+            freeStateList = list(set(np.arange(0, len(trainDataset))) - set(selectIndex4D))
+
+            # Calculate the average dist of each state in free to selected state in matrix D
+            distList = []
+            for i in freeStateList:
+                tmpState = trainDataset[i]
+                tmpDist = [1-self.calcSimilarity(x, tmpState) for x in self.matrixD]
+                distList.append(np.mean(tmpDist))
+
+            # select the free state with largest average distance to states in matrixD, and update matrixD
+            selectId = freeStateList[distList.index(np.max(distList))]
+            self.matrixD.append(trainDataset[selectId, :].tolist())
+            selectIndex4D.append(selectId)
+        #format matrixD from list to array
+        self.matrixD = np.array(self.matrixD)
+        self.normalDataBallTree = BallTree(self.matrixD, leaf_size=4, metric = lambda i,j: 1-self.calcSimilarity(i,j))
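+        # The tree above uses 1 - similarity as its distance, so near-identical
+        # states have distance close to 0; leaf_size only tunes query speed,
+        # not the query results.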
+
+
+        # Step 3. select remaining state for matrix L
+        #index4L = list(set(np.arange(0, len(trainDataset))) - set(selectIndex4D))
+        #self.matrixL = trainDataset[index4L, :]
+        # consider the limited dataset, using all the train data to matrix L
+        self.matrixL = trainDataset
+
+        # Calculate the healthy Residual from matrix L
+        lambdaRatio = 1e-3
+        [m, n] = np.shape(self.matrixD)
+        # regularize the m-by-m dissimilarity matrix with a small diagonal term before inverting
+        self.DDSimilarity = np.array([[1-self.calcSimilarity(x, y) for x in self.matrixD] for y in self.matrixD]) + lambdaRatio*np.eye(m)
+        self.DDSimilarity = 1/self.DDSimilarity
+
+        #self.healthyResidual = self.calcResidual(self.matrixL)
+        self.healthyResidual = self.calcResidualByLocallyWeightedLR(self.matrixL)
+
+
+        return 0
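+    # Usage sketch (hypothetical data; assumes train_data rows are all normal states):
+    #   model = MSET_Temp()
+    #   if model.genDLMatrix(train_data, dataSize4D=60, dataSize4L=5) == 0:
+    #       residual = model.calcStateResidual(test_data)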
+
+    def calcResidualByLocallyWeightedLR(self, newStates):
+        """
+        Find the K nearest neighbors of each input state, then calculate the estimated state
+        as their locally weighted average.
+        :param newStates: input states list
+        :return: residual R_x
+        """
+        [m, n] = np.shape(newStates)
+        est_X = []
+        for x in newStates:
+            # query the 20 nearest memory states and weight them by inverse distance
+            (dist, iList) = self.normalDataBallTree.query([x], 20, return_distance=True)
+            weight = 1/(dist[0]+1e-1)
+            weight = weight/sum(weight)
+            eState = np.sum([w*self.matrixD[i] for w, i in zip(weight, iList[0])], axis=0)
+            est_X.append(eState)
+
+        est_X = np.reshape(est_X, (len(est_X), n))
+        return est_X - newStates
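+    # The residual above is (estimated - observed) per state; observations that the
+    # memory matrix D cannot reconstruct well yield large residuals.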
+
+    def calcStateResidual(self, newsStates):
+        stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
+        return stateResidual
+
+    def calcSPRT(self, newsStates, feature_weight, alpha=0.1, beta=0.1, decisionGroup=5):
+        """
+        Anomaly detection based on Wald's SPRT algorithm; see A. Wald, Sequential Analysis,
+        Wiley, New York, NY, USA, 1947.
+        :param newsStates: input states list
+        :param feature_weight: the importance weight of each feature, normalized and nonnegative
+        :param alpha: prescribed false alarm rate, 0 < alpha < 1
+        :param beta: prescribed missed alarm rate, 0 < beta < 1
+        :param decisionGroup: number of consecutive samples used for each decision
+
+        :return: anomaly flag for each group of states, 1: anomaly, -1: normal, values in (-1, 1): inconclusive
+        """
+
+        # Step 1. transfer the raw residual vector to dimension reduced residual using feature weight
+        #stateResidual = self.calcResidual(newsStates)
+        stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
+        # print(stateResidual)
+        weightedStateResidual = [np.dot(x, feature_weight) for x in stateResidual]
+        # print(weightedStateResidual)
+        weightedHealthyResidual = [np.dot(x, feature_weight) for x in self.healthyResidual]
+
+        '''
+        plt.subplot(211)
+        plt.plot(weightedHealthyResidual)
+        plt.xlabel('Sample index')
+        plt.ylabel('Healthy State Residual')
+        plt.subplot(212)
+        plt.plot(weightedStateResidual)
+        plt.xlabel('Sample index')
+        plt.ylabel('All State Residual')
+        plt.show()
+        '''
+        # Calculate the distribution of health state residual
+        mu0 = np.mean(weightedHealthyResidual)
+        sigma0 = np.std(weightedHealthyResidual)
+
+
+        #sigma1 = np.std(weightedStateResidual)
+
+        lowThres = np.log(beta/(1-alpha))
+        highThres = np.log((1-beta)/alpha)
+
+        flag = []
+        for i in range(0, len(newsStates)-decisionGroup+1):
+            # For each group to calculate the new state residual distribution
+            # Then check the  hypothesis testing results
+            mu1 = np.mean(weightedStateResidual[i:i+decisionGroup])
+            si = np.sum(weightedStateResidual[i:i+decisionGroup])*(mu1-mu0)/sigma0**2 - decisionGroup*(mu1**2-mu0**2)/(2*sigma0**2)
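+            # si above is the log-likelihood ratio of Wald's SPRT for a Gaussian mean
+            # shift mu0 -> mu1 with known variance sigma0^2; below it is clamped to
+            # the decision thresholds and normalized into [-1, 1].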
+
+            if (si > highThres):
+                si = highThres
+            if (si < lowThres):
+                si = lowThres
+
+            if (si > 0):
+                si = si/highThres
+            if (si < 0):
+                si = si/lowThres
+
+            flag.append(si)
+
+        return flag
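+    # Usage sketch (hypothetical arrays; feature_weight must be normalized,
+    # nonnegative, and match the feature count, e.g. np.array([1.0]) for one feature):
+    #   flags = model.calcSPRT(test_data, np.array([1.0]), decisionGroup=5)
+    #   anomalies = [i for i, f in enumerate(flags) if f > 0.99]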
+
+
+
+if __name__=='__main__':
+
+    start_time = time.time()
+    myMSET = MSET_Temp()
+    # 1. Compute the health score of each subsystem (generator set, drive train
+    #    (direct-drive units have no gearbox / no data), nacelle, converter, grid
+    #    environment, auxiliary systems (no data)).
+    # 1.1 Generator-set health score
+    # Title = pd.read_excel(r'/Users/xmia/Documents/code/Temp_Diag/34_QITAIHE.xlsx', header=None, nrows=1, usecols=[12, 13, 14])
+    # df_D = pd.read_excel(r'/Users/xmia/Documents/code/Temp_Diag/34_QITAIHE.xlsx',
+    #                        usecols=[0, 12, 13, 14], parse_dates=True)  # read temperature metrics: gearbox oil temp (12), drive-end generator bearing temp (13), non-drive-end generator bearing temp (14)
+    df = pd.read_csv('/Users/xmia/Desktop/ZN/华电中光/471_QTH1125/2024/#34.csv', usecols=[
+                        'wind_turbine_number',
+                        'time_stamp', 
+                        'main_bearing_temperature', 
+                        'gearbox_oil_temperature', 
+                        'generatordrive_end_bearing_temperature', 
+                        'generatornon_drive_end_bearing_temperature'
+                    ], parse_dates=['time_stamp']) 
+    df = df[df['wind_turbine_number'] == 'WOG01312']
+    df = df.drop(columns=['wind_turbine_number'])
+    df_D = df[df['time_stamp'] > '2024-11-15']
+
+    cols = df_D.columns
+    df_D[cols] = df_D[cols].apply(pd.to_numeric, errors='coerce')
+    df_D = df_D.dropna()  # drop empty rows and non-numeric entries
+
+    x_date = df_D.iloc[len(df_D) // 2 + 1:, 0]  # timestamps of the test half
+    x_date = pd.to_datetime(x_date)
+    df_Temp = df_D.iloc[:, 1:]  # all columns except the timestamp
+    df_Temp_values = df_Temp.values
+    df_Temp_values = np.array(df_Temp_values)
+    [m, n] = np.shape(df_Temp_values)
+    # Residual = []
+    flag_Spart_data = []
+    for i in range(0, n):
+        df_Temp_i = df_Temp_values[:, i]
+        trainDataSet_data = df_Temp_i[0:len(df_Temp_i) // 2]
+        testDataSet_data = df_Temp_i[len(df_Temp_i) // 2 + 1:]
+        trainDataSet_data = np.reshape(trainDataSet_data, (len(trainDataSet_data), 1))
+        testDataSet_data = np.reshape(testDataSet_data, (len(testDataSet_data), 1))
+
+        myMSET.genDLMatrix(trainDataSet_data, 60, 5)
+        # stateResidual = myMSET.calcStateResidual(testDataSet_data)
+        flag_data = myMSET.calcSPRT(testDataSet_data, np.array([1.0]), decisionGroup=1)  # single feature, unit weight
+        # Residual.append(stateResidual)
+        flag_Spart_data.append(flag_data)
+    flag_Spart_data = np.array(flag_Spart_data)
+    flag_Spart_data = flag_Spart_data.T
+    Temp1 = flag_Spart_data[:,0]
+    Temp2 = flag_Spart_data[:,1]
+    Temp3 = flag_Spart_data[:,2]
+    Temp1_label = "gearbox_oil_temperature"
+    Temp2_label = "generatordrive_end_bearing_temperature"
+    Temp3_label = "generatornon_drive_end_bearing_temperature"
+
+    print(x_date)
+
+    # alarmedFlag = np.array([[i, Temp1[i]] for i, x in enumerate(Temp1) if x > 0.99])  # select the points in the flag above 0.99
+
+    plt.rcParams['font.sans-serif'] = ['SimHei']  # CJK-capable font, in case Chinese labels are used
+    plt.close('all')
+    plt.subplot(311)
+    plt.plot(x_date, Temp1, 'b-o', label=Temp1_label)
+    plt.ylabel(Temp1_label)
+    plt.xlabel('Time')
+
+    # plt.scatter(alarmedFlag1[:, 0], alarmedFlag1[:, 2], marker='x', c='r')
+    plt.subplot(312)
+    plt.plot(x_date, Temp2)
+    plt.ylabel(Temp2_label)
+    plt.xlabel('Time')
+    # plt.scatter(alarmedFlag[:, 0], alarmedFlag[:, 1], marker='x', c='r')
+    plt.subplot(313)
+    plt.plot(x_date, Temp3)
+    plt.ylabel(Temp3_label)
+    plt.xlabel('Time')
+    # plt.scatter(alarmedFlag[:, 0], alarmedFlag[:, 1], marker='x', c='r')
+    plt.show()
+    print(flag_Spart_data)
+    print(np.shape(flag_Spart_data))
+
+
+    end_time = time.time()
+    execution_time = end_time - start_time
+    print(f"Execution time: {execution_time} seconds")
+
+
+
+#
+
+    '''
+    f = open("speed_vib.txt", "r")
+    data1 = f.read()
+    f.close()
+    data1 = data1.split('\n')
+    rpm = [(row.split('\t')[0]).strip() for row in data1]
+    vib = [(row.split('\t')[-1]).strip() for row in data1]
+
+    # print(rpm)
+    rpm = np.array(rpm).astype(np.float64)
+    vib = np.array(vib).astype(np.float64)
+
+    #vib = [(x-np.mean(vib))/np.std(vib) for x in vib]
+    #print(vib)
+    trainDataSet = [vib[i] for i in range(0,100) if vib[i] < 5]
+    trainDataSet = np.reshape(trainDataSet,(len(trainDataSet),1))
+    testDataSet = np.reshape(vib, (len(vib),1))
+    '''
+
+
+    # Title = pd.read_csv(r'F1710001001.csv', header=None, nrows=1, usecols=[36,37,38], encoding='gbk')
+
+    '''
+    alarmedFlag = np.array([[i, flag[i]] for i, x in enumerate(flag) if x > 0.99])  # select the points in flag above 0.99
+    plt.close('all')
+    plt.subplot(211)
+    plt.plot(testDataSet)
+    plt.ylabel('Vibration')
+    plt.xlabel('Sample index')
+    plt.subplot(212)
+    plt.plot(flag)
+    plt.ylabel('SPRT results')
+    plt.xlabel('Sample index')
+    plt.scatter(alarmedFlag[:,0], alarmedFlag[:,1], marker='x',c='r')
+
+    plt.show()
+    '''
+
+
+
+
+
+
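For reference, a minimal end-to-end sketch of the pipeline above on synthetic data (hypothetical values; assumes the file is importable as Temp_Diag.py and only numpy is needed besides it):

    import numpy as np
    from Temp_Diag import MSET_Temp

    rng = np.random.default_rng(0)
    train = rng.normal(60.0, 1.0, size=(200, 1))             # normal-state temperatures
    test = np.vstack([rng.normal(60.0, 1.0, size=(80, 1)),   # healthy segment
                      rng.normal(65.0, 1.0, size=(20, 1))])  # shifted segment at the end

    model = MSET_Temp()
    if model.genDLMatrix(train, dataSize4D=60, dataSize4L=5) == 0:
        flags = model.calcSPRT(test, np.array([1.0]), decisionGroup=5)
        print([i for i, f in enumerate(flags) if f > 0.99])  # indices flagged anomalous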

BIN
__pycache__/api_20241201.cpython-39.pyc


BIN
__pycache__/cms_class_20241201.cpython-39.pyc


+ 8 - 8
cms_class_20241201.py

@@ -60,7 +60,7 @@ df_data = []
 
 # Main class
 class CMSAnalyst:
-    def __init__(self, fmin, fmax, table_name,ids):
+    def __init__(self, fmin, fmax, table_name, ids):
 
         # envelope_spectrum_analysis
         # datas is [df1, df2, ...]
@@ -116,7 +116,7 @@ class CMSAnalyst:
         
     def _get_by_id(self, windcode, ids):
         df_res = []
-        engine = create_engine('mysql+pymysql://root:admin123456@192.168.50.235:30306/energy_data_prod')
+        engine = create_engine('mysql+pymysql://root:admin123456@106.120.102.238:10336/energy_data_prod')
         for id in ids:
             table_name=windcode+'_wave'
             lastday_df_sql = f"SELECT * FROM {table_name} where id = {id} "
@@ -429,14 +429,14 @@ class CMSAnalyst:
 
 # 2. Connect to database 'energy_show' on server 233 and look up the model code 'mill_type_code' for turbine code 'engine_code' in table 'wind_engine_group'
         engine_code = self.wind_code
-        Engine2 = create_engine('mysql+pymysql://admin:admin123456@192.168.50.233:3306/energy_show')
+        Engine2 = create_engine('mysql+pymysql://admin:admin123456@106.120.102.238:16306/energy_show')
         df_sql2 = f"SELECT * FROM {'wind_engine_group'} where engine_code = {'engine_code'} "
         df2 = pd.read_sql(df_sql2, Engine2)
         mill_type_code = df2['mill_type_code'].iloc[0]
         print(mill_type_code)
 
 # 3. From table 'unit_bearings', look up the component 'brand' and 'model' parameters by the model code 'mill_type_code'
-        Engine3 = create_engine('mysql+pymysql://admin:admin123456@192.168.50.233:3306/energy_show')
+        Engine3 = create_engine('mysql+pymysql://admin:admin123456@106.120.102.238:16306/energy_show')
         df_sql3 = f"SELECT * FROM {'unit_bearings'} where mill_type_code = {'mill_type_code'} "
         df3 = pd.read_sql(df_sql3, Engine3)
         brand = 'front_bearing' + '_brand'  # 'parts' stands in for 'front_bearing'
@@ -448,7 +448,7 @@ class CMSAnalyst:
         print(_model)
 
 # 4. From table 'unit_dict_brand_model', look up the component parameters by '_brand' and '_model'
-        Engine4 = create_engine('mysql+pymysql://admin:admin123456@192.168.50.233:3306/energy_show')
+        Engine4 = create_engine('mysql+pymysql://admin:admin123456@106.120.102.238:16306/energy_show')
         df_sql4 = f"SELECT * FROM unit_dict_brand_model where manufacture = %s AND model_number = %s"
         params = [(_brand, _model)]
         df4 = pd.read_sql(df_sql4, Engine4, params=params)
@@ -506,9 +506,9 @@ class CMSAnalyst:
         }
 
 if __name__ == "__main__":
-    table_name = "SKF001_wave"
-    ids = [67803,67804]
-    fmin, fmax = None, None
+    # table_name = "SKF001_wave"
+    # ids = [67803,67804]
+    # fmin, fmax = None, None
 
     cms = CMSAnalyst(fmin, fmax,table_name,ids)
     time_domain = cms.time_domain()
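Note that the unchanged lines above build SQL like f"SELECT * FROM {'wind_engine_group'} where engine_code = {'engine_code'}", which interpolates the literal string engine_code rather than the variable, so the filter is a no-op. A parameterized query, in the style already used for unit_dict_brand_model, avoids this; a minimal sketch, assuming the same engine, schema, and the params convention used above:

    import pandas as pd
    from sqlalchemy import create_engine

    engine2 = create_engine('mysql+pymysql://admin:admin123456@106.120.102.238:16306/energy_show')
    df_sql2 = "SELECT * FROM wind_engine_group WHERE engine_code = %s"
    df2 = pd.read_sql(df_sql2, engine2, params=[(engine_code,)])  # engine_code: the turbine code
    mill_type_code = df2['mill_type_code'].iloc[0]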

+ 4 - 4
connect mysql.py

@@ -12,7 +12,7 @@ import math
 #1. Define a function that fetches waveform records from the 235 database by wind farm code 'windcode' and record ids 'ids'
 def _get_by_id(windcode, ids):
     df_res = []
-    engine = create_engine('mysql+pymysql://root:admin123456@192.168.50.235:30306/energy_data_prod')
+    engine = create_engine('mysql+pymysql://root:admin123456@106.120.102.238:10336/energy_data_prod')
     for id in ids:
         table_name = windcode + '_wave'
         lastday_df_sql = f"SELECT * FROM {table_name} where id = {id} "
@@ -47,14 +47,14 @@ print(parts)
 
 #4. Connect to database 'energy_show' on server 233 and look up the model code 'mill_type_code' for turbine code 'engine_code' in table 'wind_engine_group'
 engine_code = wind_code
-Engine2 = create_engine('mysql+pymysql://admin:admin123456@192.168.50.233:3306/energy_show')
+Engine2 = create_engine('mysql+pymysql://admin:admin123456@106.120.102.238:16306/energy_show')
 df_sql2 = f"SELECT * FROM {'wind_engine_group'} where engine_code = {'engine_code'} "
 df2 = pd.read_sql(df_sql2, Engine2)
 mill_type_code = df2['mill_type_code'].iloc[0]
 print(mill_type_code)
 
 #5. From table 'unit_bearings', look up the component 'brand' and 'model' parameters by the model code 'mill_type_code'
-Engine3 = create_engine('mysql+pymysql://admin:admin123456@192.168.50.233:3306/energy_show')
+Engine3 = create_engine('mysql+pymysql://admin:admin123456@106.120.102.238:16306/energy_show')
 df_sql3 = f"SELECT * FROM {'unit_bearings'} where mill_type_code = {'mill_type_code'} "
 df3 = pd.read_sql(df_sql3, Engine3)
 brand = parts+'_brand'            # 'parts' stands in for 'front_bearing'
@@ -66,7 +66,7 @@ print(_brand)
 print(_model)
 
 #6. From table 'unit_dict_brand_model', look up the component parameters by '_brand' and '_model'
-Engine4 = create_engine('mysql+pymysql://admin:admin123456@192.168.50.233:3306/energy_show')
+Engine4 = create_engine('mysql+pymysql://admin:admin123456@106.120.102.238:16306/energy_show')
 df_sql4 = f"SELECT * FROM unit_dict_brand_model where manufacture = %s AND model_number = %s"
 params = [(_brand,  _model)]
 df4 = pd.read_sql(df_sql4, Engine4, params=params)
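Taken together, steps 4 to 6 form a three-table lookup chain. A compact sketch under the same schema assumptions (the exact extraction of _brand and _model is not shown in the hunks, so the df3 column access below is an assumption):

    import pandas as pd
    from sqlalchemy import create_engine

    def lookup_bearing_params(engine_code, parts='front_bearing'):
        engine = create_engine('mysql+pymysql://admin:admin123456@106.120.102.238:16306/energy_show')
        # turbine code -> model code
        df2 = pd.read_sql("SELECT * FROM wind_engine_group WHERE engine_code = %s",
                          engine, params=[(engine_code,)])
        mill_type_code = df2['mill_type_code'].iloc[0]
        # model code -> component brand/model
        df3 = pd.read_sql("SELECT * FROM unit_bearings WHERE mill_type_code = %s",
                          engine, params=[(mill_type_code,)])
        _brand = df3[parts + '_brand'].iloc[0]   # assumed column naming
        _model = df3[parts + '_model'].iloc[0]   # assumed column naming
        # brand/model -> component parameters
        return pd.read_sql("SELECT * FROM unit_dict_brand_model "
                           "WHERE manufacture = %s AND model_number = %s",
                           engine, params=[(_brand, _model)])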