2 kuukautta sitten · 5c67eb907d
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
 
				+# 默认忽略的文件
			
 
				+/shelf/
			
 
				+/workspace.xml
			
--- a/.idea/Temp_Diag_Git.iml
+++ b/.idea/Temp_Diag_Git.iml
@@ -0,0 +1,8 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<module type="PYTHON_MODULE" version="4">
			
 
				+  <component name="NewModuleRootManager">
			
 
				+    <content url="file://$MODULE_DIR$" />
			
 
				+    <orderEntry type="jdk" jdkName="C:\ProgramData\anaconda3" jdkType="Python SDK" />
			
 
				+    <orderEntry type="sourceFolder" forTests="false" />
			
 
				+  </component>
			
 
				+</module>
			
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
 
				+<component name="InspectionProjectProfileManager">
			
 
				+  <settings>
			
 
				+    <option name="USE_PROJECT_PROFILE" value="false" />
			
 
				+    <version value="1.0" />
			
 
				+  </settings>
			
 
				+</component>
			
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="Black">
			
 
				+    <option name="sdkName" value="C:\ProgramData\anaconda3" />
			
 
				+  </component>
			
 
				+  <component name="ProjectRootManager" version="2" project-jdk-name="C:\ProgramData\anaconda3" project-jdk-type="Python SDK" />
			
 
				+</project>
			
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="ProjectModuleManager">
			
 
				+    <modules>
			
 
				+      <module fileurl="file://$PROJECT_DIR$/.idea/Temp_Diag_Git.iml" filepath="$PROJECT_DIR$/.idea/Temp_Diag_Git.iml" />
			
 
				+    </modules>
			
 
				+  </component>
			
 
				+</project>
			
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -0,0 +1,4 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="VcsDirectoryMappings" defaultProject="true" />
			
 
				+</project>
			
--- a/34_QITAIHE.xlsx
+++ b/34_QITAIHE.xlsx
--- a/Temp_Diag.PY
+++ b/Temp_Diag.PY
@@ -0,0 +1,335 @@
 
				+# @Time    : 2019-6-19
			
 
				+# @Author  : wilbur.cheng@SKF
			
 
				+# @FileName: baseMSET.py
			
 
				+# @Software: a package in skf anomaly detection library to realize multivariate state estimation technique
			
 
				+#            this package has included all the functions used in MSET, similarity calculation, state memory matrix D,
			
 
				+#            normal state matrix L, residual calculation, and the basic SPRT (Sequential Probability Ratio Test)
			
 
				+#            function for binary hypothesis testing.
			
 
				+#
			
 
				+
			
 
				+import numpy as np
			
 
				+import pandas as pd
			
 
				+import matplotlib.pyplot as plt
			
 
				+from sklearn.neighbors import BallTree
			
 
				+import openpyxl
			
 
				+import time
			
 
				+#from scikit-learn.neighbors import BallTree
			
 
				+
			
 
				+class MSET():
			
 
				+
			
 
				+    matrixD = None
			
 
				+    matrixL = None
			
 
				+    healthyResidual = None
			
 
				+
			
 
				+    normalDataBallTree = None
			
 
				+
			
 
				+
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        self.model = None
			
 
				+
			
 
				+
			
 
				+    def calcSimilarity(self, x, y, m = 'euc'):
			
 
				+        """
			
 
				+        Calcualte the similartity of two feature vector
			
 
				+        :param x: one of input feature list
			
 
				+        :param y: one of input feature list
			
 
				+        :param m: method of the similarity calculation method, default is the Eucilidean distance named 'euc';
			
 
				+                    a city block distance function is used when m set to "cbd'.
			
 
				+        :return: the two feature similarity, float, range (0,1)
			
 
				+        """
			
 
				+
			
 
				+        if (len(x) != len(y)):
			
 
				+            return 0
			
 
				+
			
 
				+        if (m == 'cbd'):
			
 
				+            dSimilarity = [1/(1+np.abs(p-q)) for p,q in zip(x,y)]
			
 
				+            dSimilarity = np.sum(dSimilarity)/len(x)
			
 
				+        else:
			
 
				+            dSimilarity = [np.power(p-q,2) for p, q in zip(x, y)]
			
 
				+            dSimilarity = 1/(1+np.sqrt(np.sum(dSimilarity)))
			
 
				+
			
 
				+        return dSimilarity
			
 
				+
			
 
				+
			
 
				+    def genDLMatrix(self, trainDataset, dataSize4D=100, dataSize4L=50):
			
 
				+        """
			
 
				+        automatically generate the D and L matrix from training data set, assuming the training data set is all normal
			
 
				+        state data.
			
 
				+        :param trainDataset: 2D array, [count of data， length of feature]
			
 
				+        :param dataSize4D: minimized count of state for matrix D
			
 
				+        :param dataSize4L: minimized count of state for matrix L
			
 
				+        :return: 0 if successful, -1 if fail
			
 
				+        """
			
 
				+        [m,n] = np.shape(trainDataset)
			
 
				+
			
 
				+        if m < dataSize4D + dataSize4L:
			
 
				+            print('training dataset is too small to generate the matrix D and L')
			
 
				+            return -1
			
 
				+
			
 
				+        self.matrixD = []
			
 
				+        selectIndex4D = []
			
 
				+        # Step 1: add all the state with minimum or maximum value in each feature into Matrix D
			
 
				+
			
 
				+        for i in range(0, n):
			
 
				+            feature_i = trainDataset[:,i]
			
 
				+            minIndex = np.argmin(feature_i)
			
 
				+            maxIndex = np.argmax(feature_i)
			
 
				+
			
 
				+            self.matrixD.append(trainDataset[minIndex, :].tolist())
			
 
				+            selectIndex4D.append(minIndex)
			
 
				+            self.matrixD.append(trainDataset[maxIndex, :].tolist())
			
 
				+            selectIndex4D.append(maxIndex)
			
 
				+
			
 
				+        # Step 2: iteratively add the state with the maximum average distance to the states in selected matrixD
			
 
				+        while(1):
			
 
				+
			
 
				+            if (len(selectIndex4D) >= dataSize4D):
			
 
				+                break
			
 
				+
			
 
				+            # Get the free state list
			
 
				+            freeStateList = list(set(np.arange(0, len(trainDataset))) - set(selectIndex4D))
			
 
				+
			
 
				+            # Calculate the average dist of each state in free to selected state in matrix D
			
 
				+            distList = []
			
 
				+            for i in freeStateList:
			
 
				+                tmpState = trainDataset[i]
			
 
				+                tmpDist = [1-self.calcSimilarity(x, tmpState) for x in self.matrixD]
			
 
				+                distList.append(np.mean(tmpDist))
			
 
				+
			
 
				+            # select the free state with largest average distance to states in matrixD, and update matrixD
			
 
				+            selectId = freeStateList[distList.index(np.max(distList))]
			
 
				+            self.matrixD.append(trainDataset[selectId, :].tolist())
			
 
				+            selectIndex4D.append(selectId)
			
 
				+        #format matrixD from list to array
			
 
				+        self.matrixD = np.array(self.matrixD)
			
 
				+        self.normalDataBallTree = BallTree(self.matrixD, leaf_size=4, metric = lambda i,j: 1-self.calcSimilarity(i,j))
			
 
				+
			
 
				+
			
 
				+        # Step 3. select remaining state for matrix L
			
 
				+        #index4L = list(set(np.arange(0, len(trainDataset))) - set(selectIndex4D))
			
 
				+        #self.matrixL = trainDataset[index4L, :]
			
 
				+        # consider the limited dataset, using all the train data to matrix L
			
 
				+        self.matrixL = trainDataset
			
 
				+
			
 
				+        # Calculate the healthy Residual from matrix L
			
 
				+        lamdaRatio = 1e-3
			
 
				+        [m, n] = np.shape(self.matrixD)
			
 
				+        self.DDSimilarity = np.array([[1-self.calcSimilarity(x,y) for x in self.matrixD] for y in self.matrixD] + lamdaRatio*np.eye(n))
			
 
				+        self.DDSimilarity = 1/self.DDSimilarity
			
 
				+
			
 
				+        #self.healthyResidual = self.calcResidual(self.matrixL)
			
 
				+        self.healthyResidual = self.calcResidualByLocallyWeightedLR(self.matrixL)
			
 
				+
			
 
				+
			
 
				+        return 0
			
 
				+
			
 
				+    def calcResidualByLocallyWeightedLR(self, newStates):
			
 
				+        """
			
 
				+        find the K-nearest neighbors for each input state, then calculate the estimated state by locally weighted average.
			
 
				+        :param newStates: input states list
			
 
				+        :return: residual R_x
			
 
				+        """
			
 
				+        [m,n] = np.shape(newStates)
			
 
				+        est_X = []
			
 
				+        # print(newStates)
			
 
				+        for x in newStates:
			
 
				+            (dist, iList) = self.normalDataBallTree.query([x], 20, return_distance=True)
			
 
				+            weight = 1/(dist[0]+1e-1)
			
 
				+            weight = weight/sum(weight)
			
 
				+            eState = np.sum([w*self.matrixD[i] for w,i in zip(weight, iList[0])])
			
 
				+            est_X.append(eState)
			
 
				+
			
 
				+        est_X = np.reshape(est_X, (len(est_X),1))
			
 
				+        # print(est_X)
			
 
				+        # print(newStates)
			
 
				+        return est_X - newStates
			
 
				+
			
 
				+    def calcStateResidual(self, newsStates):
			
 
				+        stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
			
 
				+        return stateResidual
			
 
				+
			
 
				+    def calcSPRT(self, newsStates, feature_weight, alpha=0.1, beta=0.1, decisionGroup=5):
			
 
				+        """
			
 
				+        anomaly detection based Wald's SPRT algorithm, refer to A.Wald, Sequential Analysis,Wiley, New York, NY, USA, 1947
			
 
				+        :param newsStates: input states list
			
 
				+        :param feature_weight: the important weight for each feature, Normalized and Nonnegative
			
 
				+        :param alpha: prescribed false alarm rate， 0 < alpha < 1
			
 
				+        :param beta: prescribed miss alarm rate, 0 < beta < 1
			
 
				+        :param decisionGroup: length of the test sample when the decision is done
			
 
				+
			
 
				+        :return: anomaly flag for each group of states, 1:anomaly, -1:normal, (-1:1): unable to decision
			
 
				+        """
			
 
				+
			
 
				+        # Step 1. transfer the raw residual vector to dimension reduced residual using feature weight
			
 
				+        #stateResidual = self.calcResidual(newsStates)
			
 
				+        stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
			
 
				+        # print(stateResidual)
			
 
				+        weightedStateResidual = [np.dot(x, feature_weight) for x in stateResidual]
			
 
				+        # print(weightedStateResidual)
			
 
				+        weightedHealthyResidual = [np.dot(x, feature_weight) for x in self.healthyResidual]
			
 
				+
			
 
				+        '''
			
 
				+        plt.subplot(211)
			
 
				+        plt.plot(weightedHealthyResidual)
			
 
				+        plt.xlabel('Sample index')
			
 
				+        plt.ylabel('Healthy State Residual')
			
 
				+        plt.subplot(212)
			
 
				+        plt.plot(weightedStateResidual)
			
 
				+        plt.xlabel('Sample index')
			
 
				+        plt.ylabel('All State Residual')
			
 
				+        plt.show()
			
 
				+        '''
			
 
				+        # Calculate the distribution of health state residual
			
 
				+        mu0 = np.mean(weightedHealthyResidual)
			
 
				+        sigma0 = np.std(weightedHealthyResidual)
			
 
				+
			
 
				+
			
 
				+        #sigma1 = np.std(weightedStateResidual)
			
 
				+
			
 
				+        lowThres = np.log(beta/(1-alpha))
			
 
				+        highThres = np.log((1-beta)/alpha)
			
 
				+
			
 
				+        flag = []
			
 
				+        for i in range(0, len(newsStates)-decisionGroup+1):
			
 
				+            # For each group to calculate the new state residual distribution
			
 
				+            # Then check the  hypothesis testing results
			
 
				+            mu1 = np.mean(weightedStateResidual[i:i+decisionGroup])
			
 
				+            si = np.sum(weightedStateResidual[i:i+decisionGroup])*(mu1-mu0)/sigma0**2 - decisionGroup*(mu1**2-mu0**2)/(2*sigma0**2)
			
 
				+
			
 
				+            if (si > highThres):
			
 
				+                si = highThres
			
 
				+            if (si < lowThres):
			
 
				+                si = lowThres
			
 
				+
			
 
				+            if (si > 0):
			
 
				+                si = si/highThres
			
 
				+            if (si < 0):
			
 
				+                si = si/lowThres
			
 
				+
			
 
				+            flag.append(si)
			
 
				+
			
 
				+        return flag
			
 
				+
			
 
				+if __name__=='__main__':
			
 
				+
			
 
				+    start_time = time.time()
			
 
				+    myMSET = MSET()
			
 
				+    # 1、计算各子系统的健康度(子系统包括：发电机组、传动系统(直驱机组无齿轮箱、无数据)、机舱系统、变流器系统、电网环境、辅助系统（无数据))
			
 
				+    # 1.1、发电机组健康度评分
			
 
				+    Title = pd.read_excel(r'34_QITAIHE.xlsx', header=None, nrows=1, usecols=[12, 13, 14])
			
 
				+    df_D = pd.read_excel(r'34_QITAIHE.xlsx',
			
 
				+                           usecols=[0,12, 13, 14,], parse_dates=True)  # 读取温度指标：齿轮箱油温12、驱动侧发电机轴承温度13、非驱动侧发电机轴承温度14
			
 
				+
			
 
				+    cols = df_D.columns
			
 
				+    df_D[cols] = df_D[cols].apply(pd.to_numeric, errors='coerce')
			
 
				+    df_D = df_D.dropna()  # 删除空行和非数字项
			
 
				+
			
 
				+    x_date = df_D.iloc[len(df_D) // 2 + 1:, 0]  #获取时间序列
			
 
				+    x_date = pd.to_datetime(x_date)
			
 
				+    df_Temp = df_D.iloc[:, 1:]  #获取除时间列以外的数据
			
 
				+    df_Temp_values = df_Temp.values
			
 
				+    df_Temp_values = np.array(df_Temp_values)
			
 
				+    [m, n] = np.shape(df_Temp_values)
			
 
				+    # Residual = []
			
 
				+    flag_Spart_data = []
			
 
				+    for i in range(0, n):
			
 
				+        df_Temp_i = df_Temp_values[:, i]
			
 
				+        trainDataSet_data = df_Temp_i[0:len(df_Temp_i) // 2]
			
 
				+        testDataSet_data = df_Temp_i[len(df_Temp_i) // 2 + 1:]
			
 
				+        trainDataSet_data = np.reshape(trainDataSet_data, (len(trainDataSet_data), 1))
			
 
				+        testDataSet_data = np.reshape(testDataSet_data, (len(testDataSet_data), 1))
			
 
				+
			
 
				+        myMSET.genDLMatrix(trainDataSet_data, 60, 5)
			
 
				+        # stateResidual = self.calcStateResidual(testDataSet_data)
			
 
				+        flag_data = myMSET.calcSPRT(testDataSet_data, np.array(1), decisionGroup=1)
			
 
				+        # Residual.append(stateResidual)
			
 
				+        flag_Spart_data.append(flag_data)
			
 
				+    flag_Spart_data = np.array(flag_Spart_data)
			
 
				+    flag_Spart_data = flag_Spart_data.T
			
 
				+    Temp1 = flag_Spart_data[:,0]
			
 
				+    Temp2 = flag_Spart_data[:,1]
			
 
				+    Temp3 = flag_Spart_data[:,2]
			
 
				+    Temp1_lable = Title.iloc[0, 0]
			
 
				+    Temp2_lable = Title.iloc[0, 1]
			
 
				+    Temp3_lable = Title.iloc[0, 2]
			
 
				+
			
 
				+    print(x_date)
			
 
				+
			
 
				+    # alarmedFlag = np.array([[i, Temp1[i]] for i, x in enumerate(Temp1) if x > 0.99])  # Flag中选出大于0.99的点
			
 
				+
			
 
				+    plt.rcParams['font.sans-serif'] = ['SimHei']  # 显示中文标签
			
 
				+    plt.close('all')
			
 
				+    plt.subplot(311)
			
 
				+    plt.plot(x_date, Temp1, 'b-o', label=Temp1_lable)
			
 
				+    plt.ylabel(Temp1_lable)
			
 
				+    plt.xlabel('时间')
			
 
				+
			
 
				+    # plt.scatter(alarmedFlag1[:, 0], alarmedFlag1[:, 2], marker='x', c='r')
			
 
				+    plt.subplot(312)
			
 
				+    plt.plot(x_date, Temp2)
			
 
				+    plt.ylabel(Temp2_lable)
			
 
				+    plt.xlabel('时间')
			
 
				+    #plt.scatter(alarmedFlag[:, 0], alarmedFlag[:, 1], marker='x', c='r')
			
 
				+    plt.subplot(313)
			
 
				+    plt.plot(x_date, Temp3)
			
 
				+    plt.ylabel(Temp3_lable)
			
 
				+    plt.xlabel('时间')
			
 
				+    #plt.scatter(alarmedFlag[:, 0], alarmedFlag[:, 1], marker='x', c='r')
			
 
				+    plt.show()
			
 
				+    print(Title)
			
 
				+    print(flag_Spart_data)
			
 
				+    print(np.shape(flag_Spart_data))
			
 
				+
			
 
				+
			
 
				+    end_time = time.time()
			
 
				+    execution_time = end_time - start_time
			
 
				+    print(f"Execution time: {execution_time} seconds")
			
 
				+
			
 
				+
			
 
				+
			
 
				+#
			
 
				+
			
 
				+    '''
			
 
				+    f = open("speed_vib.txt", "r")
			
 
				+    data1 = f.read()
			
 
				+    f.close()
			
 
				+    data1 = data1.split('\n')
			
 
				+    rpm = [(row.split('\t')[0]).strip() for row in data1]
			
 
				+    vib = [(row.split('\t')[-1]).strip() for row in data1]
			
 
				+
			
 
				+    # print(rpm)
			
 
				+    rpm = np.array(rpm).astype(np.float64)
			
 
				+    vib = np.array(vib).astype(np.float64)
			
 
				+
			
 
				+    #vib = [(x-np.mean(vib))/np.std(vib) for x in vib]
			
 
				+    #print(vib)
			
 
				+    trainDataSet = [vib[i] for i in range(0,100) if vib[i] < 5]
			
 
				+    trainDataSet = np.reshape(trainDataSet,(len(trainDataSet),1))
			
 
				+    testDataSet = np.reshape(vib, (len(vib),1))
			
 
				+    '''
			
 
				+
			
 
				+
			
 
				+    # Title = pd.read_csv(r'F1710001001.csv', header=None, nrows=1, usecols=[36,37,38], encoding='gbk')
			
 
				+
			
 
				+    '''
			
 
				+    alarmedFlag = np.array([[i, flag[i]] for i, x in enumerate(flag) if x > 0.99])  # Flag中选出大于0.99的点
			
 
				+    plt.close('all')
			
 
				+    plt.subplot(211)
			
 
				+    plt.plot(testDataSet)
			
 
				+    plt.ylabel('Vibration')
			
 
				+    plt.xlabel('Sample index')
			
 
				+    plt.subplot(212)
			
 
				+    plt.plot(flag)
			
 
				+    plt.ylabel('SPRT results')
			
 
				+    plt.xlabel('Sample index')
			
 
				+    plt.scatter(alarmedFlag[:,0], alarmedFlag[:,1], marker='x',c='r')
			
 
				+
			
 
				+    plt.show()
			
 
				+    '''
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+