lcb 1 month ago
revision
0e534ca827

+ 3 - 0
.idea/.gitignore

@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml

+ 8 - 0
.idea/Health_Evaluation_Git.iml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="C:\ProgramData\anaconda3" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

+ 6 - 0
.idea/inspectionProfiles/profiles_settings.xml

@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

+ 4 - 0
.idea/misc.xml

@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="C:\ProgramData\anaconda3" project-jdk-type="Python SDK" />
+</project>

+ 8 - 0
.idea/modules.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/Health_Evaluation_Git.iml" filepath="$PROJECT_DIR$/.idea/Health_Evaluation_Git.iml" />
+    </modules>
+  </component>
+</project>

+ 4 - 0
.idea/vcs.xml

@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings" defaultProject="true" />
+</project>

BIN
F26_SEP_ZHANGYAOXIAN.xlsx


+ 418 - 0
balltreeMSET.PY

@@ -0,0 +1,418 @@
+# @Time    : 2019-6-19
+# @Author  : wilbur.cheng@SKF
+# @FileName: baseMSET.py
+# @Software: a package in the SKF anomaly detection library implementing the Multivariate State Estimation
+#            Technique (MSET). It includes all the functions used in MSET: similarity calculation, the state
+#            memory matrix D, the normal state matrix L, residual calculation, and a basic SPRT (Sequential
+#            Probability Ratio Test) function for binary hypothesis testing.
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.neighbors import BallTree
+import openpyxl
+import time
+
+class MSET():
+
+    matrixD = None
+    matrixL = None
+    healthyResidual = None
+
+    normalDataBallTree = None
+
+
+
+    def __init__(self):
+        self.model = None
+
+
+    def calcSimilarity(self, x, y, m='euc'):
+        """
+        Calculate the similarity of two feature vectors.
+        :param x: one of the input feature vectors
+        :param y: one of the input feature vectors
+        :param m: similarity calculation method; the default 'euc' uses Euclidean distance,
+                    and a city-block (Manhattan) distance function is used when m is set to 'cbd'.
+        :return: the similarity of the two vectors, float, range (0,1]
+        """
+
+        if (len(x) != len(y)):
+            return 0
+
+        if (m == 'cbd'):
+            dSimilarity = [1/(1+np.abs(p-q)) for p,q in zip(x,y)]
+            dSimilarity = np.sum(dSimilarity)/len(x)
+        else:
+            dSimilarity = [np.power(p-q,2) for p, q in zip(x, y)]
+            dSimilarity = 1/(1+np.sqrt(np.sum(dSimilarity)))
+
+        return dSimilarity
+
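+    # Illustrative sketch (not part of the original commit): how the two metrics behave on toy
+    # vectors. Identical vectors give similarity 1.0, and similarity decays toward 0 with distance.
+    #
+    #   mset = MSET()
+    #   mset.calcSimilarity([0.0, 1.0], [0.0, 1.0])             # -> 1.0
+    #   mset.calcSimilarity([0.0, 1.0], [3.0, 5.0])             # -> 1/(1+5) = 0.1667
+    #   mset.calcSimilarity([0.0, 1.0], [3.0, 5.0], m='cbd')    # -> (1/4 + 1/5)/2 = 0.225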
+
+    def genDLMatrix(self, trainDataset, dataSize4D=100, dataSize4L=50):
+        """
+        automatically generate the D and L matrix from training data set, assuming the training data set is all normal
+        state data.
+        :param trainDataset: 2D array, [count of data, length of feature]
+        :param dataSize4D: minimized count of state for matrix D
+        :param dataSize4L: minimized count of state for matrix L
+        :return: 0 if successful, -1 if fail
+        """
+        [m,n] = np.shape(trainDataset)
+
+        if m < dataSize4D + dataSize4L:
+            print('training dataset is too small to generate the matrix D and L')
+            return -1
+
+        self.matrixD = []
+        selectIndex4D = []
+        # Step 1: add all the state with minimum or maximum value in each feature into Matrix D
+
+        for i in range(0, n):
+            feature_i = trainDataset[:,i]
+            minIndex = np.argmin(feature_i)
+            maxIndex = np.argmax(feature_i)
+
+            self.matrixD.append(trainDataset[minIndex, :].tolist())
+            selectIndex4D.append(minIndex)
+            self.matrixD.append(trainDataset[maxIndex, :].tolist())
+            selectIndex4D.append(maxIndex)
+
+        # Step 2: iteratively add the state with the maximum average distance to the states in selected matrixD
+        while True:
+
+            if (len(selectIndex4D) >= dataSize4D):
+                break
+
+            # Get the free state list
+            freeStateList = list(set(np.arange(0, len(trainDataset))) - set(selectIndex4D))
+
+            # Calculate the average dist of each state in free to selected state in matrix D
+            distList = []
+            for i in freeStateList:
+                tmpState = trainDataset[i]
+                tmpDist = [1-self.calcSimilarity(x, tmpState) for x in self.matrixD]
+                distList.append(np.mean(tmpDist))
+
+            # select the free state with largest average distance to states in matrixD, and update matrixD
+            selectId = freeStateList[distList.index(np.max(distList))]
+            self.matrixD.append(trainDataset[selectId, :].tolist())
+            selectIndex4D.append(selectId)
+        #format matrixD from list to array
+        self.matrixD = np.array(self.matrixD)
+        self.normalDataBallTree = BallTree(self.matrixD, leaf_size=4, metric = lambda i,j: 1-self.calcSimilarity(i,j))
+
+
+        # Step 3. select remaining state for matrix L
+        #index4L = list(set(np.arange(0, len(trainDataset))) - set(selectIndex4D))
+        #self.matrixL = trainDataset[index4L, :]
+        # consider the limited dataset, using all the train data to matrix L
+        self.matrixL = trainDataset
+
+        # Calculate the healthy Residual from matrix L
+        lamdaRatio = 1e-3
+        [m, n] = np.shape(self.matrixD)
+        # the dissimilarity matrix of D with itself is m x m, so the regularization term must be np.eye(m)
+        self.DDSimilarity = np.array([[1-self.calcSimilarity(x,y) for x in self.matrixD] for y in self.matrixD]) + lamdaRatio*np.eye(m)
+        self.DDSimilarity = 1/self.DDSimilarity
+
+        #self.healthyResidual = self.calcResidual(self.matrixL)
+        self.healthyResidual = self.calcResidualByLocallyWeightedLR(self.matrixL)
+
+
+        return 0
+
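+    # Illustrative sketch (not part of the original commit): building D and L from a synthetic
+    # univariate normal-state signal, mirroring how calcHealthy() below calls this method.
+    #
+    #   rng = np.random.default_rng(0)
+    #   train = rng.normal(50.0, 2.0, size=(200, 1))          # 200 healthy samples, 1 feature
+    #   mset = MSET()
+    #   mset.genDLMatrix(train, dataSize4D=60, dataSize4L=5)  # returns 0 on success
+    #   # mset.matrixD now holds >= 60 representative states; mset.matrixL is the full training set
+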
+    def calcResidualByLocallyWeightedLR(self, newStates):
+        """
+        Find the k nearest neighbors of each input state in the BallTree over matrix D, then estimate the state by a locally weighted average.
+        :param newStates: input states list
+        :return: residual R_x
+        """
+        [m,n] = np.shape(newStates)
+        est_X = []
+        # print(newStates)
+        for x in newStates:
+            (dist, iList) = self.normalDataBallTree.query([x], 20, return_distance=True)
+            weight = 1/(dist[0]+1e-1)
+            weight = weight/sum(weight)
+            # weighted average of the 20 nearest states in matrix D; axis=0 keeps the feature dimension
+            eState = np.sum([w*self.matrixD[i] for w, i in zip(weight, iList[0])], axis=0)
+            est_X.append(eState)
+
+        est_X = np.reshape(est_X, (len(est_X), n))
+        # print(est_X)
+        # print(newStates)
+        return est_X - newStates
+
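+    # Illustrative sketch (not part of the original commit), continuing the genDLMatrix() sketch
+    # above: residuals of new states against the healthy model; near-zero residuals indicate health.
+    #
+    #   test = rng.normal(50.0, 2.0, size=(50, 1))
+    #   residual = mset.calcResidualByLocallyWeightedLR(test)  # shape (50, 1)
+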
+    def calcStateResidual(self, newsStates):
+        stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
+        return stateResidual
+
+    def calcSPRT(self, newsStates, feature_weight, alpha=0.1, beta=0.1, decisionGroup=5):
+        """
+        anomaly detection based on Wald's SPRT algorithm; refer to A. Wald, Sequential Analysis, Wiley, New York, NY, USA, 1947
+        :param newsStates: input states list
+        :param feature_weight: the importance weight of each feature, normalized and nonnegative
+        :param alpha: prescribed false-alarm rate, 0 < alpha < 1
+        :param beta: prescribed missed-alarm rate, 0 < beta < 1
+        :param decisionGroup: length of the test sample when the decision is made
+
+        :return: a health-style score in [0, 100] per sliding group of states: the SPRT statistic,
+                 clipped to the decision thresholds and normalized, mapped as 100 - si*100
+        """
+
+        # Step 1. transfer the raw residual vector to dimension reduced residual using feature weight
+        #stateResidual = self.calcResidual(newsStates)
+        stateResidual = self.calcResidualByLocallyWeightedLR(newsStates)
+        # print(stateResidual)
+        weightedStateResidual = [np.dot(x, feature_weight) for x in stateResidual]
+        # print(weightedStateResidual)
+        weightedHealthyResidual = [np.dot(x, feature_weight) for x in self.healthyResidual]
+
+        '''
+        plt.subplot(211)
+        plt.plot(weightedHealthyResidual)
+        plt.xlabel('Sample index')
+        plt.ylabel('Healthy State Residual')
+        plt.subplot(212)
+        plt.plot(weightedStateResidual)
+        plt.xlabel('Sample index')
+        plt.ylabel('All State Residual')
+        plt.show()
+        '''
+        # Calculate the distribution of health state residual
+        mu0 = np.mean(weightedHealthyResidual)
+        sigma0 = np.std(weightedHealthyResidual)
+
+
+        #sigma1 = np.std(weightedStateResidual)
+
+        lowThres = np.log(beta/(1-alpha))
+        highThres = np.log((1-beta)/alpha)
+
+        flag = []
+        for i in range(0, len(newsStates)-decisionGroup+1):
+            # For each group, estimate the new state residual distribution,
+            # then evaluate the hypothesis-testing statistic
+            mu1 = np.mean(weightedStateResidual[i:i+decisionGroup])
+            si = np.sum(weightedStateResidual[i:i+decisionGroup])*(mu1-mu0)/sigma0**2 - decisionGroup*(mu1**2-mu0**2)/(2*sigma0**2)
+
+            if (si > highThres):
+                si = highThres
+            if (si < lowThres):
+                si = lowThres
+
+            if (si > 0):
+                si = si/highThres
+            if (si < 0):
+                si = si/lowThres
+            si = 100-si*100
+            flag.append(si)
+
+        return flag
+
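+    # Illustrative sketch (not part of the original commit): with the default alpha = beta = 0.1 the
+    # decision thresholds are lowThres = ln(0.1/0.9) ~ -2.197 and highThres = ln(0.9/0.1) ~ 2.197;
+    # the group statistic si is clipped to this band before being rescaled to a 0-100 score.
+    #
+    #   flags = mset.calcSPRT(test, np.array(1), alpha=0.1, beta=0.1, decisionGroup=5)
+    #   # len(flags) == len(test) - 5 + 1, one score per sliding window of 5 states
+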
+    def CRITIC_prepare(self, data, flag=1):  # data preprocessing before the weight calculation
+        '''
+
+        :param data: input data as a DataFrame
+        :param flag: flag=0 normalizes positively-oriented features, flag=1 normalizes negatively-oriented features
+        :return: a DataFrame
+        '''
+        data_columns = data.columns.values
+        maxnum = np.max(data, axis=0)
+        minnum = np.min(data, axis=0)
+        if flag == 0:  # normalization for positive indicators (larger is better)
+            Y = (data - minnum) * 1.0 / (maxnum - minnum)
+        if flag == 1:  # normalization for negative indicators (smaller is better)
+            Y = (maxnum - data) / (maxnum - minnum)
+
+        # guard against ln(0) in later calculations
+        Y0 = np.array(Y * 1.0)
+        Y0[np.where(Y0 == 0)] = 0.00001
+        Y0 = pd.DataFrame(Y0, columns=data_columns)
+        return Y0
+
+    def CRITIC(self, data):  # CRITIC objective weighting of the measurement points within a subsystem
+        '''
+
+        :param data: normalized, preprocessed DataFrame data
+        :return: the weights as a Series (the indicator-sorted score computation is left commented out below)
+        '''
+        n, m = data.shape
+        s = np.std(data, axis=0)
+        r = np.corrcoef(data, rowvar=False)
+        a = np.sum(1 - r, axis=1)
+        c = s * a
+        w = c / np.sum(c)
+        # score = np.round(np.sum(data * w, axis=1), 6)
+        # data['score'] = score
+        # data.sort_values(by=['score'], ascending=False, inplace=True)
+        return w
+
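+    # Illustrative sketch (not part of the original commit): CRITIC weights for a toy 3-column
+    # DataFrame. Columns with a higher standard deviation and lower correlation to the others
+    # receive larger weights, and the weights sum to 1.
+    #
+    #   df = pd.DataFrame({'a': [1.0, 2.0, 3.0, 4.0],
+    #                      'b': [2.0, 1.5, 3.5, 3.0],
+    #                      'c': [9.0, 1.0, 8.0, 2.0]})
+    #   w = mset.CRITIC(mset.CRITIC_prepare(df))   # length-3 weight vector, w.sum() == 1
+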
+    def calcHealthy(self, df_data):    # calculate the health score of a subsystem
+        cols = df_data.columns
+        df_data[cols] = df_data[cols].apply(pd.to_numeric, errors='coerce')
+        df_data = df_data.dropna()  # drop empty rows and non-numeric entries
+
+        W_prepare_data = self.CRITIC_prepare(df_data)
+        Weights_data = self.CRITIC(W_prepare_data)
+
+
+        df_data_values = df_data.values
+        df_data_values = np.array(df_data_values)
+        [m, n] = np.shape(df_data_values)
+        # Residual = []
+        flag_Spart_data = []
+        for i in range(0, n):
+            df_data_i = df_data_values[:, i]
+            trainDataSet_data = df_data_i[0:len(df_data_i) // 2]
+            testDataSet_data = df_data_i[len(df_data_i) // 2 + 1:]
+            trainDataSet_data = np.reshape(trainDataSet_data, (len(trainDataSet_data), 1))
+            testDataSet_data = np.reshape(testDataSet_data, (len(testDataSet_data), 1))
+
+            self.genDLMatrix(trainDataSet_data, 60, 5)
+            # stateResidual = self.calcStateResidual(testDataSet_data)
+            flag_data = self.calcSPRT(testDataSet_data, np.array(1), decisionGroup=1)  # single feature, so the feature weight is the scalar 1
+            # Residual.append(stateResidual)
+            flag_Spart_data.append(flag_data)
+        # weights = np.array([1/3, 1/3, 1/3])
+        W_data = np.array(Weights_data)
+        W_data = W_data.reshape(-1, 1)
+        flag_Spart_data = np.array(flag_Spart_data)
+        flag_Spart_data = flag_Spart_data.T
+        Score_data = np.dot(flag_Spart_data, W_data)
+        Health_data = np.mean(Score_data)
+
+        return Weights_data, Health_data
+
+    def ahp(self, matrix):  # AHP (Analytic Hierarchy Process, subjective) weighting of the turbine subsystems
+        # compute the eigenvalues and eigenvectors of the judgment matrix
+        eigenvalue, eigenvector = np.linalg.eig(matrix)
+        # take the eigenvector of the largest eigenvalue and normalize it
+        max_eigenvalue_index = np.argmax(eigenvalue)
+        max_eigenvalue = eigenvalue[max_eigenvalue_index]
+        weight = eigenvector[:, max_eigenvalue_index]
+        weight = weight / np.sum(weight)
+        weight = weight.real
+        return weight, max_eigenvalue
+
+    def consistency_check(self, matrix, weight):  # consistency check for the AHP judgment matrix
+        n = len(matrix)
+        # consistency index CI
+        lambda_max = np.sum(np.dot(matrix, weight) / weight) / n
+        CI = (lambda_max - n) / (n - 1)
+
+        # random consistency index RI; the table covers matrices up to order 19
+        # (RI is 0 for n <= 2, where CR is not meaningful)
+        RI_list = [0, 0, 0.58, 0.90, 1.12, 1.24, 1.32, 1.41, 1.45, 1.49, 1.52, 1.54, 1.56, 1.58, 1.59, 1.61, 1.63, 1.64,
+                   1.65]
+        RI = RI_list[n - 1]
+
+        # consistency ratio CR
+        CR = CI / RI
+        return CI, CR
+
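+# Note (not part of the original commit): by AHP convention a judgment matrix is considered
+# acceptably consistent when CR < 0.1; the __main__ block below computes CR1 but does not test it.
+#
+#   CI, CR = myMSET.consistency_check(matrix_subsys, weight_subsys)
+#   assert CR < 0.1, 'judgment matrix is inconsistent; revise the pairwise comparisons'
+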
+if __name__=='__main__':
+
+    start_time = time.time()
+    myMSET = MSET()
+    # 1. Calculate the health score of each subsystem (subsystems: generator, drivetrain (direct-drive
+    #    unit, no gearbox, no data), nacelle, converter, grid environment, auxiliary systems (no data))
+    # 1.1 Generator health score
+    df_Gen = pd.read_excel(r'F26_SEP_ZHANGYAOXIAN.xlsx',
+                           usecols=[10, 11, 12, 17])  # generator indicators: U-phase temperature (10), V-phase temperature (11), W-phase temperature (12), bearing temperature (17)
+    W_Gen,Health_Gen = myMSET.calcHealthy(df_Gen)
+    print(W_Gen)
+    print('Generator health score:', Health_Gen)
+
+    # 1.2 Nacelle system health score
+    df_Nac = pd.read_excel(r'F26_SEP_ZHANGYAOXIAN.xlsx',
+                           usecols=[23,24,41,42]) # nacelle indicators: tower fore-aft vibration (23), tower side-side vibration (24), nacelle position (41), nacelle temperature (42)
+    W_Nac, Health_Nac = myMSET.calcHealthy(df_Nac)
+    print(W_Nac)
+    print('Nacelle system health score:', Health_Nac)
+
+    # 1.3 Converter system health score
+    df_Con = pd.read_excel(r'F26_SEP_ZHANGYAOXIAN.xlsx',
+                           usecols=[18,19]) # converter indicators: converter active power (19), converter coolant temperature (18)
+    W_Con, Health_Con = myMSET.calcHealthy(df_Con)
+    print(W_Con)
+    print('Converter system health score:', Health_Con)
+
+    # 1.4 Grid environment health score
+    df_Grid = pd.read_excel(r'F26_SEP_ZHANGYAOXIAN.xlsx',
+                            usecols=[32,38,64,65,66]) # grid indicators: active power (38), reactive power (32), grid phase-A current (64), grid phase-B current (65), grid phase-C current (66)
+    W_Grid, Health_Grid = myMSET.calcHealthy(df_Grid)
+    print(W_Grid)
+    print('Grid environment health score:', Health_Grid)
+
+    # 2. Calculate the weight of each subsystem
+    # judgment matrix (generator, nacelle system, converter system, grid environment)
+    matrix_subsys = np.array([
+        [1, 2, 3, 4],
+        [1/2, 1, 2, 3],
+        [1/3, 1/2, 1, 2],
+        [1/4, 1/3, 1/2, 1],
+    ])
+    # weights and largest eigenvalue of the judgment matrix
+    weight_subsys, max_eigenvalue_subsys = myMSET.ahp(matrix_subsys)
+    print(weight_subsys)
+
+    # consistency check
+    CI1, CR1 = myMSET.consistency_check(matrix_subsys, weight_subsys)
+
+    # 3. Calculate the overall turbine health score
+    Score_subsys = np.array([Health_Gen, Health_Nac, Health_Con, Health_Grid])
+    weight_subsys = weight_subsys.reshape(-1, 1)
+    Score_WT = np.dot(Score_subsys, weight_subsys)
+    print('Overall turbine health score:', Score_WT)
+
+    end_time = time.time()
+    execution_time = end_time - start_time
+    print(f"Execution time: {execution_time} seconds")
+
+
+
+#
+
+    '''
+    f = open("speed_vib.txt", "r")
+    data1 = f.read()
+    f.close()
+    data1 = data1.split('\n')
+    rpm = [(row.split('\t')[0]).strip() for row in data1]
+    vib = [(row.split('\t')[-1]).strip() for row in data1]
+
+    # print(rpm)
+    rpm = np.array(rpm).astype(np.float64)
+    vib = np.array(vib).astype(np.float64)
+
+    #vib = [(x-np.mean(vib))/np.std(vib) for x in vib]
+    #print(vib)
+    trainDataSet = [vib[i] for i in range(0,100) if vib[i] < 5]
+    trainDataSet = np.reshape(trainDataSet,(len(trainDataSet),1))
+    testDataSet = np.reshape(vib, (len(vib),1))
+    '''
+
+
+    # Title = pd.read_csv(r'F1710001001.csv', header=None, nrows=1, usecols=[36,37,38], encoding='gbk')
+
+    '''
+    alarmedFlag = np.array([[i, flag[i]] for i, x in enumerate(flag) if x > 0.99])  # select points in flag greater than 0.99
+    plt.close('all')
+    plt.subplot(211)
+    plt.plot(testDataSet)
+    plt.ylabel('Vibration')
+    plt.xlabel('Sample index')
+    plt.subplot(212)
+    plt.plot(flag)
+    plt.ylabel('SPRT results')
+    plt.xlabel('Sample index')
+    plt.scatter(alarmedFlag[:,0], alarmedFlag[:,1], marker='x',c='r')
+
+    plt.show()
+    '''
+
+
+
+
+
+