9 ay önce · a48b07fae9
--- a/service/plt_service.py
+++ b/service/plt_service.py
@@ -159,3 +159,5 @@ if __name__ == '__main__':
 
				     print(get_data_by_batch_no_and_type("test_", "second"))
			
 
				     # print(update_trans_status_success("test_唐龙-定时任务测试", "second", 10))
			
 
				     begin = datetime.datetime.now()
			
 
				+
			
 
				+    print(get_all_wind('WOF091200006'))
			
--- a/tmp_file/organize_xinhua_files.py
+++ b/tmp_file/organize_xinhua_files.py
@@ -50,7 +50,7 @@ def create_file_path(path, is_file_path=False):
 
				 
			
 
				 
			
 
				 def boolean_is_check_data(df_cols):
			
 
				-    fault_list = ['快速停机', '故障名称', '故障代码', '故障停机']
			
 
				+    fault_list = ['快速停机', '故障名称', '故障代码', '故障停机', '人工停机', '风机紧急停机']
			
 
				 
			
 
				     df_cols = [str(i).split('_')[-1] for i in df_cols]
			
 
				     for fault in fault_list:
			
--- a/tmp_file/对比文件夹列名差值.py
+++ b/tmp_file/对比文件夹列名差值.py
@@ -1,92 +1,106 @@
 
				 import multiprocessing
			
 
				+import os
			
 
				+
			
 
				+import pandas as pd
			
 
				 
			
 
				 from utils.file.trans_methods import *
			
 
				 
			
 
				 
			
 
				-def boolean_is_check_data(df_vas):
			
 
				-    # fault_list = ['Checked', 'Indeterminate', 'Unchecked']
			
 
				-    # for fault in fault_list:
			
 
				-    #     if fault in df_vas:
			
 
				-    #         return True
			
 
				+def boolean_is_check_data(df_cols):
			
 
				+    fault_list = ['快速停机', '故障名称', '故障代码', '故障停机', '人工停机', '风机紧急停机', '远方限功率运行状态']
			
 
				+
			
 
				+    df_cols = [str(i).split('_')[-1] for i in df_cols]
			
 
				+    for fault in fault_list:
			
 
				+        if fault in df_cols:
			
 
				+            return True
			
 
				 
			
 
				     return False
			
 
				 
			
 
				 
			
 
				-def compareTwoFolders(df1s, df2s):
			
 
				-    for is_falut in [False]:
			
 
				+def compareTwoFolders(df1s, other_dfs):
			
 
				+    for is_falut in [True, False]:
			
 
				         list1 = list()
			
 
				-        for df in df1s:
			
 
				-            tmp_list = [str(i).split('_')[-1] for i in list(df.columns) if i != 'sheet_name']
			
 
				+        result_df = pd.DataFrame()
			
 
				+        for df1 in df1s:
			
 
				+            tmp_list = [str(i).split('_')[-1] for i in list(df1.columns) if i != 'sheet_name']
			
 
				             if is_falut:
			
 
				-                if boolean_is_check_data(df.values):
			
 
				+                if boolean_is_check_data(df1.columns):
			
 
				                     list1.extend(tmp_list)
			
 
				             else:
			
 
				-                if not boolean_is_check_data(df.values):
			
 
				+                if not boolean_is_check_data(df1.columns):
			
 
				                     list1.extend(tmp_list)
			
 
				 
			
 
				-        list2 = list()
			
 
				-        for df in df2s:
			
 
				-            tmp_list = [str(i).split('_')[-1] for i in list(df.columns) if i != 'sheet_name']
			
 
				-            if is_falut:
			
 
				-                if boolean_is_check_data(df.values):
			
 
				-                    list2.extend(tmp_list)
			
 
				-            else:
			
 
				-                if not boolean_is_check_data(df.values):
			
 
				-                    list2.extend(tmp_list)
			
 
				-
			
 
				         set1 = set(list1)
			
 
				-        set2 = set(list2)
			
 
				 
			
 
				         list1 = list(set1)
			
 
				-        list2 = list(set2)
			
 
				         list1.sort()
			
 
				-        list2.sort()
			
 
				+        list1.extend([''] * 20)
			
 
				+
			
 
				+        result_df['风机1'] = list1
			
 
				 
			
 
				-        print(list1)
			
 
				-        print(list2)
			
 
				+        for wind_name, dfs in other_dfs.items():
			
 
				+            list2 = list()
			
 
				+            for df in dfs:
			
 
				+                tmp_list = [str(i).split('_')[-1] for i in list(df.columns) if i != 'sheet_name']
			
 
				+                if is_falut:
			
 
				+                    if boolean_is_check_data(df.columns):
			
 
				+                        list2.extend(tmp_list)
			
 
				+                else:
			
 
				+                    if not boolean_is_check_data(df.columns):
			
 
				+                        list2.extend(tmp_list)
			
 
				 
			
 
				-        list3 = list(set1 - set2)
			
 
				-        list3.sort()
			
 
				+            set2 = set(list2)
			
 
				+            list2 = list(set2)
			
 
				+            list2.sort()
			
 
				 
			
 
				-        list4 = list(set2 - set1)
			
 
				-        list4.sort()
			
 
				-        print(list3)
			
 
				-        print(list4)
			
 
				+            list3 = list(set1 - set2)
			
 
				+            list3.sort()
			
 
				 
			
 
				-        list5 = list(set1)
			
 
				-        list5.extend(list(set2))
			
 
				-        list5 = list(set(list5))
			
 
				-        list5.sort()
			
 
				+            list4 = list(set2 - set1)
			
 
				+            list4.sort()
			
 
				+            print(list3)
			
 
				+            print(list4)
			
 
				 
			
 
				-        max_count = len(list5)
			
 
				-        list1.extend([''] * (max_count - len(list1)))
			
 
				-        list2.extend([''] * (max_count - len(list2)))
			
 
				-        list3.extend([''] * (max_count - len(list3)))
			
 
				-        list4.extend([''] * (max_count - len(list4)))
			
 
				+            max_count = len(list1)
			
 
				+            list1.extend([''] * (max_count - len(list1)))
			
 
				+            list2.extend([''] * (max_count - len(list2)))
			
 
				+            list3.extend([''] * (max_count - len(list3)))
			
 
				+            list4.extend([''] * (max_count - len(list4)))
			
 
				 
			
 
				-        file_name = 'col_compare.csv' if not is_falut else 'col_compare_falut.csv'
			
 
				-        with open(file_name, 'w', encoding='utf8') as f:
			
 
				-            f.write(",".join(["全部字段", "对方提供(3-25风机)", "自己获取(1-2风机)", "对方提供多的字段", "自己提供多的字段"]))
			
 
				-            f.write('\n')
			
 
				-            for e, a, b, c, d in zip(list5, list1, list2, list3, list4):
			
 
				-                f.write(",".join([e, a, b, c, d]))
			
 
				-                f.write('\n')
			
 
				+            result_df['风机' + str(wind_name) + '_字段'] = list2
			
 
				+            result_df['风机' + str(wind_name) + '_比风机1少字段'] = list3
			
 
				+            result_df['风机' + str(wind_name) + '_比风机1多字段'] = list4
			
 
				 
			
 
				-            f.flush()
			
 
				+        file_name = 'col_compare.csv' if not is_falut else 'col_compare_fault.csv'
			
 
				+        result_df.to_csv(file_name, encoding='utf-8')
			
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				     begin = datetime.datetime.now()
			
 
				-    dir1 = r'D:\data\新华水电\风机SCADA数据\9月风机数据_对方复制'
			
 
				-    dir2 = r'D:\data\新华水电\风机SCADA数据\自己复制'
			
 
				+    dir1 = r'D:\data\新华水电\风机SCADA数据\标准'
			
 
				+    dir2 = r'D:\data\新华水电\风机SCADA数据\9月风机数据'
			
 
				     files1 = read_excel_files(dir1)
			
 
				     files2 = read_excel_files(dir2)
			
 
				     with multiprocessing.Pool(10) as pool:
			
 
				         df1s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files1])
			
 
				 
			
 
				-    with multiprocessing.Pool(10) as pool:
			
 
				-        df2s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files2])
			
 
				-
			
 
				-    compareTwoFolders(df1s, df2s)
			
 
				+    other_dfs = dict()
			
 
				+    for root, dirs, files in os.walk(dir2):
			
 
				+        if dirs:
			
 
				+            for dir in dirs:
			
 
				+                wind_name = dir.split('#')[0]
			
 
				+                for file in os.listdir(dir2 + os.sep + dir):
			
 
				+                    print(dir, file)
			
 
				+                    df = read_file_to_df(os.path.join(dir2, dir, file), nrows=1)
			
 
				+                    if wind_name in other_dfs.keys():
			
 
				+                        other_dfs[wind_name].append(df)
			
 
				+                    else:
			
 
				+                        other_dfs[wind_name] = [df]
			
 
				+
			
 
				+    # with multiprocessing.Pool(10) as pool:
			
 
				+    #     df2s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files2])
			
 
				+    #
			
 
				+
			
 
				+    compareTwoFolders(df1s, other_dfs)
			
 
				 
			
 
				     print(datetime.datetime.now() - begin)
			
--- a/utils/db/ConnectMysql.py
+++ b/utils/db/ConnectMysql.py
@@ -13,7 +13,8 @@ from utils.log.trans_log import trans_print
 
				 class ConnectMysql:
			
 
				 
			
 
				     def __init__(self, connet_name):
			
 
				-        self.yaml_data = yaml_conf(os.environ.get('ETL_CONF'))
			
 
				+        config_path = os.path.abspath(__file__).split("utils")[0] + 'conf' + os.sep + 'etl_config_dev.yaml'
			
 
				+        self.yaml_data = yaml_conf(os.environ.get('ETL_CONF', config_path))
			
 
				         self.connet_name = connet_name
			
 
				         self.config = self.yaml_data[self.connet_name]