Browse Source

添加新华两方文件对比

wzl 9 months ago
parent
commit
9ca512a4bb
2 changed files with 89 additions and 1 deletion
  1. 87 0
      tmp_file/对比文件夹列名差值.py
  2. 2 1
      utils/log/trans_log.py

+ 87 - 0
tmp_file/对比文件夹列名差值.py

@@ -0,0 +1,87 @@
+import multiprocessing
+
+from utils.file.trans_methods import *
+
+
+def boolean_is_check_data(df_vas):
+    fault_list = ['Checked', 'Indeterminate', 'Unchecked']
+    for fault in fault_list:
+        if fault in df_vas:
+            return True
+
+    return False
+
+
+def compareTwoFolders(df1s, df2s):
+    for is_falut in [False, True]:
+        list1 = list()
+        for df in df1s:
+            tmp_list = [str(i).split('_')[-1] for i in list(df.columns) if i != 'sheet_name']
+            if is_falut:
+                if boolean_is_check_data(df.values):
+                    list1.extend(tmp_list)
+            else:
+                if not boolean_is_check_data(df.values):
+                    list1.extend(tmp_list)
+
+        list2 = list()
+        for df in df2s:
+            tmp_list = [str(i).split('_')[-1] for i in list(df.columns) if i != 'sheet_name']
+            if is_falut:
+                if boolean_is_check_data(df.values):
+                    list2.extend(tmp_list)
+            else:
+                if not boolean_is_check_data(df.values):
+                    list2.extend(tmp_list)
+
+        set1 = set(list1)
+        set2 = set(list2)
+
+        list1 = list(set1)
+        list2 = list(set2)
+        list1.sort()
+        list2.sort()
+
+        print(list1)
+        print(list2)
+
+        list3 = list(set1 - set2)
+        list3.sort()
+
+        list4 = list(set2 - set1)
+        list4.sort()
+        print(list3)
+        print(list4)
+
+        max_count = max(len(list1), len(list2), len(list3), len(list4))
+        list1.extend([''] * (max_count - len(list1)))
+        list2.extend([''] * (max_count - len(list2)))
+        list3.extend([''] * (max_count - len(list3)))
+        list4.extend([''] * (max_count - len(list4)))
+
+        file_name = 'col_compare.csv' if not is_falut else 'col_compare_falut.csv'
+        with open(file_name, 'w', encoding='utf8') as f:
+            f.write(",".join(["对方提供", "自己获取", "对方提供多的字段", "自己提供多的字段"]))
+            f.write('\n')
+            for a, b, c, d in zip(list1, list2, list3, list4):
+                f.write(",".join([a, b, c, d]))
+                f.write('\n')
+
+            f.flush()
+
+
+if __name__ == '__main__':
+    begin = datetime.datetime.now()
+    dir1 = r'D:\data\新华水电\风机SCADA数据\9月风机数据_对方复制'
+    dir2 = r'D:\data\新华水电\风机SCADA数据\自己复制'
+    files1 = read_excel_files(dir1)
+    files2 = read_excel_files(dir2)
+    with multiprocessing.Pool(10) as pool:
+        df1s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files1])
+
+    with multiprocessing.Pool(10) as pool:
+        df2s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files2])
+
+    compareTwoFolders(df1s, df2s)
+
+    print(datetime.datetime.now() - begin)

+ 2 - 1
utils/log/trans_log.py

@@ -35,7 +35,8 @@ stout_handle.setLevel(logging.INFO)
 stout_handle.addFilter(ContextFilter())
 logger.addHandler(stout_handle)
 
-config = yaml_conf(os.environ.get('ETL_CONF'))
+# Default config file: the part of this file's absolute path before the first
+# "utils", followed by conf/etl_config_dev.yaml. It is used only when the
+# ETL_CONF environment variable is not set.
+# NOTE(review): split("utils") cuts at the FIRST occurrence in the absolute
+# path, so a parent directory whose name contains "utils" would produce a
+# wrong location — confirm against the deployment layout.
+config_path = os.path.abspath(__file__).split("utils")[0] + 'conf' + os.sep + 'etl_config_dev.yaml'
+config = yaml_conf(os.environ.get('ETL_CONF', config_path))
 log_path_dir = read_conf(config, 'log_path_dir', "/data/logs")
 
 log_path = log_path_dir + os.sep + r'etl_tools_' + (os.environ['env'] if 'env' in os.environ else 'dev')