|
@@ -0,0 +1,87 @@
|
|
|
+import multiprocessing
|
|
|
+
|
|
|
+from utils.file.trans_methods import *
|
|
|
+
|
|
|
+
|
|
|
def boolean_is_check_data(df_vas):
    """Tell whether ``df_vas`` contains any of the check-state markers.

    The markers are the strings ``'Checked'``, ``'Indeterminate'`` and
    ``'Unchecked'``. Membership is evaluated with the ``in`` operator, so
    the exact matching rule is whatever ``df_vas`` implements for ``in``.

    Args:
        df_vas: Any container supporting the ``in`` operator.

    Returns:
        True if at least one marker is found in ``df_vas``, else False.
    """
    markers = ('Checked', 'Indeterminate', 'Unchecked')
    # any() stops at the first hit, mirroring an early return.
    return any(marker in df_vas for marker in markers)
|
|
|
+
|
|
|
+
|
|
|
def _column_suffixes(df):
    """Return the last '_'-separated token of each column label of ``df``.

    The column labelled exactly 'sheet_name' is skipped.
    """
    return [str(col).split('_')[-1] for col in df.columns if col != 'sheet_name']


def _suffixes_by_check_flag(dfs):
    """Group the column suffixes of ``dfs`` by their check-data flag.

    Returns a dict with keys ``False`` and ``True``; each value is the set
    of suffixes collected from the frames for which
    ``boolean_is_check_data(df.values)`` returned that flag.
    """
    grouped = {False: set(), True: set()}
    for df in dfs:
        # Classify each frame once instead of once per output file.
        has_check_data = bool(boolean_is_check_data(df.values))
        grouped[has_check_data].update(_column_suffixes(df))
    return grouped


def compareTwoFolders(df1s, df2s):
    """Compare the column-name suffixes found in two collections of frames.

    Two reports are produced in the current working directory:

    * ``col_compare.csv``       - frames without check-state markers
    * ``col_compare_falut.csv`` - frames with check-state markers

    Each report has four columns: the sorted suffixes from ``df1s``, the
    sorted suffixes from ``df2s``, the suffixes only present in ``df1s``
    and the suffixes only present in ``df2s``. Shorter columns are padded
    with empty cells. The four lists are also printed to stdout, in that
    order, before each file is written.

    Args:
        df1s: Iterable of objects exposing ``.columns`` and ``.values``
            (presumably pandas DataFrames - confirm against the reader).
        df2s: Same kind of iterable for the second data source.

    Returns:
        None. Results are emitted via ``print`` and the two CSV files.
    """
    # Local imports keep this block self-contained within the file.
    import csv
    from itertools import zip_longest

    grouped1 = _suffixes_by_check_flag(df1s)
    grouped2 = _suffixes_by_check_flag(df2s)

    # Non-check report first, then the check ("fault") report.
    for is_fault in (False, True):
        set1 = grouped1[is_fault]
        set2 = grouped2[is_fault]

        list1 = sorted(set1)
        list2 = sorted(set2)
        print(list1)
        print(list2)

        list3 = sorted(set1 - set2)  # only in the first source
        list4 = sorted(set2 - set1)  # only in the second source
        print(list3)
        print(list4)

        file_name = 'col_compare_falut.csv' if is_fault else 'col_compare.csv'
        with open(file_name, 'w', encoding='utf8') as f:
            # csv.writer quotes cells containing commas/quotes, which a raw
            # ",".join would silently turn into extra columns.
            writer = csv.writer(f, lineterminator='\n')
            # Header: provided by the other party / obtained by us /
            # extra fields on their side / extra fields on our side.
            writer.writerow(["对方提供", "自己获取", "对方提供多的字段", "自己提供多的字段"])
            # zip_longest pads the shorter lists with empty cells.
            writer.writerows(zip_longest(list1, list2, list3, list4, fillvalue=''))
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Script entry point: load every Excel file from two directories in
    # parallel, compare their column names and print the elapsed time.

    # Import explicitly rather than relying on the wildcard import above to
    # happen to expose the ``datetime`` module.
    import datetime

    begin = datetime.datetime.now()

    dir1 = r'D:\data\新华水电\风机SCADA数据\9月风机数据_对方复制'
    dir2 = r'D:\data\新华水电\风机SCADA数据\自己复制'
    files1 = read_excel_files(dir1)
    files2 = read_excel_files(dir2)

    # One worker pool serves both directories; spawning a second pool only
    # repeats the process start-up cost.
    with multiprocessing.Pool(10) as pool:
        df1s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files1])
        df2s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files2])

    compareTwoFolders(df1s, df2s)

    print(datetime.datetime.now() - begin)
|