"""Compare the column-name suffixes found in two folders of Excel exports.

Reads every file in two hard-coded directories, collects the part of each
column name after its last underscore, and writes a side-by-side comparison
to a CSV file in the current working directory.
"""
import csv
import datetime
import itertools
import multiprocessing

from utils.file.trans_methods import *


def boolean_is_check_data(df_vas):
    """Report whether ``df_vas`` holds "check" data; currently always False.

    The detection logic is disabled. It is kept below, commented out, as the
    only record of the intended check: it would have returned True as soon as
    one of the marker strings was found in ``df_vas``.

    Args:
        df_vas: The values to inspect (callers pass ``df.values``). Unused
            while the check is disabled.

    Returns:
        Always ``False``.
    """
    # fault_list = ['Checked', 'Indeterminate', 'Unchecked']
    # for fault in fault_list:
    #     if fault in df_vas:
    #         return True
    return False


def _collect_column_suffixes(dfs, want_check_data):
    """Return the set of column-name suffixes of the selected frames.

    A frame is selected when ``boolean_is_check_data(df.values)`` equals
    ``want_check_data``. For each selected frame, every column except
    ``'sheet_name'`` contributes the text after its last underscore (or the
    whole name when it contains no underscore).
    """
    suffixes = set()
    for df in dfs:
        if boolean_is_check_data(df.values) != want_check_data:
            continue
        suffixes.update(
            str(col).split('_')[-1] for col in df.columns if col != 'sheet_name'
        )
    return suffixes


def compareTwoFolders(df1s, df2s):
    """Compare column-name suffixes of two groups of frames and dump a CSV.

    Prints four sorted lists (suffixes of the first group, suffixes of the
    second group, suffixes only in the first, suffixes only in the second)
    and writes ``col_compare.csv`` with five columns: the union, each group,
    and each one-sided difference. Shorter columns are padded with empty
    cells.

    Args:
        df1s: Iterable of objects exposing ``.columns`` and ``.values``
            (presumably pandas DataFrames -- confirm against
            ``read_file_to_df``).
        df2s: Same as ``df1s``, for the second group.

    Returns:
        None. The result is the printed output and the CSV file.
    """
    # Only the non-"check" pass is enabled; a True entry would produce
    # 'col_compare_falut.csv' from frames flagged by boolean_is_check_data.
    for is_fault in (False,):
        set1 = _collect_column_suffixes(df1s, is_fault)
        set2 = _collect_column_suffixes(df2s, is_fault)

        list1 = sorted(set1)
        list2 = sorted(set2)
        print(list1)
        print(list2)

        only_in_1 = sorted(set1 - set2)
        only_in_2 = sorted(set2 - set1)
        print(only_in_1)
        print(only_in_2)

        all_fields = sorted(set1 | set2)

        file_name = 'col_compare_falut.csv' if is_fault else 'col_compare.csv'
        with open(file_name, 'w', encoding='utf8') as f:
            # csv.writer quotes names containing commas or quotes, which a
            # plain ",".join() would silently turn into extra columns.
            # lineterminator='\n' together with the text-mode file keeps the
            # line endings the same as the previous hand-written output.
            writer = csv.writer(f, lineterminator='\n')
            # Header (translated): all fields / provided by the other party
            # (turbines 3-25) / obtained by us (turbines 1-2) / extra fields
            # from the other party / extra fields from us.
            writer.writerow(
                ["全部字段", "对方提供(3-25风机)", "自己获取(1-2风机)", "对方提供多的字段", "自己提供多的字段"]
            )
            # The union is the longest column; the others are padded with ''.
            writer.writerows(
                itertools.zip_longest(
                    all_fields, list1, list2, only_in_1, only_in_2, fillvalue=''
                )
            )


def main():
    """Load both folders in parallel, compare them, and print the elapsed time."""
    begin = datetime.datetime.now()

    dir1 = r'D:\data\新华水电\风机SCADA数据\9月风机数据_对方复制'
    dir2 = r'D:\data\新华水电\风机SCADA数据\自己复制'

    files1 = read_excel_files(dir1)
    files2 = read_excel_files(dir2)

    # One pool serves both folders; starmap blocks until each batch is done,
    # so there is no need to pay for spawning the workers twice.
    with multiprocessing.Pool(10) as pool:
        df1s = pool.starmap(read_file_to_df, [(file, [], None, 1) for file in files1])
        df2s = pool.starmap(read_file_to_df, [(file, [], None, 1) for file in files2])

    compareTwoFolders(df1s, df2s)

    print(datetime.datetime.now() - begin)


# The guard is required for multiprocessing on platforms that spawn workers
# by re-importing this module.
if __name__ == '__main__':
    main()