"""Compare the column (field) names of per-turbine SCADA exports.

The first folder is treated as the reference turbine ("风机1"); every
sub-folder of the second folder is another turbine.  For each turbine the
script lists its fields, the reference fields it lacks and the fields it has
in addition, and writes the result to CSV -- once for fault/status sheets and
once for all other sheets.
"""
import datetime
import multiprocessing
import os

import pandas as pd

from utils.file.trans_methods import *

# Column-name suffixes that mark a sheet as a fault/status sheet.
_FAULT_FIELDS = ['快速停机', '故障名称', '故障代码', '故障停机', '人工停机', '风机紧急停机', '远方限功率运行状态']

# Blank rows always appended below the reference column (kept from the
# original layout so existing CSV consumers see the same shape).
_EXTRA_BLANK_ROWS = 20


def boolean_is_check_data(df_cols):
    """Return True if any column name ends (after the last '_') in a fault field.

    :param df_cols: iterable of column labels; each is converted with ``str``.
    :return: True when at least one suffix is listed in ``_FAULT_FIELDS``.
    """
    suffixes = {str(col).split('_')[-1] for col in df_cols}
    return not suffixes.isdisjoint(_FAULT_FIELDS)


def _field_names(df):
    """Return the '_'-suffix of every column of *df* except 'sheet_name'."""
    return [str(col).split('_')[-1] for col in df.columns if col != 'sheet_name']


def _collect_fields(dfs, want_fault):
    """Return the set of field names from the frames whose fault-ness matches.

    A frame contributes only when ``boolean_is_check_data(df.columns)`` equals
    *want_fault*.
    """
    fields = set()
    for df in dfs:
        if boolean_is_check_data(df.columns) == want_fault:
            fields.update(_field_names(df))
    return fields


def compareTwoFolders(df1s, other_dfs):
    """Write field-comparison CSVs for the reference turbine vs. the others.

    Two files are written to the current working directory:
    'col_compare_fault.csv' (frames that contain a fault field) and
    'col_compare.csv' (all remaining frames).  For every other turbine the
    missing and extra field lists are also printed.

    :param df1s: iterable of DataFrames belonging to the reference turbine.
    :param other_dfs: mapping of turbine name -> list of DataFrames.
    """
    for want_fault in (True, False):
        base_fields = _collect_fields(df1s, want_fault)
        columns = {'风机1': sorted(base_fields)}

        for wind_name, dfs in other_dfs.items():
            fields = _collect_fields(dfs, want_fault)
            missing = sorted(base_fields - fields)
            extra = sorted(fields - base_fields)
            print(missing)
            print(extra)

            prefix = '风机' + str(wind_name)
            columns[prefix + '_字段'] = sorted(fields)
            columns[prefix + '_比风机1少字段'] = missing
            columns[prefix + '_比风机1多字段'] = extra

        # Pad every column to a common height.  The height is at least the
        # historical "reference + 20 blanks", but grows when another turbine
        # has more fields than that; previously such a column could not be
        # padded and the DataFrame assignment failed with a length mismatch.
        height = max(
            len(base_fields) + _EXTRA_BLANK_ROWS,
            max(len(values) for values in columns.values()),
        )
        result_df = pd.DataFrame({
            name: values + [''] * (height - len(values))
            for name, values in columns.items()
        })

        file_name = 'col_compare_fault.csv' if want_fault else 'col_compare.csv'
        result_df.to_csv(file_name, encoding='utf-8')


def _read_turbine_folders(base_dir):
    """Read the first row of every file in each immediate sub-folder of *base_dir*.

    The turbine name is the sub-folder name up to the first '#'.

    :return: dict mapping turbine name -> list of values returned by
        ``read_file_to_df(path, nrows=1)``.
    """
    # Only the top level is inspected: paths are built relative to base_dir,
    # so descending further (as a full os.walk does) would point at folders
    # that do not exist under base_dir.
    _, turbine_dirs, _ = next(os.walk(base_dir), (base_dir, [], []))

    frames_by_turbine = dict()
    for dir_name in turbine_dirs:
        wind_name = dir_name.split('#')[0]
        for file_name in os.listdir(base_dir + os.sep + dir_name):
            path = os.path.join(base_dir, dir_name, file_name)
            if not os.path.isfile(path):
                # Nested folders are not data files; skip them.
                continue
            print(dir_name, file_name)
            df = read_file_to_df(path, nrows=1)
            frames_by_turbine.setdefault(wind_name, []).append(df)
    return frames_by_turbine


if __name__ == '__main__':
    begin = datetime.datetime.now()

    dir1 = r'D:\data\新华水电\风机SCADA数据\标准'
    dir2 = r'D:\data\新华水电\风机SCADA数据\9月风机数据'

    files1 = read_excel_files(dir1)
    files2 = read_excel_files(dir2)

    # Reference turbine: files are read in parallel with the positional
    # arguments (file, list(), None, 1) expected by read_file_to_df.
    with multiprocessing.Pool(10) as pool:
        df1s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files1])

    other_dfs = _read_turbine_folders(dir2)

    # Disabled alternative that reads dir2 as one flat file list:
    # with multiprocessing.Pool(10) as pool:
    #     df2s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files2])

    # NOTE(review): the flattened source leaves it ambiguous whether this call
    # had been commented out; it is enabled because nothing else consumes
    # df1s / other_dfs.  Confirm against the original file.
    compareTwoFolders(df1s, other_dfs)

    print(datetime.datetime.now() - begin)