"""Compare the column names found in per-turbine SCADA files.

For every turbine the script records which short column names occur in its
files and which names from the overall reference list are absent, then writes
the comparison to a CSV file in the current working directory.
"""
import datetime
from os import path

import pandas as pd

# NOTE(review): the original script obtained ``pd``, ``datetime`` and ``path``
# solely through this star import. The explicit imports above make those
# dependencies visible; the star import is kept (and still takes precedence
# for any name it defines) because ``read_excel_files`` and
# ``read_file_to_df`` come from it.
from utils.file.trans_methods import *

# Short column names whose presence marks a file as "check data"
# (fault / shutdown / working-mode related columns).
_FAULT_MARKER_COLUMNS = frozenset([
    '快速停机', '故障名称', '故障代码', '故障停机', '人工停机', '风机紧急停机', '工作模式',
])


def _short_column_names(columns):
    """Return the text after the last ``_`` of each label, skipping ``sheet_name``."""
    return [str(col).split('_')[-1] for col in columns if col != 'sheet_name']


def boolean_is_check_data(df_cols):
    """Tell whether any column's short name is one of the fault marker names.

    Args:
        df_cols: Iterable of column labels. Each label is converted to ``str``
            and only the part after its last ``_`` is compared.

    Returns:
        ``True`` if at least one short name is a marker name, else ``False``.
    """
    return any(str(col).split('_')[-1] in _FAULT_MARKER_COLUMNS for col in df_cols)


def compareTwoFolders(list1, other_dfs, is_fault=True):
    """Write a CSV comparing each turbine's columns with a reference list.

    The output has one column with the sorted, de-duplicated reference names
    and, per key of ``other_dfs``, one column with the names found for that key
    and one with the reference names it lacks. Shorter columns are padded with
    empty strings.

    Args:
        list1: Reference column names; duplicates are ignored. The caller's
            list is not modified.
        other_dfs: Mapping of turbine name to a list of DataFrames. Only the
            ``columns`` of each DataFrame are read.
        is_fault: When true (the default, matching the previous hard-coded
            behaviour) only DataFrames for which ``boolean_is_check_data`` is
            true are considered and the result goes to
            ``col_compare_fault.csv``. When false only the other DataFrames
            are considered and the result goes to ``col_compare.csv``.
    """
    want_fault = bool(is_fault)
    reference = set(list1)
    columns = {'完整列名': sorted(reference)}

    for wind_name, dfs in other_dfs.items():
        present = set()
        for df in dfs:
            if boolean_is_check_data(df.columns) == want_fault:
                present.update(_short_column_names(df.columns))

        columns[str(wind_name) + '字段'] = sorted(present)
        columns[str(wind_name) + '比完整列名少字段'] = sorted(reference - present)

    # Pad every column to the longest one. Sizing by the reference list alone
    # (as before) raised a length-mismatch error as soon as a turbine had a
    # field that is not in the reference list, or the reference list was empty.
    height = max(len(values) for values in columns.values())
    for values in columns.values():
        values.extend([''] * (height - len(values)))

    file_name = 'col_compare_fault.csv' if want_fault else 'col_compare.csv'
    pd.DataFrame(columns).to_csv(file_name, encoding='utf-8', index=False)


def main():
    """Collect column names from the SCADA directory and write the comparison."""
    begin = datetime.datetime.now()

    dir2 = r'D:\data\新华水电\风机SCADA数据'
    # presumably returns the paths of the data files under dir2 -- confirm
    # against utils.file.trans_methods
    files2 = read_excel_files(dir2)

    other_dfs = dict()
    list1 = list()
    for file in files2:
        # Turbine label: first two characters of the grandparent directory
        # name, then the parent directory name up to its first '#'.
        month = path.basename(path.dirname(path.dirname(file)))[0:2]
        wind_name = month + path.basename(path.dirname(file)).split('#')[0] + '号风机'

        # Only the column labels are used later, hence nrows=1.
        df = read_file_to_df(file, nrows=1)

        if boolean_is_check_data(df.columns):
            list1.extend(_short_column_names(df.columns))

        other_dfs.setdefault(wind_name, []).append(df)

    compareTwoFolders(list1, other_dfs)

    print(datetime.datetime.now() - begin)


if __name__ == '__main__':
    main()