"""Merge per-turbine 10-minute SCADA exports into one CSV file per turbine.

Every input file is read in a worker process, tagged with the turbine number
taken from its file name, and indexed by (timestamp, turbine number). Files
of the same turbine that share a column set are stacked row-wise; groups with
different column sets are then joined side by side on the index.
"""
from collections import defaultdict
from multiprocessing import Pool
import os
import sys

import pandas as pd

# The project root (parent of this script's directory) has to be on sys.path
# before the ``utils`` package can be imported.
path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
print(path)
sys.path.insert(0, path)
print(sys.path)

from utils.file.trans_methods import *  # noqa: E402,F401,F403
from utils.systeminfo.sysinfo import use_files_get_max_cpu_count  # noqa: E402


def read_and_save_file(filename):
    """Read one export file and index it by timestamp and turbine number.

    The turbine number is the part of the file's base name before the first
    underscore. The '描述' column is parsed as a ``%d-%m-%Y %H:%M:%S``
    timestamp and, together with the added '风机号' column, becomes the index.

    Args:
        filename: Path of the file to load. It is passed to
            ``read_file_to_df`` with ``header=1`` (presumably the second row
            holds the column names - confirm against that helper).

    Returns:
        A ``(wind_number, df)`` tuple.

    Raises:
        Exception: Whatever reading or parsing raised; the base name is
            printed first and the exception is re-raised unchanged.
    """
    # Resolved outside the ``try`` so the error handler can always name the
    # file instead of failing on an unbound variable.
    basename = os.path.basename(filename)
    wind_number = basename.split("_")[0]

    try:
        df = read_file_to_df(filename, header=1)
        df['风机号'] = wind_number
        df['描述'] = pd.to_datetime(df['描述'], format='%d-%m-%Y %H:%M:%S')
        df.set_index(keys=['描述', '风机号'], inplace=True)
    except Exception:
        print(basename, 'error')
        raise

    return wind_number, df


def _merge_by_turbine(results):
    """Combine ``(wind_number, df)`` pairs into one DataFrame per turbine.

    Frames of a turbine with the same set of column names are concatenated
    row-wise; the resulting groups are concatenated column-wise on the index.
    """
    grouped = defaultdict(lambda: defaultdict(list))
    for wind_number, df in results:
        # A tuple key cannot be confused by '-' inside a column name and
        # also works for non-string labels.
        grouped[wind_number][tuple(sorted(df.columns))].append(df)

    merged = {}
    for wind_number, by_columns in grouped.items():
        # One concat per group instead of one per file avoids re-copying the
        # accumulated frame for every additional file.
        stacked = [pd.concat(parts, axis=0) for parts in by_columns.values()]
        merged[wind_number] = pd.concat(stacked, axis=1)
    return merged


def main():
    """Read every export under ``read_path`` and write one CSV per turbine."""
    read_path = r'/data/download/collection_data/1进行中/枫香风电场-贵州-大唐/收资数据/枫香风电场收资表/1.10分钟SCADA数据'
    save_path = r'/data/download/collection_data/1进行中/枫香风电场-贵州-大唐/清理数据/枫香风电场收资表/1.10分钟SCADA数据'
    # Alternative local Windows paths:
    # read_path = r'D:\trans_data\枫香\收资数据\min'
    # save_path = r'D:\trans_data\枫香\清理数据\min'

    # Presumably prepares the output directory - confirm against the helper.
    create_file_path(save_path, False)

    all_files = read_excel_files(read_path)
    if not all_files:
        # Nothing to convert; also avoids asking for a pool sized from an
        # empty file list.
        print('no input files found in', read_path)
        return

    process_count = use_files_get_max_cpu_count(all_files)
    with Pool(process_count) as pool:
        results = pool.map(read_and_save_file, all_files)

    for wind_number, df in _merge_by_turbine(results).items():
        df.sort_index(inplace=True)
        df.reset_index(inplace=True)
        df.to_csv(os.path.join(save_path, f"{wind_number}.csv"), encoding="utf-8", index=False)


if __name__ == '__main__':
    main()