"""Split wind-farm fault-event CSV exports into one CSV per calendar month.

Every sub-directory of ``read_dir`` is processed in a worker process: all CSV
files inside it are merged, rows whose start time falls outside the configured
window are dropped, and the remainder is written to
``save_dir/<sub-directory name>/<month>.csv``.
"""
import multiprocessing
import os
from typing import Optional

import pandas as pd

# Raw strings: the original literals relied on unrecognised escapes such as
# "\d", which Python currently keeps verbatim but warns about. The resulting
# path values are unchanged.
read_dir = r'D:\data\张崾先风电场\故障事件数据'
# NOTE(review): unlike read_dir, this path has no leading "张" in the wind-farm
# folder name -- confirm whether that is intentional.
save_dir = r'D:\data\崾先风电场\故障事件数据整理'

# Column holding the event start timestamp, and the half-open [start, end)
# window of events that are kept.
_TIME_COLUMN = '开始时间'
_WINDOW_START = pd.Timestamp('2024-01-01 00:00:00')
_WINDOW_END = pd.Timestamp('2024-12-01 00:00:00')


def read_solve_data(file_dir: str, out_dir: Optional[str] = None) -> None:
    """Merge the CSV files in *file_dir* and write one CSV per month.

    Args:
        file_dir: Directory whose entries are all read as GBK-encoded CSV
            files. Its base name becomes the output sub-directory name.
        out_dir: Root directory for the output. Defaults to the module-level
            ``save_dir`` when omitted.

    Raises:
        KeyError: If the merged data has no ``开始时间`` column.

    Each output file is named ``<month>.csv`` and keeps the helper ``month``
    column. Nothing is written (and no directory is created) when *file_dir*
    is empty or no row falls inside the time window.
    """
    out_root = save_dir if out_dir is None else out_dir
    target_dir = os.path.join(out_root, os.path.basename(file_dir))

    # Sorted so the row order of the merged frame does not depend on the
    # arbitrary order returned by os.listdir.
    frames = [
        pd.read_csv(os.path.join(file_dir, name), encoding='gbk')
        for name in sorted(os.listdir(file_dir))
    ]
    if not frames:
        return

    # One concat for all files instead of growing a frame inside the loop.
    merged = pd.concat(frames, ignore_index=True)

    # Unparseable timestamps become NaT; NaT compares False on both sides of
    # the window test, so those rows are dropped by the mask below.
    start_times = pd.to_datetime(merged[_TIME_COLUMN], errors='coerce')
    merged[_TIME_COLUMN] = start_times
    in_window = (start_times >= _WINDOW_START) & (start_times < _WINDOW_END)

    # assign() builds a new frame, avoiding a write into a filtered slice.
    selected = merged[in_window].assign(
        month=lambda frame: frame[_TIME_COLUMN].dt.month
    )
    if selected.empty:
        return

    os.makedirs(target_dir, exist_ok=True)
    for month, month_rows in selected.groupby('month'):
        month_rows.to_csv(
            os.path.join(target_dir, str(month) + '.csv'), index=False
        )


def main() -> None:
    """List ``read_dir`` and process each of its sub-directories in a pool."""
    entries = os.listdir(read_dir)
    print(entries)

    # Plain files directly under read_dir cannot be listed by the worker, so
    # only directories are dispatched.
    candidates = [os.path.join(read_dir, name) for name in entries]
    sub_dirs = [path for path in candidates if os.path.isdir(path)]

    with multiprocessing.Pool(4) as pool:
        pool.map(read_solve_data, sub_dirs)


# Kept behind the guard: with the "spawn" start method each worker re-imports
# this module, so nothing that touches the file system may run at import time.
if __name__ == '__main__':
    main()