1234567891011121314151617181920212223242526272829303132 |
- import multiprocessing
- import os
- import pandas as pd
# Input root: its entries are handed to read_solve_data one at a time.
# Raw strings keep the Windows backslashes literal; the non-raw form relied on
# invalid escape sequences (e.g. "\d") being passed through unchanged.
read_dir = r'D:\data\张崾先风电场\故障事件数据'
# Output root: per-month CSVs are written under <save_dir>/<input folder name>.
# NOTE(review): this path lacks the leading '张' that read_dir has -- confirm
# whether that difference is intentional.
save_dir = r'D:\data\崾先风电场\故障事件数据整理'

# Only list the input root when run as a script: with the "spawn" start method
# every worker re-imports this module, which would repeat the listing (and fail
# on import if the directory is missing).
if __name__ == '__main__':
    print(os.listdir(read_dir))
def read_solve_data(file_dir, out_dir=None):
    """Merge the CSV files of one folder and split the rows into per-month files.

    Every entry of ``file_dir`` is read as a GBK-encoded CSV. The '开始时间'
    (start time) column is parsed as datetimes; unparseable values become NaT
    and are dropped by the range filter. Rows with a start time in
    [2024-01-01, 2024-12-01) are kept and written to
    ``<out_dir>/<basename of file_dir>/<month>.csv``, one file per month
    present in the data.

    Args:
        file_dir: Directory containing the CSV files to merge.
        out_dir: Root directory for the output. Defaults to the module-level
            ``save_dir`` when omitted.

    Returns:
        None. Nothing is written when the directory is empty or no row falls
        inside the date range.
    """
    if out_dir is None:
        out_dir = save_dir
    base_dir = os.path.basename(file_dir)

    # Collect all frames and concatenate once; concatenating inside the loop
    # re-copies the accumulated data on every iteration.
    # Sorting the listing makes the row order of the output deterministic.
    frames = [
        pd.read_csv(os.path.join(file_dir, name), encoding='gbk')
        for name in sorted(os.listdir(file_dir))
    ]
    if not frames:
        # An empty folder has no '开始时间' column to work with.
        return None

    df = pd.concat(frames, ignore_index=True)
    df['开始时间'] = pd.to_datetime(df['开始时间'], errors='coerce')

    # NaT compares False on both sides, so unparseable rows are dropped here.
    in_range = (
        (df['开始时间'] >= '2024-01-01 00:00:00')
        & (df['开始时间'] < '2024-12-01 00:00:00')
    )
    # Explicit copy so that adding the 'month' column does not write into a
    # view of the unfiltered frame.
    df = df[in_range].copy()
    if df.empty:
        return None
    df['month'] = df['开始时间'].dt.month

    target_dir = os.path.join(out_dir, base_dir)
    os.makedirs(target_dir, exist_ok=True)
    for month, df_month in df.groupby('month', sort=False):
        df_month.to_csv(
            os.path.join(target_dir, str(month) + '.csv'), index=False
        )
    return None
if __name__ == '__main__':
    # Hand each sub-directory of read_dir to a pool of 4 worker processes.
    # Plain files are skipped: read_solve_data lists its argument as a
    # directory, so a stray file would raise in a worker and abort the whole
    # pool.map call.
    candidates = (
        os.path.join(read_dir, name) for name in sorted(os.listdir(read_dir))
    )
    dirs = [path for path in candidates if os.path.isdir(path)]
    with multiprocessing.Pool(4) as pool:
        pool.map(read_solve_data, dirs)
|