张崾先风电场-故障整理.py 1.1 KB

1234567891011121314151617181920212223242526272829303132
  1. import multiprocessing
  2. import os
  3. import pandas as pd
  4. read_dir = 'D:\data\张崾先风电场\故障事件数据'
  5. save_dir = 'D:\data\崾先风电场\故障事件数据整理'
  6. print(os.listdir(read_dir))
  7. def read_solve_data(file_dir):
  8. base_dir = os.path.basename(file_dir)
  9. df = pd.DataFrame()
  10. for file in os.listdir(file_dir):
  11. df = pd.concat([df, pd.read_csv(file_dir + '/' + file, encoding='gbk')])
  12. df['开始时间'] = pd.to_datetime(df['开始时间'], errors='coerce')
  13. df = df.query("(开始时间 >= '2024-01-01 00:00:00') & (开始时间 < '2024-12-01 00:00:00')")
  14. df['month'] = df['开始时间'].dt.month
  15. months = df['month'].unique()
  16. for month in months:
  17. df_month = df[df['month'] == month]
  18. os.makedirs(save_dir + os.sep + base_dir, exist_ok=True)
  19. df_month.to_csv(save_dir + os.sep + base_dir + os.sep + str(month) + '.csv', index=False)
  20. if __name__ == '__main__':
  21. dirs = os.listdir(read_dir)
  22. with multiprocessing.Pool(4) as pool:
  23. pool.map(read_solve_data, [read_dir + os.sep + i for i in dirs])