张崾先筛选20241210.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. import datetime
  2. import multiprocessing
  3. import os
  4. import sys
  5. import pandas as pd
  6. sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
  7. from utils.file.trans_methods import read_file_to_df, read_excel_files
  8. # read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/点检表以外测点儿-20241209'
  9. # save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/变桨-20241210'
  10. # user_cols = ['Time', '机舱外温度', '桨叶角度A', '桨叶角度B', '桨叶角度C',
  11. # '轴1电机电流', '轴2电机电流', '轴3电机电流',
  12. # '轴1电机温度', '轴2电机温度', '轴3电机温度']
  13. read_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/点检表以外测点儿-20241210'
  14. save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/偏航-20241210'
  15. user_cols = ['Time', '实际风速', '偏航误差', '电缆扭角', '激活偏航解缆阀','激活顺时针偏航','激活逆时针偏航']
  16. os.makedirs(save_dir, exist_ok=True)
  17. def read_and_save(file_path, read_dir, save_dir):
  18. begin = datetime.datetime.now()
  19. df = read_file_to_df(file_path, read_cols=user_cols)
  20. df['Time'] = pd.to_datetime(df['Time'], errors='coerce')
  21. df.sort_values(by=['Time'], inplace=True)
  22. df.to_csv(os.path.join(save_dir, os.path.basename(file_path)), index=False, encoding='utf8')
  23. print(os.path.basename(file_path), '耗时:', (datetime.datetime.now() - begin))
  24. if __name__ == '__main__':
  25. begin = datetime.datetime.now()
  26. all_files = read_excel_files(read_dir)
  27. with multiprocessing.Pool(16) as pool:
  28. pool.starmap(read_and_save, [(file, read_dir, save_dir) for file in all_files])
  29. print('总耗时:', (datetime.datetime.now() - begin))