select_part_cols.py 1.1 KB

123456789101112131415161718192021222324252627
  1. import datetime
  2. import multiprocessing
  3. from os import *
  4. import pandas as pd
  5. read_dir = r'/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF01000010-WOB000002_ZY24年7-9月秒级/second'
  6. save_dir = r'/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF01000010-WOB000002_ZY24年7-9月秒级/second_select_yaw_error1_20241014'
  7. def read_and_select_and_save(file):
  8. df = pd.read_csv(read_dir + sep + file,
  9. usecols=['active_power', 'wind_velocity', 'pitch_angle_blade_1', 'yaw_error1', 'lab'])
  10. df = df[df['yaw_error1'] <= 360]
  11. df['yaw_error1'] = df['yaw_error1'].apply(lambda x: x - 360 if 180 <= x <= 360 else x)
  12. condition = (df['active_power'] > 0) & (df['wind_velocity'] > 0)
  13. df = df[condition]
  14. df.to_csv(path.join(save_dir, file), index=False, encoding='utf8')
  15. print(f'{file}处理完成')
  16. if __name__ == '__main__':
  17. begin = datetime.datetime.now()
  18. with multiprocessing.Pool(32) as pool:
  19. pool.starmap(read_and_select_and_save, [(file,) for file in listdir(read_dir)])
  20. print(f'总耗时:{datetime.datetime.now() - begin}')