"""Filter per-file CSV data and shift large ``yaw_error1`` values down by 360.

Every regular file found in ``read_dir`` is read, filtered, and written under
the same file name into ``save_dir``. Files are processed in parallel by a
``multiprocessing`` pool.
"""
import datetime
import multiprocessing
import os

import pandas as pd

read_dir = r'/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF01000010-WOB000002_ZY24年7-9月秒级/second'
save_dir = r'/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF01000010-WOB000002_ZY24年7-9月秒级/second_select_yaw_error1_20241014'

# Columns loaded from each input file; all other columns are ignored.
USE_COLUMNS = ['active_power', 'wind_velocity', 'pitch_angle_blade_1', 'yaw_error1', 'lab']

# Number of worker processes used by the script entry point.
POOL_SIZE = 32


def read_and_select_and_save(file, *, src_dir=None, dst_dir=None):
    """Read one CSV file, filter its rows, and write the result.

    Rows are kept only when ``yaw_error1 <= 360``, ``active_power > 0`` and
    ``wind_velocity > 0`` (rows with NaN in any of these fail the comparison
    and are dropped). In the kept rows, ``yaw_error1`` values in ``[180, 360]``
    are replaced by ``value - 360``; smaller values are left unchanged.

    Args:
        file: File name (not a full path) located inside the source directory.
            The output is written under the same name.
        src_dir: Directory to read from. Defaults to the module-level
            ``read_dir``.
        dst_dir: Directory to write to. Defaults to the module-level
            ``save_dir``. It is created if it does not exist.

    Raises:
        ValueError: Propagated from ``pandas.read_csv`` if a column listed in
            ``USE_COLUMNS`` is missing from the file.
        OSError: If the input cannot be read or the output cannot be written.
    """
    src_dir = read_dir if src_dir is None else src_dir
    dst_dir = save_dir if dst_dir is None else dst_dir

    df = pd.read_csv(os.path.join(src_dir, file), usecols=USE_COLUMNS)

    # The power/wind filter does not depend on the yaw adjustment, so all row
    # filters can be applied in one pass before the values are modified.
    keep = (
        (df['yaw_error1'] <= 360)
        & (df['active_power'] > 0)
        & (df['wind_velocity'] > 0)
    )
    # Explicit copy: assigning into a filtered slice would otherwise trigger
    # pandas' chained-assignment (SettingWithCopy) warning.
    df = df[keep].copy()

    # Every remaining value is <= 360, so "180 <= x <= 360" reduces to
    # "not (x < 180)". `where` keeps values where the condition holds and
    # takes the shifted value elsewhere.
    yaw = df['yaw_error1']
    df['yaw_error1'] = yaw.where(yaw < 180, yaw - 360)

    # exist_ok makes this safe when several workers reach it concurrently.
    os.makedirs(dst_dir, exist_ok=True)
    df.to_csv(os.path.join(dst_dir, file), index=False, encoding='utf8')
    print(f'{file}处理完成')


def main():
    """Process every regular file in ``read_dir`` in parallel and print the elapsed time."""
    begin = datetime.datetime.now()
    # Skip sub-directories: passing one to read_csv would abort the whole pool.
    files = [
        name for name in os.listdir(read_dir)
        if os.path.isfile(os.path.join(read_dir, name))
    ]
    with multiprocessing.Pool(POOL_SIZE) as pool:
        pool.map(read_and_select_and_save, files)
    print(f'总耗时:{datetime.datetime.now() - begin}')


if __name__ == '__main__':
    main()