import datetime
import multiprocessing
import os

import pandas as pd
# Directory that holds the raw second-level CSV files to be filtered.
read_dir = r'/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF01000010-WOB000002_ZY24年7-9月秒级/second'
# Directory that receives the filtered copies (same file names as the inputs).
save_dir = r'/data/download/collection_data/2完成/招远风电场-山东-大唐/清理数据/WOF01000010-WOB000002_ZY24年7-9月秒级/second_select_yaw_error1_20241014'
def read_and_select_and_save(file, src_dir=None, dst_dir=None):
    """Filter one CSV file and write the kept rows under the same file name.

    Only the columns ``active_power``, ``wind_velocity``,
    ``pitch_angle_blade_1``, ``yaw_error1`` and ``lab`` are loaded. A row is
    kept when ``yaw_error1 <= 360`` and both ``active_power`` and
    ``wind_velocity`` are strictly positive (rows where any of these is NaN
    fail the comparison and are dropped). In the kept rows, ``yaw_error1``
    values in ``[180, 360]`` are shifted down by 360 (e.g. 350 becomes -10);
    smaller values are left untouched.

    Args:
        file: File name (not a full path) of the CSV inside the input
            directory; the output file gets the same name.
        src_dir: Directory to read from. Defaults to the module-level
            ``read_dir``.
        dst_dir: Directory to write to; created if missing. Defaults to the
            module-level ``save_dir``.

    Returns:
        None. The result is written to disk and a completion line is printed.
    """
    # Resolve the defaults lazily so explicit arguments never touch the
    # module-level paths.
    src_dir = read_dir if src_dir is None else src_dir
    dst_dir = save_dir if dst_dir is None else dst_dir

    df = pd.read_csv(
        os.path.join(src_dir, file),
        usecols=['active_power', 'wind_velocity', 'pitch_angle_blade_1', 'yaw_error1', 'lab'],
    )

    keep = (
        (df['yaw_error1'] <= 360)
        & (df['active_power'] > 0)
        & (df['wind_velocity'] > 0)
    )
    # Take an explicit copy so the column assignment below writes to an
    # independent frame instead of a slice of the original one.
    df = df.loc[keep].copy()

    # Every remaining value is <= 360, so ">= 180" selects exactly the
    # [180, 360] range that has to be shifted down by 360.
    yaw = df['yaw_error1']
    df['yaw_error1'] = yaw.mask(yaw >= 180, yaw - 360)

    # exist_ok keeps this safe when several worker processes get here at once.
    os.makedirs(dst_dir, exist_ok=True)
    df.to_csv(os.path.join(dst_dir, file), index=False, encoding='utf8')
    print(f'{file}处理完成')
if __name__ == '__main__':
    # Script entry point: filter every file in read_dir in parallel and
    # report the total wall-clock time.
    begin = datetime.datetime.now()

    # Dispatch regular files only: a sub-directory inside read_dir would make
    # read_csv raise in a worker and abort the whole batch. Sorting gives a
    # reproducible processing order.
    files = sorted(
        name for name in os.listdir(read_dir)
        if os.path.isfile(os.path.join(read_dir, name))
    )

    # Pool() rejects a size of 0, so skip the pool when there is nothing to
    # do, and never start more workers than there are files.
    if files:
        with multiprocessing.Pool(min(32, len(files))) as pool:
            pool.map(read_and_select_and_save, files)

    print(f'总耗时:{datetime.datetime.now() - begin}')