# -*- coding: utf-8 -*-
"""Flatten two-row CSV headers of wind-turbine exports and re-save the files.

Originally created in the Spyder editor as a temporary script file.

Each regular file in the read directory is parsed as a CSV with a two-level
header, the header is flattened to a single level, cells equal to "-" are
replaced by NaN, and the result is written under the same base name into the
save directory.  Files are processed in parallel worker processes.
"""
import datetime
import multiprocessing
# NOTE(review): the wildcard import is kept so that no name is removed from the
# module namespace, but it rebinds the builtin ``open`` to ``os.open`` here.
# Only ``listdir``, ``makedirs`` and ``path`` are used by the code below;
# prefer ``from os import listdir, makedirs, path`` once nothing else relies
# on the wildcard.
from os import *

import numpy as np
import pandas as pd

# Standard column table: one entry per line, fields separated by tabs.
# NOTE(review): the field/line separators of this table were reconstructed
# (one standard column name per line, remaining tokens tab-separated) --
# confirm against the source data before relying on the dictionary values.
dianjian_str = """
wind_turbine_number
time_stamp\t时间
active_power\t有功功率\tkW
rotor_speed\t风轮转速\trpm
generator_speed\t发电机转速\trpm
wind_velocity\t风速\tm/s
pitch_angle_blade_1\t叶片1角度\t°
pitch_angle_blade_2\t叶片2角度\t°
pitch_angle_blade_3\t叶片3角度\t°
cabin_position\t机舱位置\t°
true_wind_direction
yaw_error1\t风向\t°
twisted_cable_angle
main_bearing_temperature\t主轴温度\t℃
gearbox_oil_temperature\t齿轮箱温度\t℃
gearbox_low_speed_shaft_bearing_temperature\t齿轮箱轴承温度\t℃
gearboxmedium_speed_shaftbearing_temperature
gearbox_high_speed_shaft_bearing_temperature\t齿轮箱轴承温度2\t℃
generatordrive_end_bearing_temperature\t发电机驱动侧轴承温度\t℃
generatornon_drive_end_bearing_temperature\t发电机非驱动侧轴承温度\t℃
cabin_temperature\t机舱温度\t℃
outside_cabin_temperature\t舱外温度\t℃
generator_winding1_temperature
generator_winding2_temperature
generator_winding3_temperature
front_back_vibration_of_the_cabin
side_to_side_vibration_of_the_cabin
required_gearbox_speed
inverter_speed_master_control
actual_torque
given_torque
clockwise_yaw_count
counterclockwise_yaw_count
unusable
power_curve_available
set_value_of_active_power\t有功功率设定\tkW
wind_turbine_status
wind_turbine_status2
turbulence_intensity
"""

# Non-empty lines of the table above.
datas = [line for line in dianjian_str.split("\n") if line]

# Maps the first field of every table line to its third field when the line
# has exactly three tab-separated fields, otherwise to ''.  The live code path
# does not consult this mapping; only the disabled steps noted in read_df do.
dianjian_dict = dict()
for data in datas:
    ds = data.split("\t")
    dianjian_dict[ds[0]] = ds[2] if len(ds) == 3 else ''

# Default locations used when the module is run as a script.
READ_PATH = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/min-666'
SAVE_PATH = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/清理数据/min-666'
# Alternative Windows locations:
# READ_PATH = r'D:\trans_data\诺木洪\收资数据\min-666'
# SAVE_PATH = r'D:\trans_data\诺木洪\清理数据\min-666'

# Upper bound on the number of worker processes.
MAX_WORKERS = 20


def _flatten_header(columns):
    """Return single-level names for an iterable of two-level column labels.

    A top-level label starting with "Unnamed" takes the most recent real
    top-level label, but only once: that label is consumed by the first
    placeholder following it, and further consecutive placeholders get an
    empty top-level part.  A bottom-level label starting with "Unnamed" is
    dropped.  The two parts are concatenated without a separator.
    """
    flat_names = []
    pending_top = ''
    for top, bottom in columns:
        top, bottom = str(top), str(bottom)
        if top.startswith("Unnamed"):
            # Inherit the previous real label (or '' if already consumed).
            top, pending_top = pending_top, ''
        else:
            pending_top = top
        if bottom.startswith("Unnamed"):
            bottom = ''
        flat_names.append(top + bottom)
    return flat_names


def read_df(file_path):
    """Read a CSV with a two-row header and flatten the header to one level.

    :param file_path: path (or anything ``pandas.read_csv`` accepts) of the
        CSV file whose first two rows form the header.
    :return: DataFrame whose column names are the concatenated header parts
        (see ``_flatten_header``).
    """
    df = pd.read_csv(file_path, header=[0, 1])
    df.columns = _flatten_header(df.columns)
    # Disabled steps (kept for reference, never executed):
    #   * strip a trailing '采样值' from the flattened names;
    #   * rename source names to the standard names via dianjian_dict;
    #   * delete columns that are not keys of dianjian_dict.
    return df


def get_wind_name_files(path):
    """Return the names of the entries in directory ``path``."""
    return listdir(path)


def combine_df(save_path, file):
    """Clean one source CSV and write it into ``save_path``.

    The file is read with ``read_df``, every cell equal to "-" is replaced by
    NaN, and the frame is written as UTF-8 CSV without the index, using the
    base name of ``file``.

    :param save_path: existing directory receiving the cleaned file.
    :param file: path of the source CSV file.
    """
    begin = datetime.datetime.now()
    df = read_df(file)
    print("读取", file, df.shape)
    df.replace("-", np.nan, inplace=True)
    df.to_csv(path.join(save_path, path.basename(file)), encoding='utf-8', index=False)
    elapsed = datetime.datetime.now() - begin
    # total_seconds() covers the whole duration; ``.seconds`` alone would drop
    # the days component of the timedelta.
    print('整理完成', '耗时:', int(elapsed.total_seconds()))


def main(read_path=READ_PATH, save_path=SAVE_PATH, max_workers=MAX_WORKERS):
    """Clean every regular file of ``read_path`` into ``save_path`` in parallel.

    :param read_path: directory holding the source CSV files.
    :param save_path: directory for the cleaned files; created if missing.
    :param max_workers: upper bound on the number of worker processes.
    """
    makedirs(save_path, exist_ok=True)
    candidates = (path.join(read_path, entry) for entry in listdir(read_path))
    # Skip sub-directories: read_csv would fail on them and abort the batch.
    tasks = [(save_path, candidate) for candidate in candidates if path.isfile(candidate)]
    if not tasks:
        return
    # Never start more workers than there are files to process.
    with multiprocessing.Pool(min(max_workers, len(tasks))) as pool:
        pool.starmap(combine_df, tasks)


if __name__ == '__main__':
    main()