123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134 |
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""
- import datetime
- import multiprocessing
- from os import *
- import numpy as np
- import pandas as pd
# Field table, one entry per line. The parser below splits each line on tab
# characters and treats the first field as the key.
# NOTE(review): presumably each line is "<field name>\t<label>\t<unit>" --
# confirm that the separators in this literal are real tab characters; with
# plain spaces every whole line becomes a key mapped to ''.
dianjian_str = """
wind_turbine_number
time_stamp 时间
active_power 有功功率 kW
rotor_speed 风轮转速 rpm
generator_speed 发电机转速 rpm
wind_velocity 风速 m/s
pitch_angle_blade_1 叶片1角度 °
pitch_angle_blade_2 叶片2角度 °
pitch_angle_blade_3 叶片3角度 °
cabin_position 机舱位置 °
true_wind_direction
yaw_error1 风向 °
twisted_cable_angle
main_bearing_temperature 主轴温度 ℃
gearbox_oil_temperature 齿轮箱温度 ℃
gearbox_low_speed_shaft_bearing_temperature 齿轮箱轴承温度 ℃
gearboxmedium_speed_shaftbearing_temperature
gearbox_high_speed_shaft_bearing_temperature 齿轮箱轴承温度2 ℃
generatordrive_end_bearing_temperature 发电机驱动侧轴承温度 ℃
generatornon_drive_end_bearing_temperature 发电机非驱动侧轴承温度 ℃
cabin_temperature 机舱温度 ℃
outside_cabin_temperature 舱外温度 ℃
generator_winding1_temperature
generator_winding2_temperature
generator_winding3_temperature
front_back_vibration_of_the_cabin
side_to_side_vibration_of_the_cabin
required_gearbox_speed
inverter_speed_master_control
actual_torque
given_torque
clockwise_yaw_count
counterclockwise_yaw_count
unusable
power_curve_available
set_value_of_active_power 有功功率设定 kW
wind_turbine_status
wind_turbine_status2
turbulence_intensity
"""

# Non-empty lines of the table, in their original order.
datas = list(filter(None, dianjian_str.split("\n")))

# Map the first tab-separated field of each line to its third field; lines
# that do not have exactly three fields map to an empty string.
dianjian_dict = {}
for data in datas:
    ds = data.split("\t")
    dianjian_dict[ds[0]] = ds[2] if len(ds) == 3 else ''
def read_df(file_path):
    """Read a CSV with a two-row header and flatten it to one level of names.

    Each column's name becomes the concatenation of its top-level and
    second-level header labels. Labels starting with "Unnamed" (the
    placeholder pandas assigns to blank header cells) are treated as empty.
    A real top-level label is carried over to the single column that
    immediately follows it when that column's top label is a placeholder;
    any further placeholder columns get an empty top label.

    Args:
        file_path: Path (or file-like object) handed to ``pd.read_csv``.

    Returns:
        The DataFrame with flattened, single-level string column names.
    """
    df = pd.read_csv(file_path, header=[0, 1])

    flat_names = {}
    carried_top = ""
    for header_pair in df.columns:
        top, sub = header_pair[0], header_pair[1]

        if not str(top).startswith("Unnamed"):
            # A real top-level label: remember it for the next column.
            carried_top = top
        elif carried_top:
            # Blank top cell right after a real label: reuse it exactly once.
            top, carried_top = carried_top, ''
        else:
            top = ''

        if str(sub).startswith("Unnamed"):
            sub = ''

        flat_names[str(header_pair)] = ''.join([top, sub])

    # Stringify the tuple labels first so they match the keys built above.
    df.columns = list(map(str, df.columns))
    df.rename(columns=flat_names, inplace=True)

    # Stripping a trailing '采样值' from the names, renaming them to the
    # dianjian_dict keys and dropping unknown columns are currently disabled.
    return df
def get_wind_name_files(path):
    """Return the names of the entries directly inside the directory ``path``.

    Args:
        path: Directory to list.

    Returns:
        The list produced by ``listdir(path)``: bare entry names, not full
        paths.
    """
    return listdir(path)
def combine_df(save_path, file):
    """Clean one source CSV and write the result into ``save_path``.

    The file is loaded with ``read_df``, every cell whose value is exactly
    "-" is replaced by NaN, and the frame is written as a UTF-8 CSV (without
    the index) under the same base name inside ``save_path``. Progress and
    the elapsed whole seconds are printed to stdout.

    Args:
        save_path: Directory that receives the cleaned CSV.
        file: Path of the source CSV to read.
    """
    begin = datetime.datetime.now()

    df = read_df(file)
    print("读取", file, df.shape)

    df.replace("-", np.nan, inplace=True)
    df.to_csv(path.join(save_path, path.basename(file)), encoding='utf-8', index=False)

    # timedelta.seconds is only the seconds *component* (whole days are
    # dropped); total_seconds() reports the full duration.
    elapsed = int((datetime.datetime.now() - begin).total_seconds())
    print('整理完成', '耗时:', elapsed)
if __name__ == '__main__':
    # Source directory with the raw CSV files and target directory for the
    # cleaned copies.
    read_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/收资数据/min-666'
    save_path = r'/data/download/collection_data/1进行中/诺木洪风电场-甘肃-华电/清理数据/min-666'
    # Windows equivalents of the two directories above:
    # read_path = r'D:\trans_data\诺木洪\收资数据\min-666'
    # save_path = r'D:\trans_data\诺木洪\清理数据\min-666'

    # exist_ok=True already tolerates an existing directory, so a separate
    # exists() check beforehand is unnecessary.
    makedirs(save_path, exist_ok=True)

    # Hand only regular files to the workers: a sub-directory entry would make
    # read_csv raise inside the pool and abort the whole batch.
    jobs = [
        (save_path, path.join(read_path, name))
        for name in listdir(read_path)
        if path.isfile(path.join(read_path, name))
    ]

    # One combine_df call per input file, spread over 20 worker processes.
    with multiprocessing.Pool(20) as pool:
        pool.starmap(combine_df, jobs)
|