1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- import datetime
- import os
- import warnings
- warnings.filterwarnings("ignore")
def __build_directory_dict(directory_dict, path, filter_types=None):
    """Recursively collect the files under ``path``, grouped by directory.

    Args:
        directory_dict: Dict updated in place. Every directory that directly
            contains at least one regular file becomes a key; its value is
            the list of accepted file paths found in that directory (the
            list stays empty when no file passes the filter).
        path: Directory to scan. Sub-directories are scanned recursively.
        filter_types: Extensions to accept, written without the leading dot
            (e.g. ``['csv', 'xlsx']``). ``None`` or an empty collection
            accepts every file.

    Extensions are compared case-insensitively, so ``DATA.CSV`` is accepted
    by ``['csv']``. When a filter is active, a file is skipped if ``~$``
    occurs anywhere in its full path. Entries are visited in the order
    returned by ``os.listdir``.
    """
    if isinstance(filter_types, str):
        # A bare string would otherwise be treated as a collection of chars.
        filter_types = [filter_types]
    wanted = {str(ext).lower() for ext in filter_types} if filter_types else None

    # Walk every entry of the directory.
    for item in os.listdir(path):
        item_path = os.path.join(path, item)
        if os.path.isdir(item_path):
            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
            continue
        if not os.path.isfile(item_path):
            continue

        # The key is created as soon as the directory is known to hold a file.
        accepted = directory_dict.setdefault(path, [])
        if wanted is None:
            accepted.append(item_path)
            continue

        # Take the extension from the file name only; a name without a dot
        # has no extension and never matches.
        _, dot, extension = item.rpartition(".")
        if dot and extension.lower() in wanted and "~$" not in str(item_path):
            accepted.append(item_path)
# Read all Excel (and other tabular data) files under a path.
def read_excel_files(read_path, filter_types=None):
    """Return the data files found at or below ``read_path``.

    Args:
        read_path: A file or directory path.
        filter_types: Extensions (without the dot) handed to the directory
            scan. Defaults to ``['xls', 'xlsx', 'csv', 'gz']`` when ``None``.

    Returns:
        ``[read_path]`` when ``read_path`` is a regular file (the filter is
        not applied in that case), an empty list when the path does not
        exist, and otherwise the flattened list of file paths gathered by
        ``__build_directory_dict``.
    """
    if os.path.isfile(read_path):
        return [read_path]
    if not os.path.exists(read_path):
        return []

    extensions = ['xls', 'xlsx', 'csv', 'gz'] if filter_types is None else filter_types

    files_by_directory = {}
    __build_directory_dict(files_by_directory, read_path, filter_types=extensions)

    # Flatten the per-directory lists, dropping any falsy entries.
    collected = []
    for directory_files in files_by_directory.values():
        collected.extend(file_path for file_path in directory_files if file_path)
    return collected
if __name__ == '__main__':
    # One-off driver: list the data files under a hard-coded directory and
    # print the total elapsed time.
    read_dir = r'C:\Users\wzl\Downloads\Compressed\491_盂县验证'
    begin = datetime.datetime.now()
    all_files = read_excel_files(read_dir)

    # Log line kept from an earlier run (translated): "file
    # /home/trans/下载/盂县_data/491_盂县验证/202504/18.csv, 258 files in total,
    # file number 202 took 0:00:12.924708 to write to the database".
    # Indexes 0..201 are therefore skipped -- presumably to resume after that
    # run; confirm the file order still matches before relying on it.
    for index, file in enumerate(all_files[202:], start=202):
        # Start time for this file; only the disabled report below reads it.
        now_begin = datetime.datetime.now()
        print(index, file)

        # Disabled database load, kept for reference. It needs pd, np and
        # trans_service, none of which are imported in the visible part of
        # this file.
        # df = pd.read_csv(file)
        # columns_str = 'wind_turbine_number,wind_turbine_name,time_stamp,active_power,rotor_speed,generator_speed,wind_velocity,pitch_angle_blade_1,pitch_angle_blade_2,pitch_angle_blade_3,cabin_position,true_wind_direction,yaw_error1,set_value_of_active_power,gearbox_oil_temperature,generatordrive_end_bearing_temperature,generatornon_drive_end_bearing_temperature,cabin_temperature,twisted_cable_angle,front_back_vibration_of_the_cabin,side_to_side_vibration_of_the_cabin,actual_torque,given_torque,clockwise_yaw_count,counterclockwise_yaw_count,unusable,power_curve_available,required_gearbox_speed,inverter_speed_master_control,outside_cabin_temperature,main_bearing_temperature,gearbox_high_speed_shaft_bearing_temperature,gearboxmedium_speed_shaftbearing_temperature,gearbox_low_speed_shaft_bearing_temperature,generator_winding1_temperature,generator_winding2_temperature,generator_winding3_temperature,wind_turbine_status,wind_turbine_status2,turbulence_intensity,lab,year,month,day,year_month'
        # cols = columns_str.split(',')
        #
        # for col in cols:
        #     if col not in df.columns:
        #         df[col] = np.nan
        #
        # trans_service.load_data_local('WOF35800082_second', df)
        # print(
        #     f'文件{file},总个数{len(all_files)},当前第{index + 1}个写入数据库耗时:{datetime.datetime.now() - now_begin}')

    print(f'总耗时:{datetime.datetime.now() - begin}')
|