import datetime
import os
import warnings

warnings.filterwarnings("ignore")


def __build_directory_dict(directory_dict, path, filter_types=None):
    """Recursively collect file paths under ``path``, grouped by directory.

    Args:
        directory_dict: Dict that is filled in place. Each key is a directory
            path and each value is the list of selected file paths directly
            inside that directory. A directory that contains at least one
            regular file gets a key even when none of its files are selected.
        path: Directory to scan.
        filter_types: Extensions (without the leading dot) to keep. When it
            is ``None`` or empty, every regular file is kept.
    """
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # traversal, and therefore the order of the collected files, reproducible.
    for item in sorted(os.listdir(path)):
        item_path = os.path.join(path, item)

        if os.path.isdir(item_path):
            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
            continue
        if not os.path.isfile(item_path):
            continue

        collected = directory_dict.setdefault(path, [])
        if not filter_types:
            collected.append(item_path)
            continue

        # The extension is whatever follows the last dot of the file name;
        # a name without any dot has no extension and is never selected.
        _, dot, extension = item.rpartition(".")
        # Paths containing "~$" are skipped (only when a filter is active).
        if dot and extension in filter_types and "~$" not in item_path:
            collected.append(item_path)


def read_excel_files(read_path, filter_types=None):
    """Return the data files found at ``read_path``.

    Args:
        read_path: A file or directory path.
        filter_types: Extensions (without the leading dot) to keep while
            scanning a directory. Defaults to ``['xls', 'xlsx', 'csv', 'gz']``.
            An empty sequence keeps every file.

    Returns:
        ``[]`` when ``read_path`` does not exist; ``[read_path]`` when it is a
        file (the extension filter is not applied in that case); otherwise the
        selected files found recursively below the directory. Files are
        grouped by directory and listed in sorted name order within each
        directory.
    """
    if not os.path.exists(read_path):
        return []

    if filter_types is None:
        filter_types = ['xls', 'xlsx', 'csv', 'gz']

    if os.path.isfile(read_path):
        return [read_path]

    directory_dict = {}
    __build_directory_dict(directory_dict, read_path, filter_types=filter_types)

    return [path for paths in directory_dict.values() for path in paths if path]


if __name__ == '__main__':
    read_dir = r'C:\Users\wzl\Downloads\Compressed\491_盂县验证'
    # Files whose position in the list is below this value are skipped.
    resume_from = 202

    begin = datetime.datetime.now()
    all_files = read_excel_files(read_dir)
    for index, file in enumerate(all_files):
        # Only referenced by the disabled per-file timing report below.
        now_begin = datetime.datetime.now()
        # Log line kept from an earlier run: file
        # /home/trans/下载/盂县_data/491_盂县验证/202504/18.csv, 258 files in
        # total, file number 202, database write took 0:00:12.924708
        if index < resume_from:
            continue

        print(index, file)

        # Disabled database-load step, kept for reference. It uses names that
        # are not imported in the visible part of this file (pd, np,
        # trans_service). The final print reports the file, the total file
        # count, the current position and the time spent writing to the
        # database.
        # df = pd.read_csv(file)
        # columns_str = 'wind_turbine_number,wind_turbine_name,time_stamp,active_power,rotor_speed,generator_speed,wind_velocity,pitch_angle_blade_1,pitch_angle_blade_2,pitch_angle_blade_3,cabin_position,true_wind_direction,yaw_error1,set_value_of_active_power,gearbox_oil_temperature,generatordrive_end_bearing_temperature,generatornon_drive_end_bearing_temperature,cabin_temperature,twisted_cable_angle,front_back_vibration_of_the_cabin,side_to_side_vibration_of_the_cabin,actual_torque,given_torque,clockwise_yaw_count,counterclockwise_yaw_count,unusable,power_curve_available,required_gearbox_speed,inverter_speed_master_control,outside_cabin_temperature,main_bearing_temperature,gearbox_high_speed_shaft_bearing_temperature,gearboxmedium_speed_shaftbearing_temperature,gearbox_low_speed_shaft_bearing_temperature,generator_winding1_temperature,generator_winding2_temperature,generator_winding3_temperature,wind_turbine_status,wind_turbine_status2,turbulence_intensity,lab,year,month,day,year_month'
        # cols = columns_str.split(',')
        #
        # for col in cols:
        #     if col not in df.columns:
        #         df[col] = np.nan
        #
        # trans_service.load_data_local('WOF35800082_second', df)
        # print(
        #     f'文件{file},总个数{len(all_files)},当前第{index + 1}个写入数据库耗时:{datetime.datetime.now() - now_begin}')

    print(f'总耗时:{datetime.datetime.now() - begin}')