# tmp_read_file_and_save_db.py (3.2 KB)
  1. import datetime
  2. import os
  3. import warnings
  4. warnings.filterwarnings("ignore")
  5. def __build_directory_dict(directory_dict, path, filter_types=None):
  6. # 遍历目录下的所有项
  7. for item in os.listdir(path):
  8. item_path = os.path.join(path, item)
  9. if os.path.isdir(item_path):
  10. __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
  11. elif os.path.isfile(item_path):
  12. if path not in directory_dict:
  13. directory_dict[path] = []
  14. if filter_types is None or len(filter_types) == 0:
  15. directory_dict[path].append(item_path)
  16. elif str(item_path).split(".")[-1] in filter_types:
  17. if str(item_path).count("~$") == 0:
  18. directory_dict[path].append(item_path)
  19. # 读取路径下所有的excel文件
  20. def read_excel_files(read_path, filter_types=None):
  21. if not os.path.exists(read_path):
  22. return []
  23. if filter_types is None:
  24. filter_types = ['xls', 'xlsx', 'csv', 'gz']
  25. if os.path.isfile(read_path):
  26. return [read_path]
  27. directory_dict = {}
  28. __build_directory_dict(directory_dict, read_path, filter_types=filter_types)
  29. return [path for paths in directory_dict.values() for path in paths if path]
  30. if __name__ == '__main__':
  31. read_dir = r'C:\Users\wzl\Downloads\Compressed\491_盂县验证'
  32. begin = datetime.datetime.now()
  33. all_files = read_excel_files(read_dir)
  34. for index, file in enumerate(all_files):
  35. now_begin = datetime.datetime.now()
  36. # 文件/home/trans/下载/盂县_data/491_盂县验证/202504/18.csv,总个数258,当前第202个写入数据库耗时:0:00:12.924708
  37. if index < 202:
  38. continue
  39. print(index, file)
  40. # df = pd.read_csv(file)
  41. # columns_str = 'wind_turbine_number,wind_turbine_name,time_stamp,active_power,rotor_speed,generator_speed,wind_velocity,pitch_angle_blade_1,pitch_angle_blade_2,pitch_angle_blade_3,cabin_position,true_wind_direction,yaw_error1,set_value_of_active_power,gearbox_oil_temperature,generatordrive_end_bearing_temperature,generatornon_drive_end_bearing_temperature,cabin_temperature,twisted_cable_angle,front_back_vibration_of_the_cabin,side_to_side_vibration_of_the_cabin,actual_torque,given_torque,clockwise_yaw_count,counterclockwise_yaw_count,unusable,power_curve_available,required_gearbox_speed,inverter_speed_master_control,outside_cabin_temperature,main_bearing_temperature,gearbox_high_speed_shaft_bearing_temperature,gearboxmedium_speed_shaftbearing_temperature,gearbox_low_speed_shaft_bearing_temperature,generator_winding1_temperature,generator_winding2_temperature,generator_winding3_temperature,wind_turbine_status,wind_turbine_status2,turbulence_intensity,lab,year,month,day,year_month'
  42. # cols = columns_str.split(',')
  43. #
  44. # for col in cols:
  45. # if col not in df.columns:
  46. # df[col] = np.nan
  47. #
  48. # trans_service.load_data_local('WOF35800082_second', df)
  49. # print(
  50. # f'文件{file},总个数{len(all_files)},当前第{index + 1}个写入数据库耗时:{datetime.datetime.now() - now_begin}')
  51. print(f'总耗时:{datetime.datetime.now() - begin}')