# cp_online_data_to_other.py
  import datetime
  import multiprocessing
  import os
  import shutil
  from os import *
  # Entry names (files or directories) that are skipped while walking the
  # source tree.
  not_move_dir = [
      "乌梅山风电场-江西-大唐",
      "诺木洪风电场-甘肃-华电",
      "平陆风电场-山西-中广核",
      "泗洪协合风电场-安徽-深能南控",
      "诺木洪风电场-青海-华电",
      "长清风电场-山东-国电",
  ]

  # Root directory that is scanned for files to copy.
  read_dir = r"/data/download/collection_data"
  # Alternative source root kept from the original script:
  # read_dir = r'Z:\collection_data'

  # Root directory that receives the copies.
  save_base_dir = r"/data/download/datang_shangxian"
  def __build_directory_dict(directory_dict, path, filter_types=None, exclude_names=None):
      """Recursively collect the files below ``path`` into ``directory_dict``.

      Every directory that directly contains at least one regular file gets a
      key in ``directory_dict`` (even when none of its files pass the filter);
      the value is the list of full paths of the accepted files.

      :param directory_dict: dict that is filled in place, directory -> file paths
      :param path: directory to walk
      :param filter_types: accepted extensions without the dot (the text after
          the last "."); ``None`` or an empty collection accepts every file
      :param exclude_names: entry names to skip (files or directories);
          defaults to the module-level ``not_move_dir`` list
      """
      if exclude_names is None:
          exclude_names = not_move_dir

      # ``os.path`` is spelled out on purpose: the ``path`` parameter hides the
      # ``path`` module that ``from os import *`` puts into the module namespace.
      for item in os.listdir(path):
          if item in exclude_names:
              continue
          item_path = os.path.join(path, item)
          if os.path.isdir(item_path):
              __build_directory_dict(
                  directory_dict,
                  item_path,
                  filter_types=filter_types,
                  exclude_names=exclude_names,
              )
          elif os.path.isfile(item_path):
              files = directory_dict.setdefault(path, [])
              if not filter_types:
                  files.append(item_path)
              elif item_path.split(".")[-1] in filter_types and "~$" not in item_path:
                  # Paths containing "~$" (Office lock/temp files) are ignored
                  # whenever an extension filter is active.
                  files.append(item_path)
  # Read all Excel-like files below a path
  def read_excel_files(read_path):
      """Return the spreadsheet-like files found at or below ``read_path``.

      :param read_path: a file or a directory
      :return: ``[read_path]`` when it is a regular file; otherwise the paths
          of all files below it whose extension is xls, xlsx, csv or gz
      """
      if os.path.isfile(read_path):
          return [read_path]

      directory_dict = {}
      __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
      # Flatten directory -> files into a single list, dropping empty entries.
      return [
          file_path
          for file_paths in directory_dict.values()
          for file_path in file_paths
          if file_path
      ]
  # Read all supported files below a path
  def read_files(read_path):
      """Return the data and archive files found at or below ``read_path``.

      :param read_path: a file or a directory
      :return: ``[read_path]`` when it is a regular file; otherwise the paths
          of all files below it whose extension is xls, xlsx, csv, gz, zip or rar
      """
      if os.path.isfile(read_path):
          return [read_path]

      directory_dict = {}
      __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar'])
      # Flatten directory -> files into a single list, dropping empty entries.
      return [
          file_path
          for file_paths in directory_dict.values()
          for file_path in file_paths
          if file_path
      ]
  # Create a directory path
  def create_file_path(path, is_file_path=False):
      """
      Create a directory, including any missing parent directories.

      :param path: the directory to create, or a file path whose parent
          directory should be created when ``is_file_path`` is true
      :param is_file_path: whether ``path`` includes a concrete file name
      """
      # ``os.path`` is spelled out on purpose: the ``path`` parameter hides the
      # ``path`` module that ``from os import *`` puts into the module namespace.
      target_dir = os.path.dirname(path) if is_file_path else path
      if not target_dir:
          # A bare file name has no directory component; nothing to create.
          return
      if not os.path.exists(target_dir):
          # exist_ok tolerates another process creating the directory between
          # the check above and this call.
          os.makedirs(target_dir, exist_ok=True)
  def copy_to_new(from_path, source_root=None, target_root=None):
      """Copy one file to the same relative location under the target root.

      :param from_path: path of the file to copy; expected to start with
          ``source_root``
      :param source_root: prefix to strip from ``from_path``; defaults to the
          module-level ``read_dir``
      :param target_root: prefix that replaces ``source_root``; defaults to the
          module-level ``save_base_dir``
      """
      if source_root is None:
          source_root = read_dir
      if target_root is None:
          target_root = save_base_dir

      # Only the first occurrence is rewritten so that a repeat of the root
      # string deeper inside the path is left untouched.
      to_path = from_path.replace(source_root, target_root, 1)

      # ``shutil.copy`` copies a single file, so the destination is always a
      # file path and only its parent directory has to exist. (Guessing
      # "file or directory" from the presence of a dot would turn an
      # extension-less destination into a directory.)
      target_dir = os.path.dirname(to_path)
      if target_dir:
          # exist_ok tolerates several worker processes creating the same
          # directory at once.
          os.makedirs(target_dir, exist_ok=True)
      shutil.copy(from_path, to_path)
  def main():
      """Copy every matching file from ``read_dir`` to ``save_base_dir``.

      Lists the files below ``read_dir``, keeps those whose path contains
      "收资数据", copies them with a pool of 40 worker processes and prints
      timing information.
      """
      print("开始:", datetime.datetime.now())
      begin = datetime.datetime.now()

      read_all_files = [i for i in read_files(read_dir) if "收资数据" in i]
      print(len(read_all_files))
      print("统计耗时:", datetime.datetime.now() - begin)

      cp_begin = datetime.datetime.now()
      with multiprocessing.Pool(40) as pool:
          pool.map(copy_to_new, read_all_files)

      print(len(read_all_files), "耗时:", datetime.datetime.now() - cp_begin, "总耗时:", datetime.datetime.now() - begin)
      print("结束:", datetime.datetime.now())


  # The guard keeps worker processes that re-import this module (spawn start
  # method) from starting the whole copy job again.
  if __name__ == "__main__":
      main()