"""Copy collected data files from ``read_dir`` to the same relative path under ``save_base_dir``.

Only files whose path contains "收资数据" and whose extension is one of the
types accepted by :func:`read_files` are copied. Directory entries whose name
is listed in ``not_move_dir`` are skipped entirely.
"""
import datetime
import multiprocessing
import os
import shutil

# Entry names that are never descended into (and never copied).
not_move_dir = ["乌梅山风电场-江西-大唐",
                "诺木洪风电场-甘肃-华电",
                "平陆风电场-山西-中广核",
                "泗洪协合风电场-安徽-深能南控",
                "诺木洪风电场-青海-华电",
                "长清风电场-山东-国电"
                ]

# Root of the tree that is scanned.
read_dir = r"/data/download/collection_data"
# read_dir = r'Z:\collection_data'
# Root of the tree that receives the copies.
save_base_dir = r"/data/download/datang_shangxian"


def __build_directory_dict(directory_dict, path, filter_types=None):
    """Recursively record the files found below ``path``.

    :param directory_dict: dict that is filled in place; maps a directory path
        to the list of accepted file paths directly inside it
    :param path: directory to scan
    :param filter_types: accepted file extensions without the leading dot;
        ``None`` or an empty collection accepts every file
    """
    # Walk every entry of the directory.
    for item in os.listdir(path):
        if item in not_move_dir:
            continue

        item_path = os.path.join(path, item)
        if os.path.isdir(item_path):
            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
            continue
        if not os.path.isfile(item_path):
            continue

        # The directory gets an entry as soon as it holds one regular file,
        # even when that file is filtered out below.
        files = directory_dict.setdefault(path, [])
        if not filter_types:
            files.append(item_path)
            continue

        # Take the extension from the file name itself so that a dot in a
        # parent directory name can never be mistaken for one.
        _, dot, extension = item.rpartition(".")
        # Paths containing "~$" are excluded when a filter is active.
        if dot and extension in filter_types and "~$" not in item_path:
            files.append(item_path)


def _collect_files(read_path, filter_types):
    """Return ``[read_path]`` for a file, else every accepted file below it."""
    if os.path.isfile(read_path):
        return [read_path]

    directory_dict = {}
    __build_directory_dict(directory_dict, read_path, filter_types=filter_types)

    return [path for paths in directory_dict.values() for path in paths if path]


# Read all spreadsheet-like files below a path.
def read_excel_files(read_path):
    """List the xls/xlsx/csv/gz files below ``read_path``.

    :param read_path: a file or a directory
    :return: ``[read_path]`` when it is a file (no extension check is applied),
        otherwise the matching file paths found recursively
    """
    return _collect_files(read_path, ['xls', 'xlsx', 'csv', 'gz'])


# Read all data and archive files below a path.
def read_files(read_path):
    """List the xls/xlsx/csv/gz/zip/rar files below ``read_path``.

    :param read_path: a file or a directory
    :return: ``[read_path]`` when it is a file (no extension check is applied),
        otherwise the matching file paths found recursively
    """
    return _collect_files(read_path, ['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar'])


# Create a directory path.
def create_file_path(path, is_file_path=False):
    """
    Create a directory, including missing parents.
    :param path: directory to create, or a file path when ``is_file_path`` is set
    :param is_file_path: whether ``path`` ends with a file name; if so, only
        its parent directory is created
    """
    if is_file_path:
        path = os.path.dirname(path)

    # An empty path means "current directory" (e.g. a bare file name);
    # os.makedirs("") would raise FileNotFoundError.
    if not path:
        return

    if not os.path.exists(path):
        # exist_ok covers another worker process creating it in the meantime.
        os.makedirs(path, exist_ok=True)


def copy_to_new(from_path):
    """Copy one file to its mirrored location under ``save_base_dir``.

    :param from_path: path of the source file, located under ``read_dir``
    """
    # Rewrite only the first occurrence, i.e. the root prefix.
    to_path = from_path.replace(read_dir, save_base_dir, 1)

    # The source is always a file, so the destination is a file path too and
    # only its parent directory has to exist.
    create_file_path(to_path, is_file_path=True)

    shutil.copy(from_path, to_path)


def main():
    """Scan ``read_dir``, copy the selected files in parallel and print timings."""
    print("开始:", datetime.datetime.now())
    begin = datetime.datetime.now()
    read_all_files = [i for i in read_files(read_dir) if "收资数据" in i]
    print(len(read_all_files))
    print("统计耗时:", datetime.datetime.now() - begin)
    cp_begin = datetime.datetime.now()

    with multiprocessing.Pool(40) as pool:
        pool.map(copy_to_new, read_all_files)

    print(len(read_all_files), "耗时:", datetime.datetime.now() - cp_begin, "总耗时:", datetime.datetime.now() - begin)
    print("结束:", datetime.datetime.now())


# Worker processes started with the spawn/forkserver methods re-import this
# module; the guard keeps them from starting the whole copy again.
if __name__ == "__main__":
    main()