12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
import datetime
import multiprocessing
import os
import shutil
from os import *
# Directory (or file) names that the recursive scan skips at every depth.
not_move_dir = [
    "乌梅山风电场-江西-大唐",
    "诺木洪风电场-甘肃-华电",
    "平陆风电场-山西-中广核",
    "泗洪协合风电场-安徽-深能南控",
    "诺木洪风电场-青海-华电",
    "长清风电场-山东-国电",
]

# Root of the tree that is scanned for files to copy.
# Alternative root used on a Windows mapped drive: r'Z:\collection_data'
read_dir = "/data/download/collection_data"

# Root of the destination tree; copied files keep their path relative to read_dir.
save_base_dir = "/data/download/datang_shangxian"
def __build_directory_dict(directory_dict, path, filter_types=None):
    """Recursively group the files below ``path`` by their containing directory.

    The result is accumulated in ``directory_dict`` (mutated in place): each key
    is a directory that directly contains at least one regular file, and the
    value is the list of accepted file paths in that directory. A directory
    whose files were all rejected by the filter is still present with an empty
    list.

    :param directory_dict: dict that receives ``{directory: [file paths]}``
    :param path: directory to scan
    :param filter_types: optional collection of file extensions (without the
        dot); when empty or None every file is accepted
    """
    # NOTE: the parameter is named ``path``, so it hides the ``path`` module
    # exported by ``from os import *``; always go through ``os.path`` here.
    for item in os.listdir(path):
        # Excluded names are skipped wherever they occur in the tree.
        if item in not_move_dir:
            continue
        item_path = os.path.join(path, item)
        if os.path.isdir(item_path):
            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
        elif os.path.isfile(item_path):
            bucket = directory_dict.setdefault(path, [])
            if not filter_types:
                bucket.append(item_path)
            # With a filter active, also drop any path containing "~$"
            # (presumably Office lock/temporary files).
            elif item_path.split(".")[-1] in filter_types and "~$" not in item_path:
                bucket.append(item_path)
# Collect every spreadsheet-like file under a path.
def read_excel_files(read_path):
    """Return the xls/xlsx/csv/gz files found under ``read_path``.

    When ``read_path`` is itself a regular file it is returned as the only
    element, without any extension check. Otherwise the directory tree is
    scanned and all accepted file paths are returned as one flat list.
    """
    if not path.isfile(read_path):
        grouped = {}
        __build_directory_dict(grouped, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
        collected = []
        for group in grouped.values():
            # Drop falsy entries, flatten the per-directory lists.
            collected.extend(entry for entry in group if entry)
        return collected
    return [read_path]
# Collect every supported data or archive file under a path.
def read_files(read_path):
    """Return the xls/xlsx/csv/gz/zip/rar files found under ``read_path``.

    A ``read_path`` that is a regular file is returned as a one-element list
    regardless of its extension; a directory is scanned recursively.
    """
    if path.isfile(read_path):
        return [read_path]

    by_directory = {}
    wanted = ['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar']
    __build_directory_dict(by_directory, read_path, filter_types=wanted)

    matches = []
    for files_in_dir in by_directory.values():
        for file_path in files_in_dir:
            if file_path:
                matches.append(file_path)
    return matches
# Create a directory path.
def create_file_path(path, is_file_path=False):
    """
    Create a directory (and any missing parents) if it does not exist yet.

    :param path: the directory to create, or a file path whose parent
        directory should be created
    :param is_file_path: whether ``path`` ends with a concrete file name; when
        True only the directory part of ``path`` is created
    """
    # NOTE: the parameter ``path`` hides the ``path`` module exported by
    # ``from os import *``, so the os functions are reached through ``os``.
    directory = os.path.dirname(path) if is_file_path else path
    if not directory:
        # A bare file name has no directory component: nothing to create.
        return
    # exist_ok makes a separate existence check unnecessary and avoids a race
    # when several worker processes create the same directory.
    os.makedirs(directory, exist_ok=True)
def copy_to_new(from_path):
    """Copy one file from the ``read_dir`` tree to the same place under ``save_base_dir``.

    :param from_path: path of the source file; its leading ``read_dir`` part
        is swapped for ``save_base_dir`` to obtain the destination
    """
    # Only the first occurrence is replaced so that a directory further down
    # the tree that happens to repeat the root text is left untouched.
    to_path = from_path.replace(read_dir, save_base_dir, 1)
    # The source is always a file, so the destination is always a file path:
    # create its parent directory. Guessing from "contains a dot" would turn
    # an extension-less file name into a directory and nest the copy inside it.
    create_file_path(to_path, is_file_path=True)
    shutil.copy(from_path, to_path)
def main():
    """Copy every supported file whose path contains "收资数据" into the target tree.

    Scans ``read_dir``, prints the number of matching files and the time the
    scan took, copies the files with a pool of 40 worker processes, and prints
    the copy time and the total elapsed time.
    """
    print("开始:", datetime.datetime.now())
    begin = datetime.datetime.now()
    read_all_files = [file_path for file_path in read_files(read_dir) if "收资数据" in file_path]
    print(len(read_all_files))
    print("统计耗时:", datetime.datetime.now() - begin)

    cp_begin = datetime.datetime.now()
    with multiprocessing.Pool(40) as pool:
        pool.map(copy_to_new, read_all_files)
    print(len(read_all_files), "耗时:", datetime.datetime.now() - cp_begin, "总耗时:", datetime.datetime.now() - begin)
    print("结束:", datetime.datetime.now())


# The guard is required by multiprocessing: with the "spawn" start method each
# worker re-imports this module, and unguarded top-level code would try to start
# the whole job again inside every worker.
if __name__ == "__main__":
    main()
|