|
@@ -3,9 +3,9 @@
|
|
|
# @Author : 魏志亮
|
|
|
import ast
|
|
|
import datetime
|
|
|
+import os
|
|
|
import shutil
|
|
|
import warnings
|
|
|
-from os import *
|
|
|
|
|
|
import chardet
|
|
|
import pandas as pd
|
|
@@ -28,7 +28,7 @@ def detect_file_encoding(filename):
|
|
|
if encoding is None:
|
|
|
encoding = 'gb18030'
|
|
|
|
|
|
- if encoding.lower() in ['utf-8', 'ascii', 'utf8']:
|
|
|
+ if encoding.lower() in ['utf-8', 'ascii', 'utf8', 'utf-8-sig']:
|
|
|
return 'utf-8'
|
|
|
|
|
|
return 'gb18030'
|
|
@@ -89,7 +89,7 @@ def read_file_to_df(file_path, read_cols=list(), trans_cols=None, nrows=None, no
|
|
|
find_cols = list()
|
|
|
if trans_cols:
|
|
|
header = find_read_header(file_path, trans_cols, resolve_col_prefix)
|
|
|
- trans_print(path.basename(file_path), "读取第", header, "行")
|
|
|
+ trans_print(os.path.basename(file_path), "读取第", header, "行")
|
|
|
if header is None:
|
|
|
if not_find_header == 'raise':
|
|
|
message = '未匹配到开始行,请检查并重新指定'
|
|
@@ -137,7 +137,7 @@ def read_file_to_df(file_path, read_cols=list(), trans_cols=None, nrows=None, no
|
|
|
trans_print('文件读取成功:', file_path, '数据数量:', df.shape, '耗时:', datetime.datetime.now() - begin)
|
|
|
except Exception as e:
|
|
|
trans_print('读取文件出错', file_path, str(e))
|
|
|
- message = '文件:' + path.basename(file_path) + ',' + str(e)
|
|
|
+ message = '文件:' + os.path.basename(file_path) + ',' + str(e)
|
|
|
raise ValueError(message)
|
|
|
|
|
|
return df
|
|
@@ -145,11 +145,11 @@ def read_file_to_df(file_path, read_cols=list(), trans_cols=None, nrows=None, no
|
|
|
|
|
|
def __build_directory_dict(directory_dict, path, filter_types=None):
|
|
|
# 遍历目录下的所有项
|
|
|
- for item in listdir(path):
|
|
|
- item_path = path.join(path, item)
|
|
|
- if path.isdir(item_path):
|
|
|
+ for item in os.listdir(path):
|
|
|
+ item_path = os.path.join(path, item)
|
|
|
+ if os.path.isdir(item_path):
|
|
|
__build_directory_dict(directory_dict, item_path, filter_types=filter_types)
|
|
|
- elif path.isfile(item_path):
|
|
|
+ elif os.path.isfile(item_path):
|
|
|
if path not in directory_dict:
|
|
|
directory_dict[path] = []
|
|
|
|
|
@@ -164,7 +164,7 @@ def __build_directory_dict(directory_dict, path, filter_types=None):
|
|
|
def read_excel_files(read_path, filter_types=None):
|
|
|
if filter_types is None:
|
|
|
filter_types = ['xls', 'xlsx', 'csv', 'gz']
|
|
|
- if path.isfile(read_path):
|
|
|
+ if os.path.isfile(read_path):
|
|
|
return [read_path]
|
|
|
|
|
|
directory_dict = {}
|
|
@@ -177,12 +177,12 @@ def read_excel_files(read_path, filter_types=None):
|
|
|
def read_files(read_path, filter_types=None):
    """
    Collect the files found at ``read_path``.

    :param read_path: a single file, or a directory to scan
    :param filter_types: file-type filter handed to ``__build_directory_dict``;
        defaults to ['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar']
    :return: list of file paths; ``[read_path]`` when ``read_path`` is a file
    """
    if filter_types is None:
        filter_types = ['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar']

    # A single file is returned as-is; filter_types is not applied to it.
    if os.path.isfile(read_path):
        return [read_path]

    directory_dict = {}
    __build_directory_dict(directory_dict, read_path, filter_types=filter_types)

    # Flatten the per-directory lists into one list, dropping empty entries.
    return [file_path for files in directory_dict.values() for file_path in files if file_path]
|
|
|
|
|
|
|
|
|
def copy_to_new(from_path, to_path):
|
|
@@ -196,17 +196,17 @@ def copy_to_new(from_path, to_path):
|
|
|
|
|
|
|
|
|
# 创建路径
|
|
|
def create_file_path(read_path, is_file_path=False):
    """
    Create the directory for ``read_path``, including any missing parents.

    :param read_path: the directory to create, or a file path whose parent
        directory should be created when ``is_file_path`` is true
    :param is_file_path: whether ``read_path`` includes a concrete file name
    """
    if is_file_path:
        read_path = os.path.dirname(read_path)
        # A bare file name has an empty dirname (the current directory);
        # os.makedirs('') would raise FileNotFoundError, so nothing to create.
        if not read_path:
            return

    # Keep the existence check: if the path already exists (even as a file),
    # leave it untouched instead of letting makedirs raise.
    if not os.path.exists(read_path):
        os.makedirs(read_path, exist_ok=True)
|
|
|
|
|
|
|
|
|
def valid_eval(eval_str):
|