import os

import chardet
import pandas as pd


def detect_file_encoding(filename, sample_size=1000):
    """Guess whether a file should be decoded as UTF-8 or GB18030.

    Only the first ``sample_size`` bytes of the file are passed to
    ``chardet``, so files whose leading bytes are plain ASCII but which
    contain other characters later on are classified from that prefix
    alone.

    Args:
        filename: Path of the file to inspect.
        sample_size: Number of leading bytes to sample for detection.
            Defaults to 1000.

    Returns:
        ``'utf-8'`` when chardet reports ``utf-8``, ``utf8``,
        ``utf-8-sig`` or ``ascii`` (case-insensitive); ``'gb18030'`` for
        every other result, including when chardet reports no encoding.
    """
    with open(filename, 'rb') as f:
        rawdata = f.read(sample_size)

    detected = chardet.detect(rawdata)['encoding']
    # chardet reports None when it cannot decide; that case falls through
    # to the GB18030 default together with every non-UTF-8 result.
    if detected is not None and detected.lower() in ('utf-8', 'ascii', 'utf8', 'utf-8-sig'):
        return 'utf-8'
    return 'gb18030'


def read_file_to_df(file_path, nrows=None):
    """Load a CSV file or every sheet of an Excel workbook into a DataFrame.

    Paths whose lower-cased string form ends with ``"csv"`` are read with
    ``pandas.read_csv`` using the encoding chosen by
    ``detect_file_encoding``; malformed lines produce a warning
    (``on_bad_lines='warn'``). Any other path is opened as an Excel
    workbook: each sheet is read, tagged with a ``sheet_name`` column
    holding the sheet's name, and all sheets are concatenated in workbook
    order with their original row indexes.

    Args:
        file_path: Path of the file to read.
        nrows: Maximum number of rows to read from the CSV file, or from
            each sheet of a workbook. ``None`` reads everything.

    Returns:
        The loaded data as a ``pandas.DataFrame``.

    Raises:
        ValueError: If anything goes wrong while reading. The message is
            the file's base name followed by the underlying error text,
            and the original exception is attached as ``__cause__``.
    """
    try:
        if str(file_path).lower().endswith("csv"):
            encoding = detect_file_encoding(file_path)
            return pd.read_csv(file_path, encoding=encoding, on_bad_lines='warn', nrows=nrows)

        sheet_frames = []
        # The context manager closes the workbook even when a sheet
        # fails to parse part-way through the loop.
        with pd.ExcelFile(file_path) as xls:
            for sheet_name in xls.sheet_names:
                sheet_df = pd.read_excel(xls, sheet_name=sheet_name, nrows=nrows)
                sheet_df['sheet_name'] = sheet_name
                sheet_frames.append(sheet_df)

        # Concatenate once instead of growing a frame inside the loop:
        # avoids re-copying earlier sheets and concatenating with an
        # empty seed frame.
        return pd.concat(sheet_frames) if sheet_frames else pd.DataFrame()
    except Exception as e:
        message = '文件:' + os.path.basename(file_path) + ',' + str(e)
        raise ValueError(message) from e


if __name__ == '__main__':
    df = read_file_to_df(r"D:\data\11-12月.xls")
    print(df)