# Repository: zhzn/energy-data-trans
							import os

import chardet
import pandas as pd


# Detect the encoding of a file.
def detect_file_encoding(filename):
    """Return the codec name to use when reading *filename* as text.

    Only the first 1000 bytes of the file are sampled and handed to
    ``chardet.detect``. The detector's answer is then collapsed to one of
    two codecs: ``'utf-8'`` when it reports UTF-8 (with or without BOM) or
    plain ASCII, and ``'gb18030'`` for everything else, including the case
    where no encoding could be detected at all.

    Args:
        filename: Path of the file to inspect.

    Returns:
        Either ``'utf-8'`` or ``'gb18030'``.
    """
    utf8_compatible = ('utf-8', 'ascii', 'utf8', 'utf-8-sig')

    # A short prefix of the file is used as the detection sample.
    with open(filename, 'rb') as handle:
        sample = handle.read(1000)

    detected = chardet.detect(sample)['encoding']

    # No detected encoding falls through to the GB18030 default below.
    if detected is not None and detected.lower() in utf8_compatible:
        return 'utf-8'
    return 'gb18030'


def read_file_to_df(file_path, nrows=None):
    """Load a CSV file or an Excel workbook into a single DataFrame.

    A path whose lower-cased string form ends with ``"csv"`` is read with
    ``pd.read_csv`` using the codec chosen by ``detect_file_encoding``;
    malformed lines produce a warning instead of aborting the read.

    Any other path is opened as an Excel workbook. Every sheet is read,
    tagged with its name in an added ``sheet_name`` column, and the sheets
    are stacked in workbook order. Row indexes are kept as read, so index
    labels may repeat across sheets.

    Args:
        file_path: Path of the file to read.
        nrows: Maximum number of rows to read (applied to each sheet for
            Excel workbooks). ``None`` reads everything.

    Returns:
        The loaded DataFrame; an empty DataFrame if the workbook yields
        no sheets.

    Raises:
        ValueError: If reading fails for any reason. The message has the
            form ``'文件:<basename>,<original error>'`` and the original
            exception is chained as ``__cause__``.
    """
    try:
        if str(file_path).lower().endswith("csv"):
            encoding = detect_file_encoding(file_path)
            return pd.read_csv(file_path, encoding=encoding, on_bad_lines='warn', nrows=nrows)

        frames = []
        # The context manager closes the workbook even when a sheet fails
        # to parse, so the file handle is never leaked.
        with pd.ExcelFile(file_path) as xls:
            for sheet_name in xls.sheet_names:
                sheet_df = pd.read_excel(xls, sheet_name=sheet_name, nrows=nrows)
                sheet_df['sheet_name'] = sheet_name
                frames.append(sheet_df)

        # Concatenate once: avoids re-copying the accumulated rows for every
        # sheet and avoids seeding the concat with an empty DataFrame.
        return pd.concat(frames) if frames else pd.DataFrame()
    except Exception as e:
        message = '文件:' + os.path.basename(file_path) + ',' + str(e)
        # Chain the original exception so the root cause stays in the traceback.
        raise ValueError(message) from e


if __name__ == '__main__':
    # Manual smoke run: load a sample workbook from a fixed local path and
    # print the resulting DataFrame.
    print(read_file_to_df(r"D:\data\11-12月.xls"))