# zhzn / energy-data-trans
							from multiprocessing import Pool

import sys, os

# Locate the project root (the grandparent directory of this script) and give
# it top priority on the import path -- presumably so that the project-local
# `utils` imports further down resolve. Both values are echoed for debugging.
path = os.path.abspath(os.path.join(os.path.abspath(__file__), os.pardir, os.pardir))
print(path)
sys.path[:0] = [path]
print(sys.path)

from utils.file.trans_methods import *
from utils.systeminfo.sysinfo import use_files_get_max_cpu_count


def read_and_save_file(filename):
    """Load one data file into a DataFrame indexed by timestamp and turbine.

    The turbine number is the part of the file's base name that precedes the
    first underscore. It is stored in a new '风机号' column, the '描述' column
    is parsed as ``day-month-year hour:minute:second`` timestamps, and the two
    columns together become the frame's index.

    Despite its name, this function does not write anything; it only returns
    the loaded frame to the caller.

    Args:
        filename: Path of the file to read. It is handed to
            ``read_file_to_df`` with ``header=1`` (presumably the second row
            holds the column names -- confirm against that helper).

    Returns:
        A ``(wind_number, df)`` tuple: the turbine number string and the
        indexed DataFrame.

    Raises:
        TypeError: If ``filename`` is not a path-like value.
        Exception: Any error raised while reading or transforming the file is
            re-raised unchanged after the file's base name has been printed.
    """
    # Resolved before the ``try`` so the handler below can always name the
    # file. When this lived inside the ``try``, a failure here reached the
    # handler with ``basename`` unbound and masked the real error.
    basename = os.path.basename(filename)
    try:
        wind_number = basename.split("_")[0]
        df = read_file_to_df(filename, header=1)
        df['风机号'] = wind_number
        df['描述'] = pd.to_datetime(df['描述'], format='%d-%m-%Y %H:%M:%S')
        df.set_index(keys=['描述', '风机号'], inplace=True)
        return wind_number, df
    except Exception:
        print(basename, 'error')
        # Bare ``raise`` propagates the original exception and traceback.
        raise


if __name__ == '__main__':
    read_path = r'/data/download/collection_data/1进行中/枫香风电场-贵州-大唐/收资数据/枫香风电场收资表/1.10分钟SCADA数据'
    save_path = r'/data/download/collection_data/1进行中/枫香风电场-贵州-大唐/清理数据/枫香风电场收资表/1.10分钟SCADA数据'
    # read_path = r'D:\trans_data\枫香\收资数据\min'
    # save_path = r'D:\trans_data\枫香\清理数据\min'
    create_file_path(save_path, False)
    all_fils = read_excel_files(read_path)
    process_count = use_files_get_max_cpu_count(all_fils)

    with Pool(process_count) as pool:
        results = pool.starmap(read_and_save_file, [(i,) for i in all_fils])

    df_dict = dict()
    for result in results:
        wind_number, df = result
        cols = list(df.columns)
        cols.sort()
        cols_str = '-'.join(cols)
        if wind_number in df_dict.keys():
            if cols_str in df_dict[wind_number].keys():
                df_dict[wind_number][cols_str] = pd.concat([df_dict[wind_number][cols_str], df], axis=0)
            else:
                df_dict[wind_number][cols_str] = df
        else:
            df_dict[wind_number] = {cols_str: df}

    for wind_number, cols_dict in df_dict.items():
        df = pd.concat(cols_dict.values(), axis=1)
        df.sort_index(inplace=True)
        df.reset_index(inplace=True)
        df.to_csv(os.path.join(save_path, f"{wind_number}.csv"), encoding="utf-8", index=False)