12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- from multiprocessing import Pool
- import sys, os
- path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
- print(path)
- sys.path.insert(0, path)
- print(sys.path)
- from utils.file.trans_methods import *
- from utils.systeminfo.sysinfo import use_files_get_max_cpu_count
def read_and_save_file(filename):
    """Load one data file and index it by timestamp and turbine number.

    Despite its name, this function does not write anything; it only reads
    the file and returns the prepared frame to the caller.

    The turbine number is the part of the file's base name before the first
    underscore. It is stored in a new '风机号' column, the '描述' column is
    parsed as a day-month-year timestamp, and both become the row index.

    Args:
        filename: Path of the file to load. It is passed to
            ``read_file_to_df`` with ``header=1`` (presumably selecting the
            second row as the header -- confirm against that helper).

    Returns:
        A ``(wind_number, df)`` tuple: the turbine number string and the
        frame indexed by ``['描述', '风机号']``.

    Raises:
        Exception: Any error raised while reading or transforming the file is
            re-raised unchanged after the file's base name has been printed.
    """
    # Resolved before the ``try`` so the handler below can always name the
    # file. When this lived inside the ``try``, a failure here left
    # ``basename`` unbound and the handler itself raised UnboundLocalError,
    # hiding the real error.
    basename = os.path.basename(filename)
    try:
        wind_number = basename.split("_")[0]
        df = read_file_to_df(filename, header=1)
        df['风机号'] = wind_number
        df['描述'] = pd.to_datetime(df['描述'], format='%d-%m-%Y %H:%M:%S')
        df.set_index(keys=['描述', '风机号'], inplace=True)
        return wind_number, df
    except Exception:
        # Report which file failed, then let the original exception (with its
        # original traceback) propagate to the pool / caller.
        print(basename, 'error')
        raise
def _group_by_turbine_and_columns(results):
    """Bucket per-file frames by turbine number, then by column set.

    Args:
        results: Iterable of ``(wind_number, df)`` pairs.

    Returns:
        A nested dict ``{wind_number: {cols_key: [df, ...]}}`` where
        ``cols_key`` is the frame's column names, sorted and joined with
        '-'. Frames keep the order in which they were supplied.
    """
    grouped = {}
    for wind_number, df in results:
        cols_key = '-'.join(sorted(df.columns))
        grouped.setdefault(wind_number, {}).setdefault(cols_key, []).append(df)
    return grouped


def _merge_turbine_frames(frames_by_columns):
    """Combine all frames of one turbine into a single flat frame.

    Frames sharing a column set are stacked row-wise with a single
    ``pd.concat`` call (instead of re-concatenating the accumulated frame
    once per file). The stacked groups are then joined column-wise on the
    index, sorted by index, and the index is turned back into columns.

    Args:
        frames_by_columns: Dict mapping a column-set key to the list of
            frames that have exactly those columns.

    Returns:
        The merged frame with the index levels restored as ordinary columns.
    """
    stacked = [pd.concat(frames, axis=0) for frames in frames_by_columns.values()]
    # NOTE(review): the column-wise concat aligns on the index; if the same
    # (timestamp, turbine) pair occurs more than once this alignment may
    # fail -- confirm the inputs never overlap in time.
    merged = pd.concat(stacked, axis=1)
    merged.sort_index(inplace=True)
    merged.reset_index(inplace=True)
    return merged


if __name__ == '__main__':
    read_path = r'/data/download/collection_data/1进行中/枫香风电场-贵州-大唐/收资数据/枫香风电场收资表/1.10分钟SCADA数据'
    save_path = r'/data/download/collection_data/1进行中/枫香风电场-贵州-大唐/清理数据/枫香风电场收资表/1.10分钟SCADA数据'
    # Alternative local (Windows) paths:
    # read_path = r'D:\trans_data\枫香\收资数据\min'
    # save_path = r'D:\trans_data\枫香\清理数据\min'

    # Presumably prepares the output directory; the meaning of the second
    # argument is defined by the project helper -- confirm there.
    create_file_path(save_path, False)

    all_fils = read_excel_files(read_path)
    process_count = use_files_get_max_cpu_count(all_fils)

    # Each worker takes exactly one argument, so a plain ``map`` suffices.
    with Pool(process_count) as pool:
        results = pool.map(read_and_save_file, all_fils)

    # One output CSV per turbine, named after its turbine number.
    for wind_number, frames_by_columns in _group_by_turbine_and_columns(results).items():
        df = _merge_turbine_frames(frames_by_columns)
        df.to_csv(os.path.join(save_path, f"{wind_number}.csv"), encoding="utf-8", index=False)
|