import os

import pandas as pd


def trans_time_granularity(read_dir: str, save_dir: str, time_str: str,
                           time_granularity: str, group_by: list) -> None:
    """Re-aggregate every CSV under *read_dir* to a coarser time granularity.

    Each file found by walking *read_dir* is read, its *time_str* column is
    parsed as datetimes (unparseable values become NaT and are then excluded
    by the groupby), timestamps are ceiled to *time_granularity* (a pandas
    offset alias such as ``'20min'``), and all numeric columns are averaged
    per *group_by* group. The result is written to the mirrored path under
    *save_dir*, creating directories as needed.

    Parameters
    ----------
    read_dir : str
        Root directory walked for input CSV files.
    save_dir : str
        Root directory for outputs; the layout of *read_dir* is mirrored.
    time_str : str
        Name of the timestamp column to parse and ceil.
    time_granularity : str
        Pandas offset alias the timestamps are ceiled to.
    group_by : list
        Column names to group by before averaging numeric columns.
    """
    for root, _dirs, files in os.walk(read_dir):
        for file in files:
            file_path = os.path.join(root, file)
            df = pd.read_csv(file_path)
            # errors='coerce' turns unparseable timestamps into NaT so a bad
            # row drops out of the groupby instead of aborting the whole run.
            df[time_str] = pd.to_datetime(df[time_str], errors='coerce')
            # Round timestamps UP to the next granularity boundary.
            df[time_str] = df[time_str].dt.ceil(time_granularity)
            grouped = df.groupby(group_by).mean(numeric_only=True).reset_index()
            # Mirror the input path under save_dir.
            save_file = file_path.replace(read_dir, save_dir)
            out_dir = os.path.dirname(save_file)
            if out_dir:
                # exist_ok=True avoids the check-then-create race of the
                # previous os.path.exists() guard.
                os.makedirs(out_dir, exist_ok=True)
            grouped.to_csv(save_file, index=False, encoding='utf-8')


if __name__ == '__main__':
    read_dir = r'D:\data\tmp_data\龙源\minute'
    save_dir = r'D:\data\tmp_data\龙源\minute12'
    trans_time_granularity(read_dir, save_dir, 'time_stamp', '20min', ['time_stamp'])