import os

import pandas as pd


def trans_time_granularity(read_dir: str, save_dir: str, time_str: str,
                           time_granularity: str, group_by: list):
    """Walk read_dir, aggregate every CSV to the given time granularity, and save the results under save_dir."""
    for root, dirs, files in os.walk(read_dir):
        for file in files:
            file_path = os.path.join(root, file)
            df = pd.read_csv(file_path)
            # df = df.drop(index=0)
            # Parse the timestamp column; unparseable values become NaT.
            df[time_str] = pd.to_datetime(df[time_str], errors='coerce')
            # Round each timestamp up to the next time_granularity boundary (e.g. '20min').
            df[time_str] = df[time_str].dt.ceil(time_granularity)
            # Average the numeric columns within each group.
            groupby_df = df.groupby(group_by).mean(numeric_only=True).reset_index()
            # Mirror the input directory layout under save_dir.
            save_file = file_path.replace(read_dir, save_dir)
            os.makedirs(os.path.dirname(save_file), exist_ok=True)
            groupby_df.to_csv(save_file, index=False, encoding='utf-8')


if __name__ == '__main__':
    read_dir = r'D:\data\tmp_data\龙源\minute'
    save_dir = r'D:\data\tmp_data\龙源\minute12'
    trans_time_granularity(read_dir, save_dir, 'time_stamp', '20min', ['time_stamp'])
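As a quick illustration of the aggregation step above, here is a minimal sketch that applies the same ceil-and-average logic to a small in-memory frame. The timestamps and the 'power' column are made-up sample values, not data from the real dataset:

    import pandas as pd

    # Hypothetical sample: 10-minute readings with one numeric column.
    sample = pd.DataFrame({
        'time_stamp': pd.to_datetime(['2023-01-01 00:05',
                                      '2023-01-01 00:15',
                                      '2023-01-01 00:25']),
        'power': [10.0, 20.0, 30.0],
    })
    # Same two steps as in trans_time_granularity: ceil, then group and average.
    sample['time_stamp'] = sample['time_stamp'].dt.ceil('20min')
    print(sample.groupby(['time_stamp']).mean(numeric_only=True).reset_index())
    # 00:05 and 00:15 both round up to 00:20 and average to 15.0; 00:25 rounds up to 00:40.

Note that mean(numeric_only=True) silently drops any non-numeric columns other than the grouping key, so only numeric measurements survive in the output CSVs.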