# 颗粒度变大.py ("coarsen granularity") — resample CSV time series to a coarser time granularity.
  1. import os
  2. import pandas as pd
  3. def trans_time_granularity(read_dir: str, save_dir: str, time_str: str, time_granularity: str, group_by: list):
  4. for root, dirs, files in os.walk(read_dir):
  5. for file in files:
  6. file_path = os.path.join(root, file)
  7. df = pd.read_csv(file_path)
  8. # df = df.drop(index=0)
  9. df[time_str] = pd.to_datetime(df[time_str], errors='coerce')
  10. df[time_str] = df[time_str].dt.ceil(time_granularity)
  11. groupby_df = df.groupby(group_by).mean(numeric_only=True).reset_index()
  12. save_file = file_path.replace(read_dir, save_dir)
  13. if not os.path.exists(os.path.dirname(save_file)):
  14. os.makedirs(os.path.dirname(save_file))
  15. groupby_df.to_csv(save_file, index=False, encoding='utf-8')
  16. if __name__ == '__main__':
  17. read_dir = r'D:\data\tmp_data\龙源\minute'
  18. save_dir = r'D:\data\tmp_data\龙源\minute12'
  19. trans_time_granularity(read_dir, save_dir, 'time_stamp', '20min', ['time_stamp'])