生成测试数据.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. from datetime import datetime, timedelta
  2. import numpy as np
  3. import pandas as pd
  4. def main():
  5. # 设置参数
  6. num_rows = 600 # 示例数据行数
  7. num_cols = 14118
  8. # 创建时间戳列(第0列)
  9. start_date = datetime(2023, 1, 1, 0, 10, 0)
  10. timestamps = [str(start_date + timedelta(seconds=i)) for i in range(num_rows)]
  11. # 创建浮点型数据列(第1-10229列)
  12. float_data = np.random.uniform(low=0.0, high=100.0, size=(num_rows, 10229))
  13. # 创建整型数据列(前部分:10230-14117中的前一部分)
  14. # 假设前2000个整型列范围[100001,700001]
  15. int_data_part1 = np.random.randint(low=100001, high=700002, size=(num_rows, 2000))
  16. # 剩下的整型列范围[-5,1000]
  17. remaining_int_cols = num_cols - 10230 - 2000
  18. int_data_part2 = np.random.randint(low=-5, high=1001, size=(num_rows, remaining_int_cols))
  19. # 合并所有数据
  20. all_data = np.hstack([
  21. np.array(timestamps).reshape(-1, 1), # 时间戳列
  22. float_data, # 浮点型数据
  23. int_data_part1, # 第一段整型数据
  24. int_data_part2 # 第二段整型数据
  25. ])
  26. # 创建列名
  27. col_names = ['timestamp']
  28. col_names += [f'float_{i}' for i in range(1, 10230)]
  29. col_names += [f'int_part1_{i}' for i in range(10230, 10230 + 2000)]
  30. col_names += [f'int_part2_{i}' for i in range(10230 + 2000, 14118)]
  31. # 创建DataFrame
  32. df = pd.DataFrame(all_data, columns=col_names)
  33. # 设置正确的数据类型
  34. df['timestamp'] = pd.to_datetime(df['timestamp'])
  35. for col in df.columns[1:10230]:
  36. df[col] = df[col].astype(float)
  37. for col in df.columns[10230:]:
  38. df[col] = df[col].astype(int)
  39. print(df.columns)
  40. print(df.shape)
  41. print(df.info())
  42. df.to_csv('../conf/test.csv', header=None, index=False, encoding='utf8')
  43. if __name__ == '__main__':
  44. import time
  45. begin = time.time()
  46. main()
  47. print(time.time() - begin)