123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263 |
- from datetime import datetime, timedelta
- import numpy as np
- import pandas as pd
def _build_frame(num_rows: int, num_cols: int, num_float_cols: int,
                 num_int_part1_cols: int) -> pd.DataFrame:
    """Build the random test table, one correctly typed block per column group.

    Column layout (left to right):
      * column 0: ``timestamp`` -- one value per row, one second apart,
        starting at 2023-01-01 00:10:00;
      * ``num_float_cols`` float columns drawn uniformly from [0.0, 100.0);
      * ``num_int_part1_cols`` integer columns drawn from [100001, 700001];
      * the remaining integer columns drawn from [-5, 1000].

    Raises:
        ValueError: if any size is negative or the requested groups do not
            fit into ``num_cols`` columns.
    """
    num_int_part2_cols = num_cols - 1 - num_float_cols - num_int_part1_cols
    if min(num_rows, num_float_cols, num_int_part1_cols, num_int_part2_cols) < 0:
        raise ValueError(
            'column layout does not fit: '
            f'num_rows={num_rows}, num_cols={num_cols}, '
            f'num_float_cols={num_float_cols}, '
            f'num_int_part1_cols={num_int_part1_cols}'
        )

    # Positional index of the first column of each integer group; these
    # indices are also used as the numeric suffix of the column names.
    first_int_col = 1 + num_float_cols
    second_int_col = first_int_col + num_int_part1_cols

    # Timestamp column (column 0).
    start_date = datetime(2023, 1, 1, 0, 10, 0)
    timestamps = pd.to_datetime(
        [start_date + timedelta(seconds=i) for i in range(num_rows)]
    )

    # Random draws are made in a fixed order (floats, int part 1, int part 2)
    # so a seeded global NumPy RNG yields a reproducible table.
    float_data = np.random.uniform(
        low=0.0, high=100.0, size=(num_rows, num_float_cols))
    # randint's upper bound is exclusive, hence 700002 / 1001.
    int_data_part1 = np.random.randint(
        low=100001, high=700002, size=(num_rows, num_int_part1_cols))
    int_data_part2 = np.random.randint(
        low=-5, high=1001, size=(num_rows, num_int_part2_cols))

    # Keep every group in its native dtype and join the groups once.
    # Stacking them into a single NumPy array would coerce every cell to a
    # string and force a per-column cast afterwards.
    blocks = [
        pd.DataFrame({'timestamp': timestamps}),
        pd.DataFrame(
            float_data,
            columns=[f'float_{i}' for i in range(1, first_int_col)]),
        pd.DataFrame(
            int_data_part1,
            columns=[f'int_part1_{i}'
                     for i in range(first_int_col, second_int_col)]),
        pd.DataFrame(
            int_data_part2,
            columns=[f'int_part2_{i}'
                     for i in range(second_int_col, num_cols)]),
    ]
    return pd.concat(blocks, axis=1)


def main(num_rows: int = 600, num_cols: int = 14118,
         num_float_cols: int = 10229, num_int_part1_cols: int = 2000,
         output_path: str = '../conf/test.csv') -> None:
    """Generate a wide random test table and write it to a header-less CSV.

    Args:
        num_rows: number of data rows to generate.
        num_cols: total number of columns, including the timestamp column.
        num_float_cols: number of float columns following the timestamp.
        num_int_part1_cols: number of integer columns in the [100001, 700001]
            range; all remaining columns are integers in [-5, 1000].
        output_path: destination CSV file. The parent directory is expected
            to exist already; this function does not create it.

    Raises:
        ValueError: if the requested column groups do not fit into
            ``num_cols`` or any size is negative.
    """
    df = _build_frame(num_rows, num_cols, num_float_cols, num_int_part1_cols)

    print(df.columns)
    print(df.shape)
    # DataFrame.info() prints its report itself and returns None, so it must
    # not be wrapped in print().
    df.info()

    df.to_csv(output_path, header=False, index=False, encoding='utf8')
if __name__ == '__main__':
    # Script entry point: run the generator once and print how many seconds
    # of wall-clock time it took.
    import time

    started_at = time.time()
    main()
    elapsed_seconds = time.time() - started_at
    print(elapsed_seconds)
|