"""Generate a wide CSV of synthetic test data.

The file has one timestamp column followed by a group of float columns and
two groups of integer columns, and is written without a header row.
"""
import time
from datetime import datetime, timedelta
from typing import Optional

import numpy as np
import pandas as pd


def _build_frame(
    num_rows: int,
    num_cols: int,
    num_float_cols: int,
    num_int_part1_cols: int,
    seed: Optional[int] = None,
) -> pd.DataFrame:
    """Build the synthetic DataFrame, one typed block per column group.

    Column layout (``num_cols`` columns in total):
      * column 0: ``timestamp``, one row per second starting at
        2023-01-01 00:10:00;
      * the next ``num_float_cols`` columns: floats drawn from [0.0, 100.0);
      * the next ``num_int_part1_cols`` columns: integers in [100001, 700001];
      * all remaining columns: integers in [-5, 1000].

    Raises:
        ValueError: if ``num_rows`` or any column-group size is negative,
            including the implied size of the last integer group.
    """
    # Whatever is left after the timestamp, float and first integer group.
    num_int_part2_cols = num_cols - 1 - num_float_cols - num_int_part1_cols
    if num_rows < 0:
        raise ValueError(f'num_rows must be >= 0, got {num_rows}')
    if min(num_float_cols, num_int_part1_cols, num_int_part2_cols) < 0:
        raise ValueError(
            'inconsistent column counts: '
            f'num_cols={num_cols}, num_float_cols={num_float_cols}, '
            f'num_int_part1_cols={num_int_part1_cols}'
        )

    rng = np.random.default_rng(seed)

    # Timestamp column (column 0).
    start_date = datetime(2023, 1, 1, 0, 10, 0)
    timestamps = pd.to_datetime(
        [start_date + timedelta(seconds=i) for i in range(num_rows)]
    )

    # Float columns.
    float_data = rng.uniform(0.0, 100.0, size=(num_rows, num_float_cols))

    # Integer columns; the upper bound of ``integers`` is exclusive, so
    # these cover [100001, 700001] and [-5, 1000] respectively.
    int_data_part1 = rng.integers(
        100001, 700002, size=(num_rows, num_int_part1_cols)
    )
    int_data_part2 = rng.integers(
        -5, 1001, size=(num_rows, num_int_part2_cols)
    )

    # Column names carry the absolute column index of each column.
    first_int_col = num_float_cols + 1
    first_part2_col = first_int_col + num_int_part1_cols
    float_names = [f'float_{i}' for i in range(1, first_int_col)]
    part1_names = [
        f'int_part1_{i}' for i in range(first_int_col, first_part2_col)
    ]
    part2_names = [
        f'int_part2_{i}' for i in range(first_part2_col, num_cols)
    ]

    # Concatenating typed blocks keeps each group's dtype. Stacking the
    # timestamp strings together with the numbers in a single ndarray would
    # turn every cell into a string that then has to be parsed back column
    # by column.
    return pd.concat(
        [
            pd.DataFrame({'timestamp': timestamps}),
            pd.DataFrame(float_data, columns=float_names),
            pd.DataFrame(int_data_part1, columns=part1_names),
            pd.DataFrame(int_data_part2, columns=part2_names),
        ],
        axis=1,
    )


def main(
    num_rows: int = 600,
    num_cols: int = 14118,
    num_float_cols: int = 10229,
    num_int_part1_cols: int = 2000,
    output_path: str = '../conf/test.csv',
    seed: Optional[int] = None,
) -> None:
    """Generate the synthetic data set and write it to ``output_path``.

    Prints the column index, the shape and the ``DataFrame.info()`` summary,
    then writes the frame as CSV without header and without index.

    Args:
        num_rows: number of data rows.
        num_cols: total number of columns, including the timestamp column.
        num_float_cols: number of float columns following the timestamp.
        num_int_part1_cols: number of integer columns in [100001, 700001];
            the remaining columns are integers in [-5, 1000].
        output_path: destination CSV path. Its parent directory is not
            created here and must already exist.
        seed: optional seed for the random generator, for reproducible data.

    Raises:
        ValueError: if the row count or the column counts are inconsistent.
    """
    df = _build_frame(
        num_rows, num_cols, num_float_cols, num_int_part1_cols, seed
    )

    print(df.columns)
    print(df.shape)
    # info() prints its report itself and returns None, so it is not
    # wrapped in print().
    df.info()

    df.to_csv(output_path, header=None, index=False, encoding='utf8')


if __name__ == '__main__':
    # perf_counter is a monotonic clock intended for measuring durations.
    begin = time.perf_counter()
    main()
    print(time.perf_counter() - begin)