# zhzn / CGN_taiyuan_104
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd


def main(
    num_rows=600,
    num_cols=14118,
    num_float_cols=10229,
    num_part1_int_cols=2000,
    output_path='../conf/test.csv',
):
    """Generate a wide table of random test data and write it as a header-less CSV.

    Column layout, by 0-based position:

    * column 0: timestamps one second apart, starting at 2023-01-01 00:10:00
    * the next ``num_float_cols`` columns: floats drawn uniformly from [0.0, 100.0)
    * the next ``num_part1_int_cols`` columns: integers in [100001, 700001]
    * all remaining columns: integers in [-5, 1000]

    With the default arguments the layout is the same as the previously
    hard-coded one (14118 columns, 600 rows, written to ``../conf/test.csv``).

    Args:
        num_rows: Number of data rows to generate.
        num_cols: Total number of columns, including the timestamp column.
        num_float_cols: Number of float columns following the timestamp.
        num_part1_int_cols: Number of integer columns in the first (large
            value) integer section.
        output_path: Destination CSV path (string or path-like). Missing parent
            directories are created.

    Raises:
        ValueError: If any count is negative or the sections do not fit into
            ``num_cols``.
    """
    # Whatever is left after the timestamp, float and first integer sections
    # becomes the second integer section.
    num_part2_int_cols = num_cols - 1 - num_float_cols - num_part1_int_cols
    if min(num_rows, num_float_cols, num_part1_int_cols) < 0:
        raise ValueError('row and column counts must be non-negative')
    if num_part2_int_cols < 0:
        raise ValueError(
            'num_cols is too small for the timestamp, float and integer sections'
        )

    # Positions at which the two integer sections start; they are also used
    # as the numeric suffix of the generated column names.
    part1_start = 1 + num_float_cols
    part2_start = part1_start + num_part1_int_cols

    # Timestamp column (column 0).
    start_date = datetime(2023, 1, 1, 0, 10, 0)
    timestamps = pd.date_range(
        start=start_date, periods=num_rows, freq=timedelta(seconds=1)
    )

    # The random draws keep the same order as before (floats, then the two
    # integer sections), so a seeded global NumPy RNG yields the same values.
    float_data = np.random.uniform(low=0.0, high=100.0, size=(num_rows, num_float_cols))
    # randint's upper bound is exclusive, hence 700002 / 1001.
    int_data_part1 = np.random.randint(
        low=100001, high=700002, size=(num_rows, num_part1_int_cols)
    )
    int_data_part2 = np.random.randint(
        low=-5, high=1001, size=(num_rows, num_part2_int_cols)
    )

    # Column names.
    float_names = [f'float_{i}' for i in range(1, part1_start)]
    part1_names = [f'int_part1_{i}' for i in range(part1_start, part2_start)]
    part2_names = [f'int_part2_{i}' for i in range(part2_start, num_cols)]

    # Build each section with its native dtype and join them column-wise.
    # Stacking everything into one NumPy array would coerce every value to a
    # string and require converting each column back afterwards.
    df = pd.concat(
        [
            pd.DataFrame({'timestamp': timestamps}),
            pd.DataFrame(float_data, columns=float_names),
            pd.DataFrame(int_data_part1, columns=part1_names),
            pd.DataFrame(int_data_part2, columns=part2_names),
        ],
        axis=1,
    )

    print(df.columns)
    print(df.shape)
    # DataFrame.info() prints its report itself and returns None, so it is
    # called directly rather than wrapped in print().
    df.info()

    out_path = Path(output_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_path, header=False, index=False, encoding='utf8')


if __name__ == '__main__':
    import time

    # Run the generator once and report the elapsed wall-clock time in seconds.
    started_at = time.time()
    main()
    elapsed = time.time() - started_at
    print(elapsed)