8 ay önce · 0da8d87171
--- a/conf/conf_train.yaml
+++ b/conf/conf_train.yaml
@@ -2,7 +2,7 @@
 
				 
			
 
				 # 模型配置
			
 
				 model_name: "../models/pretrained/DeepSeek-R1-Distill-Qwen-1.5B"
			
 
				-max_seq_length: 8192  # 2048 单次会话的最大 token 长度
			
 
				+max_seq_length: 12288  # 2048 单次会话的最大 token 长度
			
 
				 dtype: "float16"  # 数据类型，可选 "float16" 或 "bfloat16"
			
 
				 load_in_4bit: True  # 是否以4位精度加载模型
			
 
				 fast_inference: False # Enable vLLM fast inference
			
@@ -23,7 +23,7 @@ per_device_train_batch_size: 2  # 1 每个设备的训练批次（batch）大小
 
				 gradient_accumulation_steps: 4  # 1 梯度累积步数 ,用于在较小的batch size下模拟较大的batch
			
 
				 num_generations: 8  # 8 表示每次训练时生成的候选输出数量
			
 
				 max_prompt_length: 256  # 256 模型输入的最大长度
			
 
				-max_completion_length: 384  # 200 模型输入（生成）的最大长度
			
 
				+max_completion_length: 2048  # 200 模型输出（生成）的最大长度
			
 
				 num_train_epochs: 3  # 训练轮数
			
 
				 max_steps: 256  # 250 训练的最大步数
			
 
				 save_steps: 256  # 250 保存模型的步数（多少步保存一次模型）