4 月之前 · 842e332caa
--- a/src/train_model_grpo.py
+++ b/src/train_model_grpo.py
@@ -180,15 +180,15 @@ class ModelTrainer:
 
				             weight_decay = 0.1,
			
 
				             warmup_ratio = 0.1,
			
 
				             lr_scheduler_type = "cosine",
			
 
				-            optim = "adamw_8bit",
			
 
				+            optim ="adamw_8bit", # "adamw_8bit" if device == "cuda" else "adamw_torch",  # CPU 使用 adamw_torch
			
 
				             logging_steps = 1,
			
 
				             bf16 = is_bfloat16_supported(),
			
 
				             fp16 = not is_bfloat16_supported(),
			
 
				             per_device_train_batch_size = 1,
			
 
				             gradient_accumulation_steps = 1, # Increase to 4 for smoother training
			
 
				-            num_generations = 4, # 每次生成 4 个输出
			
 
				-            max_prompt_length = 256, # 输入提示的最大长度
			
 
				-            max_completion_length = 200, # 生成内容的最大长度
			
 
				+            num_generations = 128, # 256 # number of completions generated per prompt (previously 4)
			
 
				+            max_prompt_length = 128, # 256 # 输入提示的最大长度
			
 
				+            max_completion_length = 128,# 200 # 生成内容的最大长度
			
 
				             num_train_epochs = 1, # Set to 1 for a full training run
			
 
				             max_steps = 10,  # 250
			
 
				             save_steps = 10, # 250
			
@@ -227,7 +227,7 @@ if __name__ == "__main__":
 
				     # 配置参数
			
 
				     model_name = os.path.join('..', 'models', 'pretrained', 'DeepSeek-R1-Distill-Qwen-1.5B')
			
 
				     # model_name: 预训练模型的路径
			
 
				-    max_seq_length = 2048  # 单次会话（single session） 的最大 token 长度，一个token大约3-4 字节（Byte）
			
 
				+    max_seq_length = 512  # 单次会话（single session） 的最大 token 长度，一个token大约3-4 字节（Byte）
			
 
				     dtype = torch.float16  # 数据类型
			
 
				     load_in_4bit = True  # 是否以4位精度加载模型
			
 
				     lora_rank=16