Browse Source

换用github jwjohns/unsloth-GRPO-qwen2.5 验证GRPO训练模型

zhouyang.xie 2 months ago
parent
commit
98d070b8c5
2 changed files with 2 additions and 2 deletions
  1. 1 1
      conf/conf_train.yaml
  2. 1 1
      src/train_model_grpo_v1.2.py

+ 1 - 1
conf/conf_train.yaml

@@ -11,7 +11,7 @@ gpu_memory_utilization: 0.85 # GPU VRAM 占用率
 
 # 训练配置
 use_vllm: False # use vLLM for fast inference!
-learning_rate: 1e-4  # 5e-6 学习率 1e-4 (0.0001) to 5e-5 (0.00005).
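+learning_rate: 5e-6  # Learning rate. Commonly suggested range: 5e-5 (0.00005) to 1e-4 (0.0001); 5e-6 is a more conservative value. NOTE: YAML 1.1 loaders such as PyYAML may read exponent literals without a decimal point (e.g. 5e-6) as strings, so the consumer should cast with float().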
 adam_beta1: 0.9  # Adam 优化器的 beta1 参数
 adam_beta2: 0.99  # Adam 优化器的 beta2 参数
 weight_decay: 0.1  # 权重衰减,用于防止过拟合,设置为0.1

+ 1 - 1
src/train_model_grpo_v1.2.py

@@ -201,7 +201,7 @@ class GRPOTrainerWrapper:
         """
         return GRPOConfig(
             use_vllm=self.config.use_vllm,
-            learning_rate=self.config.learning_rate,
+            learning_rate=float(self.config.learning_rate) ,
             adam_beta1=self.config.adam_beta1,
             adam_beta2=self.config.adam_beta2,
             weight_decay=self.config.weight_decay,