Bladeren bron

遵循面向对象思想重构train_model_grpo.py

zhouyang.xie 4 maanden geleden
bovenliggende
commit
3634ab78f0
1 gewijzigd bestand met 2 toevoegingen en 2 verwijderingen
  1. src/train_model_grpo_v1.py (+2 −2)

+ 2 - 2
src/train_model_grpo_v1.py

@@ -90,10 +90,10 @@ class ModelTrainer:
         )
 
         torch.cuda.empty_cache()
-        print("self.config.learning_rate=",self.config.learning_rate)
+        print("self.config.learning_rate=",float(self.config.learning_rate))
         training_args = GRPOConfig(
             use_vllm=self.config.use_vllm,
-            learning_rate=self.config.learning_rate,
+            learning_rate=float(self.config.learning_rate),
             adam_beta1=self.config.adam_beta1,
             adam_beta2=self.config.adam_beta2,
             weight_decay=self.config.weight_decay,