|
@@ -90,10 +90,10 @@ class ModelTrainer:
|
|
|
)
|
|
|
|
|
|
torch.cuda.empty_cache()
|
|
|
- print("self.config.learning_rate=",self.config.learning_rate)
|
|
|
+ print("self.config.learning_rate=",float(self.config.learning_rate))
|
|
|
training_args = GRPOConfig(
|
|
|
use_vllm=self.config.use_vllm,
|
|
|
- learning_rate=self.config.learning_rate,
|
|
|
+ learning_rate=float(self.config.learning_rate),
|
|
|
adam_beta1=self.config.adam_beta1,
|
|
|
adam_beta2=self.config.adam_beta2,
|
|
|
weight_decay=self.config.weight_decay,
|