|
@@ -201,7 +201,7 @@ class GRPOTrainerWrapper:
|
|
|
"""
|
|
|
return GRPOConfig(
|
|
|
use_vllm=self.config.use_vllm,
|
|
|
- learning_rate=self.config.learning_rate,
|
|
|
+ learning_rate=float(self.config.learning_rate) ,
|
|
|
adam_beta1=self.config.adam_beta1,
|
|
|
adam_beta2=self.config.adam_beta2,
|
|
|
weight_decay=self.config.weight_decay,
|