Ver código fonte

修改注释

zhouyang.xie 3 meses atrás
pai
commit
57918b817c
1 arquivo alterado com 2 adições e 2 exclusões
  1. 2 2
      src/train_model_grpo.py

+ 2 - 2
src/train_model_grpo.py

@@ -166,8 +166,8 @@ class ModelTrainer:
             max_prompt_length = 256,
             max_completion_length = 200,
             # num_train_epochs = 1, # Set to 1 for a full training run
-            max_steps = 20,
-            save_steps = 20,
+            max_steps = 20,  # 250
+            save_steps = 20, # 250
             max_grad_norm = 0.1,
             report_to = "none", # Can use Weights & Biases
             output_dir = os.path.join('..', 'models',"outputs"),