Browse Source

修改 grpo v1.1.py源代码- correctness_reward_func函数

zhouyang.xie 2 months ago
parent
commit
5a1c263040
1 changed file with 1 addition and 0 deletions
  1. 1 0
      src/train_model_grpo_v1.1.py

+ 1 - 0
src/train_model_grpo_v1.1.py

@@ -361,6 +361,7 @@ class ModelTrainer:
         :param answer: 正确答案
         :return: 补全内容正确的得分列表
         """
+        print("completions : \n ",completions)
         responses = [completion[0]['content'] for completion in completions]
         q = prompts[0][-1]['content']
         extracted_responses = [ModelTrainer.extract_xml_answer(r) for r in responses]