Prechádzať zdrojové kódy

修改训练代码为train_model_grpo_v1.2.py 以解决训练输出未达预期问题

zhouyang.xie pred 2 mesiacmi
rodič
commit
acebf42a2f
1 zmenený súbor: 8 pridaní a 2 odobrania
  1. 8 2
      src/train_model_grpo_v1.2.py

+ 8 - 2
src/train_model_grpo_v1.2.py

@@ -244,14 +244,20 @@ class ModelTrainer:
         :return: 提取的答案
         """
         try:
-            print("text -> \n",text)
+            print("text -> \n", text)
             if "<answer>" in text and "</answer>" in text:
                 answer = text.split("<answer>")[-1]
                 answer = answer.split("</answer>")[0]
                 return answer.strip()
             else:
                 print("Warning: <answer> tag not found in response.")
-                return text.strip()  # 返回原始文本作为备用
+                # 尝试提取其他有意义的部分
+                if "诊断" in text:
+                    return text.split("诊断")[-1].strip()
+                elif "排查建议" in text:
+                    return text.split("排查建议")[-1].strip()
+                else:
+                    return text.strip()  # 返回原始文本作为备用
         except Exception as e:
             print(f"Error extracting XML answer: {e}")
             return ""  # 返回空字符串或其他默认值