Explorar el Código

修改训练代码为train_model_grpo_v1.2.py 以解决训练输出未达预期问题

zhouyang.xie hace 4 meses
padre
commit
f8443b3682
Se ha modificado 1 fichero con 4 adiciones y 4 borrados
  1. 4 4
      src/generate_data.py

+ 4 - 4
src/generate_data.py

@@ -31,16 +31,16 @@ class DataGenerator:
 
              # 当 human 和 bot 都取到后,拼装一个新的 JSON
             target_data = {
-                "question": case_data[1]+" 请予以故障诊断?",
+                "question": case_data[1],
                 "answer":   "".join([case_data[2],case_data[3],case_data[4]]),
                 "prompt": [
                     {
-                        "content": f"\nRespond in the following format:\n<reasoning>\n {case_data[2]} \n</reasoning>\n<answer>\n {case_data[3]}  {case_data[4]} \n</answer>\n",
-                        # "content": "\nRespond in the following format:\n<reasoning>\n...\n</reasoning>\n<answer>\n...\n</answer>\n",
+                        # "content": f"\nRespond in the following format:\n<reasoning>\n {case_data[2]} \n</reasoning>\n<answer>\n {case_data[3]}  {case_data[4]} \n</answer>\n",
+                        "content": "\nRespond in the following format:\n<reasoning>\n...\n</reasoning>\n<answer>\n...\n</answer>\n",
                         "role": "system"
                     },
                     {
-                        "content": case_data[1]+" 请予以故障诊断?",
+                        "content": case_data[1],
                         "role": "user"
                     }
                 ]