Bläddra i källkod

换用github jwjohns/unsloth-GRPO-qwen2.5 验证GRPO训练模型

zhouyang.xie 2 månader sedan
förälder
incheckning
c485f38baf
1 ändrad fil med 10 tillägg och 2 borttagningar
  1. 10 2
      src/train_model_github_jwjohns.py

+ 10 - 2
src/train_model_github_jwjohns.py

@@ -74,7 +74,11 @@ from modelscope.msdatasets import MsDataset
 def get_gsm8k_questions(split="train") -> Dataset:
     # data = load_dataset('openai/gsm8k', 'main')[split]
     data =  MsDataset.load('openai-mirror/gsm8k', subset_name='main', split=split)
-    print("original datasets for train ->\n",data)
+    # Save original datasets to JSONL
+    with open(f'gsm8k_original_{split}.jsonl', 'w') as f:
+        for item in data:
+            f.write(json.dumps(item) + '\n')
+    
     data = data.map(lambda x: {
         'prompt': [
             {'role': 'system', 'content': SYSTEM_PROMPT},
@@ -82,7 +86,11 @@ def get_gsm8k_questions(split="train") -> Dataset:
         ],
         'answer': extract_hash_answer(x['answer'])
     })
-    print("format datasets for train ->\n",data)
+    
+    # Save formatted datasets to JSONL
+    with open(f'gsm8k_formatted_{split}.jsonl', 'w') as f:
+        for item in data:
+            f.write(json.dumps(item) + '\n')
     return data
 
 # Get dataset