Prechádzať zdrojové kódy

换用github jwjohns/unsloth-GRPO-qwen2.5 验证GRPO训练模型

zhouyang.xie 2 mesiacov pred
rodič
commit
4995352642

BIN
datasets/downloads/2116db3173991f7a8f3f8e7c4131a29289b32e939ba98bfc840569e4e70383f7


+ 1 - 0
datasets/downloads/2116db3173991f7a8f3f8e7c4131a29289b32e939ba98bfc840569e4e70383f7.json

@@ -0,0 +1 @@
+{"url": "https://www.modelscope.cn/api/v1/datasets/openai-mirror/gsm8k/repo?Source=SDK&Revision=master&FilePath=main%2Ftest-00000-of-00001.parquet", "etag": null}

BIN
datasets/downloads/7f4d639d15bd9cea5358f29d67b83d8225e0b067287a7e4948f935c8cea17784


+ 1 - 0
datasets/downloads/7f4d639d15bd9cea5358f29d67b83d8225e0b067287a7e4948f935c8cea17784.json

@@ -0,0 +1 @@
+{"url": "https://www.modelscope.cn/api/v1/datasets/openai-mirror/gsm8k/repo?Source=SDK&Revision=master&FilePath=main%2Ftrain-00000-of-00001.parquet", "etag": null}

+ 1 - 0
datasets/openai-mirror___gsm8k/main-7ff6dd6a92438c85/0.0.0/master/dataset_info.json

@@ -0,0 +1 @@
+{"description": "", "citation": "", "homepage": "", "license": "", "features": {"question": {"dtype": "string", "_type": "Value"}, "answer": {"dtype": "string", "_type": "Value"}}, "builder_name": "parquet", "dataset_name": "gsm8k", "config_name": "main", "version": {"version_str": "0.0.0", "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 3963202, "num_examples": 7473, "dataset_name": "gsm8k"}, "test": {"name": "test", "num_bytes": 713732, "num_examples": 1319, "dataset_name": "gsm8k"}}, "download_checksums": {"hf://datasets/openai-mirror/gsm8k@master/main/train-00000-of-00001.parquet": {"num_bytes": 2306545, "checksum": null}, "hf://datasets/openai-mirror/gsm8k@master/main/test-00000-of-00001.parquet": {"num_bytes": 419088, "checksum": null}}, "download_size": 2725633, "dataset_size": 4676934, "size_in_bytes": 7402567}

BIN
datasets/openai-mirror___gsm8k/main-7ff6dd6a92438c85/0.0.0/master/gsm8k-test.arrow


BIN
datasets/openai-mirror___gsm8k/main-7ff6dd6a92438c85/0.0.0/master/gsm8k-train.arrow


+ 5 - 0
src/dataset_download.py

@@ -0,0 +1,5 @@
+# Dataset download
+from modelscope.msdatasets import MsDataset
+
+ds =  MsDataset.load('openai-mirror/gsm8k', subset_name='main', split='train', cache_dir="../datasets/")
+# You can configure subset_name and split as needed; refer to the "Quick Start" example code

+ 4 - 2
src/qwen_notebook_clone.py → src/train_model_github_jwjohns.py

@@ -74,6 +74,7 @@ from modelscope.msdatasets import MsDataset
 def get_gsm8k_questions(split="train") -> Dataset:
     # data = load_dataset('openai/gsm8k', 'main')[split]
     data =  MsDataset.load('openai-mirror/gsm8k', subset_name='main', split=split)
+    print("original datasets for train ->\n",data)
     data = data.map(lambda x: {
         'prompt': [
             {'role': 'system', 'content': SYSTEM_PROMPT},
@@ -81,6 +82,7 @@ def get_gsm8k_questions(split="train") -> Dataset:
         ],
         'answer': extract_hash_answer(x['answer'])
     })
+    print("format datasets for train ->\n",data)
     return data
 
 # Get dataset
@@ -151,8 +153,8 @@ training_args = GRPOConfig(
     num_generations = 8,
     max_prompt_length = 256,
     max_completion_length = 200,
-    max_steps = 250,  # 2000 Increased 8x for longer training
-    save_steps = 250,  # Save checkpoints more frequently
+    max_steps = 10,  # 2000 Increased 8x for longer training
+    save_steps = 10,  # Save checkpoints more frequently
     max_grad_norm = 0.1,
     report_to = "tensorboard",  # Enable tensorboard reporting for metrics display
     output_dir = "outputs",