Browse Source

换用github jwjohns/unsloth-GRPO-qwen2.5 验证GRPO训练模型

zhouyang.xie 4 months ago
parent
commit
d32c4de968
1 changed file with 2 additions and 2 deletions
  1. 2 2
      src/qwen_notebook_clone.py

+ 2 - 2
src/qwen_notebook_clone.py

@@ -19,7 +19,7 @@ model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=model_name,
     max_seq_length=512, # 2028
     load_in_4bit=True,
-    fast_inference=True,
+    fast_inference=False,
     max_lora_rank=128,
     gpu_memory_utilization=0.5,
 )
@@ -133,7 +133,7 @@ from vllm import SamplingParams
 
 # IMPORTANT: Extended training configuration for better results
 training_args = GRPOConfig(
-    use_vllm = True, 
+    use_vllm = False, 
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,