Ver código fonte

换用github jwjohns/unsloth-GRPO-qwen2.5 验证GRPO训练模型

zhouyang.xie 4 meses atrás
pai
commit
d32c4de968
1 arquivo alterado com 2 adições e 2 exclusões
  1. 2 2
      src/qwen_notebook_clone.py

+ 2 - 2
src/qwen_notebook_clone.py

@@ -19,7 +19,7 @@ model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=model_name,
     max_seq_length=512, # 2028
     load_in_4bit=True,
-    fast_inference=True,
+    fast_inference=False,
     max_lora_rank=128,
     gpu_memory_utilization=0.5,
 )
@@ -133,7 +133,7 @@ from vllm import SamplingParams
 
 # IMPORTANT: Extended training configuration for better results
 training_args = GRPOConfig(
-    use_vllm = True, 
+    use_vllm = False, 
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,