Parcourir la source

换用github jwjohns/unsloth-GRPO-qwen2.5 验证GRPO训练模型

zhouyang.xie il y a 4 mois
Parent
commit
c413d5f5b4
1 fichier modifié avec 2 ajouts et 2 suppressions
  1. 2 2
      src/qwen_notebook_clone.py

+ 2 - 2
src/qwen_notebook_clone.py

@@ -17,11 +17,11 @@ PatchFastRL("GRPO", FastLanguageModel)
 model_name = f"../models/pretrained/DeepSeek-R1-Distill-Qwen-1.5B"
 model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=model_name,
-    max_seq_length=1024, # 2028
+    max_seq_length=512, # 2028
     load_in_4bit=True,
     fast_inference=True,
     max_lora_rank=128,
-    gpu_memory_utilization=0.60,
+    gpu_memory_utilization=0.5,
 )
 
 model = FastLanguageModel.get_peft_model(