Просмотр исходного кода

换用github jwjohns/unsloth-GRPO-qwen2.5 验证GRPO训练模型

zhouyang.xie 4 месяцев назад
Родитель
Коммит
40e0961e0f
Изменён 1 файл: 2 добавления и 2 удаления
  1. 2 2
      src/qwen_notebook_clone.py

+ 2 - 2
src/qwen_notebook_clone.py

@@ -14,7 +14,7 @@ os.environ["UNSLOTH_DISPLAY_METRICS"] = "true"
 PatchFastRL("GRPO", FastLanguageModel)
 
 # Load the model just like the notebook
-model_name = f"../models/Qwen/Qwen2.5-3B-Instruct"
+model_name = f"../models/pretrained/DeepSeek-R1-Distill-Qwen-1.5B"
 model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=model_name,
     max_seq_length=2048,
@@ -213,6 +213,6 @@ trainer.train()
 
 # Save the trained model
 print("Saving LoRA weights to grpo_saved_lora...")
-model.save_lora("grpo_saved_lora")
+model.save_lora(f"../models/trained/grpoModel")
 
 print("Training complete!")