@@ -14,7 +14,7 @@ os.environ["UNSLOTH_DISPLAY_METRICS"] = "true"
 PatchFastRL("GRPO", FastLanguageModel)
 
 # Load the model just like the notebook
-model_name = f"../models/Qwen/Qwen2.5-3B-Instruct"
+model_name = f"../models/pretrained/DeepSeek-R1-Distill-Qwen-1.5B"
 model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=model_name,
     max_seq_length=2048,
@@ -213,6 +213,6 @@ trainer.train()
 
 # Save the trained model
 print("Saving LoRA weights to grpo_saved_lora...")
-model.save_lora("grpo_saved_lora")
+model.save_lora(f"../models/trained/grpoModel")
 
 print("Training complete!")
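
For orientation, here is roughly how the patched load-and-save flow reads once the diff above is applied. This is a minimal sketch, assuming the standard `from unsloth import FastLanguageModel, PatchFastRL` imports from earlier in the script; the GRPO trainer setup between the two hunks is elided, and the remaining `from_pretrained` arguments are left at their defaults.

```python
from unsloth import FastLanguageModel, PatchFastRL

# Patch TRL's GRPO trainer before the model is loaded.
PatchFastRL("GRPO", FastLanguageModel)

# Load the distilled base model from the local pretrained-models directory.
model_name = "../models/pretrained/DeepSeek-R1-Distill-Qwen-1.5B"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=2048,
)

# ... GRPO trainer setup and trainer.train() go here (unchanged by this diff) ...

# After training, write only the LoRA adapter weights to the trained-models directory.
model.save_lora("../models/trained/grpoModel")
```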