@@ -14,7 +14,7 @@ os.environ["UNSLOTH_DISPLAY_METRICS"] = "true"
 PatchFastRL("GRPO", FastLanguageModel)
 
 # Load the model just like the notebook
-model_name = f"../models/Qwen/Qwen2.5-3B-Instruct"
+model_name = f"../models/pretrained/DeepSeek-R1-Distill-Qwen-1.5B"
 model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=model_name,
     max_seq_length=2048,
@@ -213,6 +213,6 @@ trainer.train()
 
 # Save the trained model
 print("Saving LoRA weights to grpo_saved_lora...")
-model.save_lora("grpo_saved_lora")
+model.save_lora(f"../models/trained/grpoModel")
 
 print("Training complete!")
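
For orientation, here is roughly how the patched load-and-save flow reads once the diff above is applied. This is a minimal sketch, assuming the standard `from unsloth import FastLanguageModel, PatchFastRL` imports from earlier in the script; the GRPO trainer setup between the two hunks is elided, and the remaining `from_pretrained` arguments are left at their defaults.

```python
from unsloth import FastLanguageModel, PatchFastRL

# Patch TRL's GRPO trainer before the model is loaded.
PatchFastRL("GRPO", FastLanguageModel)

# Load the distilled base model from the local pretrained-models directory.
model_name = "../models/pretrained/DeepSeek-R1-Distill-Qwen-1.5B"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=2048,
)

# ... GRPO trainer setup and trainer.train() go here (unchanged by this diff) ...

# After training, write only the LoRA adapter weights to the trained-models directory.
model.save_lora("../models/trained/grpoModel")
```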