|
@@ -17,11 +17,11 @@ PatchFastRL("GRPO", FastLanguageModel)
|
|
model_name = f"../models/pretrained/DeepSeek-R1-Distill-Qwen-1.5B"
|
|
model_name = f"../models/pretrained/DeepSeek-R1-Distill-Qwen-1.5B"
|
|
model, tokenizer = FastLanguageModel.from_pretrained(
|
|
model, tokenizer = FastLanguageModel.from_pretrained(
|
|
model_name=model_name,
|
|
model_name=model_name,
|
|
- max_seq_length=1024, # 2028
|
|
|
|
|
|
+ max_seq_length=512, # 2028
|
|
load_in_4bit=True,
|
|
load_in_4bit=True,
|
|
fast_inference=True,
|
|
fast_inference=True,
|
|
max_lora_rank=128,
|
|
max_lora_rank=128,
|
|
- gpu_memory_utilization=0.60,
|
|
|
|
|
|
+ gpu_memory_utilization=0.5,
|
|
)
|
|
)
|
|
|
|
|
|
model = FastLanguageModel.get_peft_model(
|
|
model = FastLanguageModel.get_peft_model(
|