@@ -19,7 +19,7 @@ model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=model_name,
     max_seq_length=512, # 2028
     load_in_4bit=True,
-    fast_inference=True,
+    fast_inference=False,
     max_lora_rank=128,
     gpu_memory_utilization=0.5,
 )
@@ -133,7 +133,7 @@ from vllm import SamplingParams

 # IMPORTANT: Extended training configuration for better results
 training_args = GRPOConfig(
-    use_vllm = True,
+    use_vllm = False,
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,