|
@@ -117,7 +117,7 @@ class ModelTrainer:
|
|
|
max_seq_length=self.max_seq_length,
|
|
|
load_in_4bit=self.load_in_4bit, # 值为True 以 4 bit量化进行微调,为False LoRA 16bit。这将内存使用量减少了 4 倍,使我们能够在免费的 16GB 内存 GPU 中实际进行微调。4 位量化本质上将权重转换为一组有限的数字以减少内存使用量。这样做的缺点是准确度会下降 1-2%。如果您想要这种微小的额外准确度,请在较大的 GPU(如 H100)上将其设置为 False。
|
|
|
dtype=self.dtype,
|
|
|
- fast_inference = True, # Enable vLLM fast inference
|
|
|
+ fast_inference = False, # vLLM fast inference disabled; set to True to enable it
|
|
|
max_lora_rank = lora_rank,
|
|
|
gpu_memory_utilization=0.1,# Reduce if out of memory
|
|
|
)
|
|
@@ -149,7 +149,7 @@ class ModelTrainer:
|
|
|
def train(self, model, tokenizer, train_dataset):
|
|
|
print("is_bfloat16_supported()=",is_bfloat16_supported())
|
|
|
training_args = GRPOConfig(
|
|
|
- use_vllm = True, # use vLLM for fast inference!
|
|
|
+ use_vllm = False, # vLLM generation disabled; set to True to use vLLM for fast inference
|
|
|
learning_rate = 5e-6,
|
|
|
adam_beta1 = 0.9,
|
|
|
adam_beta2 = 0.99,
|
|
@@ -225,5 +225,5 @@ if __name__ == "__main__":
|
|
|
model = trainer.train(model, tokenizer, train_dataset)
|
|
|
|
|
|
# 保存模型
|
|
|
- save_path = os.path.join('..', 'models', 'trained', 'DeepSeek-R1-Distill-Qwen-1.5B')
|
|
|
+ save_path = os.path.join('..', 'models', 'trained', 'DeepSeek-R1-Distill-Qwen-1.5B-GRPO')
|
|
|
trainer.save_model(model, tokenizer, save_path)
|