import os

import torch
from datasets import load_dataset
from torch.utils.data import DataLoader
from unsloth import FastLanguageModel


class ModelFineTuner:
    def __init__(self, model_path, max_seq_length):
        self.model_path = model_path
        self.max_seq_length = max_seq_length

    def load_model(self):
        # Pass max_seq_length at load time so unsloth can configure
        # the model for the intended context window.
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=self.model_path,
            max_seq_length=self.max_seq_length,
        )
        return model, tokenizer

    def load_data(self, data_path):
        # JSON file with a 'text' field; returns a DatasetDict with a
        # single 'train' split.
        return load_dataset("json", data_files={"train": data_path})

    def fine_tune(self, model, tokenizer, dataset):
        # Attach LoRA adapters; only the adapter weights are trained.
        model = FastLanguageModel.get_peft_model(
            model,
            r=16,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                            "gate_proj", "up_proj", "down_proj"],
            lora_alpha=16,
            lora_dropout=0,
            bias="none",
            use_gradient_checkpointing=True,
            random_state=3407,
            max_seq_length=self.max_seq_length,
        )
        optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

        def tokenize(batch):
            tokens = tokenizer(
                batch["text"],
                truncation=True,
                padding="max_length",
                max_length=self.max_seq_length,
            )
            # Causal LM training: labels mirror input_ids; without
            # them outputs.loss would be None.
            tokens["labels"] = tokens["input_ids"].copy()
            return tokens

        dataset = dataset.map(
            tokenize, batched=True,
            remove_columns=dataset["train"].column_names,
        )
        dataset.set_format("torch")
        # Iterate one example at a time, matching the original loop;
        # the DataLoader collates each example into batched tensors.
        loader = DataLoader(dataset["train"], batch_size=1)
        device = next(model.parameters()).device

        model.train()
        for epoch in range(3):
            for batch in loader:
                batch = {k: v.to(device) for k, v in batch.items()}
                loss = model(**batch).loss
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
        return model

    def save_fine_tuned_model(self, model, save_path):
        # Saves the LoRA adapter weights, not the full base model.
        model.save_pretrained(save_path)


if __name__ == "__main__":
    model_path = os.path.join("..", "models", "deepseek-r1-distill-1.5B")
    max_seq_length = 2048

    fine_tuner = ModelFineTuner(model_path, max_seq_length)
    model, tokenizer = fine_tuner.load_model()
    dataset = fine_tuner.load_data(
        os.path.join("..", "data", "processed", "train.json"))
    model = fine_tuner.fine_tune(model, tokenizer, dataset)
    fine_tuner.save_fine_tuned_model(
        model,
        os.path.join("..", "models", "deepseek-r1-distill-1.5B-fine-tuned"))
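
# Usage sketch (not part of the training run above): reloading the saved
# adapter for inference. This assumes the output directory written by
# save_fine_tuned_model; FastLanguageModel.for_inference is unsloth's
# toggle for fast generation mode.
#
#     from unsloth import FastLanguageModel
#
#     model, tokenizer = FastLanguageModel.from_pretrained(
#         model_name=os.path.join(
#             "..", "models", "deepseek-r1-distill-1.5B-fine-tuned"),
#         max_seq_length=2048,
#     )
#     FastLanguageModel.for_inference(model)
#     inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
#     print(tokenizer.decode(model.generate(**inputs, max_new_tokens=64)[0]))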