import os

import torch
from datasets import load_dataset
from torch.utils.data import DataLoader
from unsloth import FastLanguageModel


class ModelFineTuner:
    def __init__(self, model_path, max_seq_length):
        self.model_path = model_path
        self.max_seq_length = max_seq_length

    def load_model(self):
        # Unsloth returns the model and its tokenizer together.
        model, tokenizer = FastLanguageModel.from_pretrained(self.model_path, max_seq_length=self.max_seq_length)
        return model, tokenizer

    def load_data(self, data_path):
        # Expects a JSON/JSONL file with a "text" field per record; load_dataset exposes it under the "train" split.
        return load_dataset("json", data_files=data_path)

    def fine_tune(self, model, tokenizer, dataset):
        # Attach LoRA adapters so only a small set of parameters is trained.
        model = FastLanguageModel.get_peft_model(
            model,
            r=16,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
            lora_alpha=16,
            lora_dropout=0,
            bias="none",
            use_gradient_checkpointing=True,
            random_state=3407,
            max_seq_length=self.max_seq_length,
        )

        optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

        # Tokenize and copy input_ids into labels so the causal-LM loss can be computed.
        def tokenize(batch):
            tokens = tokenizer(batch["text"], truncation=True, padding="max_length", max_length=self.max_seq_length)
            tokens["labels"] = tokens["input_ids"].copy()
            return tokens

        dataset = dataset.map(tokenize, batched=True, remove_columns=dataset["train"].column_names)
        dataset.set_format("torch")
        train_loader = DataLoader(dataset["train"], batch_size=2, shuffle=True)  # small batch size; adjust to fit memory

        device = next(model.parameters()).device
        model.train()
        for epoch in range(3):
            for batch in train_loader:
                batch = {k: v.to(device) for k, v in batch.items()}
                outputs = model(**batch)
                loss = outputs.loss
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
        return model

    def save_fine_tuned_model(self, model, save_path):
        # For a PEFT-wrapped model this saves the LoRA adapter weights.
        model.save_pretrained(save_path)


if __name__ == "__main__":
    model_path = os.path.join('..', 'models', 'deepseek-r1-distill-1.5B')
    max_seq_length = 2048

    fine_tuner = ModelFineTuner(model_path, max_seq_length)
    model, tokenizer = fine_tuner.load_model()
    dataset = fine_tuner.load_data(os.path.join('..', 'data', 'processed', 'train.json'))
    model = fine_tuner.fine_tune(model, tokenizer, dataset)
    fine_tuner.save_fine_tuned_model(model, os.path.join('..', 'models', 'deepseek-r1-distill-1.5B-fine-tuned'))