- import os
- import torch
- from unsloth import FastLanguageModel
class ModelQuantizer:
    """Load a fine-tuned causal LM and apply post-training dynamic quantization.

    Dynamic quantization converts the weights of ``torch.nn.Linear`` layers to
    int8 (``torch.qint8``), shrinking the model on disk and speeding up CPU
    inference, without requiring calibration data.
    """

    def __init__(self, model_path):
        # Path (or hub id) forwarded verbatim to FastLanguageModel.from_pretrained.
        self.model_path = model_path

    def quantize(self):
        """Load the model at ``self.model_path`` and dynamically quantize it.

        Returns:
            tuple: ``(quantized_model, tokenizer)`` — the model with its
            ``nn.Linear`` layers replaced by int8 dynamic-quantized
            equivalents, and the tokenizer loaded alongside it.
        """
        model, tokenizer = FastLanguageModel.from_pretrained(self.model_path)
        # torch dynamic quantization (qint8) is a CPU-only transform, while
        # unsloth loads onto GPU by default — move to CPU first or the
        # quantized ops cannot run.
        model = model.to("cpu")
        quantized_model = torch.quantization.quantize_dynamic(
            model, {torch.nn.Linear}, dtype=torch.qint8
        )
        return quantized_model, tokenizer

    def save_quantized_model(self, model, save_path):
        """Persist *model* via its Hugging Face ``save_pretrained`` hook.

        NOTE(review): ``quantize_dynamic`` wraps Linear modules in quantized
        replacements; confirm ``save_pretrained`` round-trips them correctly
        for this model class.
        """
        model.save_pretrained(save_path)
if __name__ == "__main__":
    # Source checkpoint and destination directory, relative to the repo layout.
    finetuned_dir = os.path.join('..', 'models', 'deepseek-r1-distill-1.5B-finetuned')
    quantized_dir = os.path.join('..', 'models', 'deepseek-r1-distill-1.5B-quantized')

    # Quantize the fine-tuned checkpoint, then write the result back out.
    quantizer = ModelQuantizer(finetuned_dir)
    quantized_model, tokenizer = quantizer.quantize()
    quantizer.save_quantized_model(quantized_model, quantized_dir)
|