|
@@ -122,7 +122,7 @@ class ModelTrainer:
|
|
|
dtype=self.dtype,
|
|
|
fast_inference = True, # Enable vLLM fast inference
|
|
|
max_lora_rank = lora_rank,
|
|
|
- gpu_memory_utilization=0.2, # 0.6 # Reduce if out of memory
|
|
|
+ gpu_memory_utilization=0.1, # 0.6 # Reduce if out of memory
|
|
|
)
|
|
|
|
|
|
# 将模型移动到设备上
|