пре 9 месеци · 6b7ea96c64
--- a/src/train_model_grpo_v2.py
+++ b/src/train_model_grpo_v2.py
@@ -123,7 +123,7 @@ class ModelTrainer:
 
				             dtype=self.dtype,
			
 
				             fast_inference = True, # Enable vLLM fast inference
			
 
				             max_lora_rank = lora_rank,
			
 
				-            gpu_memory_utilization=0.95, # 0.6 # Reduce if out of memory
			
 
				+            gpu_memory_utilization=0.005, # 0.6 # Reduce if out of memory
			
 
				         )
			
 
				 
			
 
				         # 将模型移动到设备上