10 달 전 · 75edbd5bb9
--- a/src/train_model_grpo.py
+++ b/src/train_model_grpo.py
@@ -171,8 +171,8 @@ class ModelTrainer:
 
				         #     train_dataset, batch_size=1, shuffle=True, pin_memory=True  
			
 
				         # )
			
 
				         
			
 
				-        # 释放未使用的显存
			
 
				-        torch.cuda.empty_cache()
			
 
				+        # # 释放未使用的显存
			
 
				+        # torch.cuda.empty_cache()
			
 
				 
			
 
				         training_args = GRPOConfig(
			
 
				             use_vllm = False, # use vLLM for fast inference!