4 月之前 · 2951868eb5
--- a/src/train_model_grpo_v2.py
+++ b/src/train_model_grpo_v2.py
@@ -123,7 +123,7 @@ class ModelTrainer:
 
				             dtype=self.dtype,
			
 
				             fast_inference = True, # Enable vLLM fast inference
			
 
				             max_lora_rank = lora_rank,
			
 
				-            gpu_memory_utilization=0.6, # 0.6 # Reduce if out of memory
			
 
				+            gpu_memory_utilization=0.08, # 0.6 # Reduce if out of memory
			
 
				         )
			
 
				 
			
 
				         # 将模型移动到设备上
			
@@ -233,7 +233,7 @@ if __name__ == "__main__":
 
				     max_seq_length = 512  # 单次会话（single session） 的最大 token 长度，一个token大约3-4 字节（Byte）
			
 
				     dtype = torch.float16  # 数据类型
			
 
				     load_in_4bit = True  # 是否以4位精度加载模型
			
 
				-    lora_rank=64
			
 
				+    lora_rank=16 
			
 
				 
			
 
				     # 定义训练集和测试集路径
			
 
				     train_data_path = os.path.join('..', 'data', 'processed', 'train.jsonl')