há 10 meses atrás · bcd25a2773
--- a/src/train_model_grpo.py
+++ b/src/train_model_grpo.py
@@ -117,7 +117,7 @@ class ModelTrainer:
 
				             max_seq_length=self.max_seq_length,
			
 
				             load_in_4bit=self.load_in_4bit, # 值为True 以 4 bit量化进行微调，为False LoRA 16bit。这将内存使用量减少了 4 倍，使我们能够在免费的 16GB 内存 GPU 中实际进行微调。4 位量化本质上将权重转换为一组有限的数字以减少内存使用量。这样做的缺点是准确度会下降 1-2%。如果您想要这种微小的额外准确度，请在较大的 GPU（如 H100）上将其设置为 False。
			
 
				             dtype=self.dtype,
			
 
				-            fast_inference = True, # Enable vLLM fast inference
			
 
				+            fast_inference = False, # vLLM fast inference disabled; set to True to enable it
			
 
				             max_lora_rank = lora_rank,
			
 
				             gpu_memory_utilization=0.6, # 0.6 # Reduce if out of memory
			
 
				         )
			
@@ -162,7 +162,7 @@ class ModelTrainer:
 
				         torch.cuda.empty_cache()
			
 
				 
			
 
				         training_args = GRPOConfig(
			
 
				-            use_vllm = True, # use vLLM for fast inference!
			
 
				+            use_vllm = False, # vLLM generation disabled; set to True to use vLLM for fast inference
			
 
				             learning_rate = 5e-6,
			
 
				             adam_beta1 = 0.9,
			
 
				             adam_beta2 = 0.99,
			
@@ -179,8 +179,8 @@ class ModelTrainer:
 
				             max_prompt_length = 256, # 输入提示的最大长度
			
 
				             max_completion_length = 200, # 生成内容的最大长度
			
 
				             # num_train_epochs = 1, # Set to 1 for a full training run
			
 
				-            max_steps = 20,  # 250
			
 
				-            save_steps = 20, # 250
			
 
				+            max_steps = 250,  # 250
			
 
				+            save_steps = 250, # 250
			
 
				             max_grad_norm = 0.1,
			
 
				             report_to = "none", # Can use Weights & Biases
			
 
				             output_dir = os.path.join('..', 'models',"outputs"),
			
@@ -218,7 +218,7 @@ if __name__ == "__main__":
 
				     # model_name: 预训练模型的路径
			
 
				     max_seq_length = 6000  # 单次会话（single session） 的最大 token 长度，一个token大约3-4 字节（Byte）
			
 
				     dtype = torch.float16  # 数据类型
			
 
				-    load_in_4bit = True  # 是否以4位精度加载模型
			
 
				+    load_in_4bit = False  # 是否以4位精度加载模型
			
 
				     lora_rank=16
			
 
				 
			
 
				     # 定义训练集和测试集路径