9 months ago · c8e0ee83b0
--- a/src/train_model_grpo_v2.py
+++ b/src/train_model_grpo_v2.py
@@ -126,13 +126,13 @@ class ModelTrainer:
 
				             gpu_memory_utilization=0.6, # 0.6 # Reduce if out of memory
			
 
				         )
			
 
				 
			
 
				-        # # 将模型移动到设备上
			
 
				-        # model = model.to_empty(device='cuda')  # 使用 to_empty 而不是 to
			
 
				+        # 将模型移动到设备上
			
 
				+        model = model.to_empty(device='cuda')  # 使用 to_empty 而不是 to
			
 
				 
			
 
				-        # # 初始化模型的权重
			
 
				-        # for param in model.parameters():
			
 
				-        #     if param.is_meta:
			
 
				-        #         param.data = torch.randn_like(param)  # 随机初始化
			
 
				+        # 初始化模型的权重
			
 
				+        for param in model.parameters():
			
 
				+            if param.is_meta:
			
 
				+                param.data = torch.randn_like(param)  # 随机初始化
			
 
				 
			
 
				         # 添加 LoRA 适配器
			
 
				         model = FastLanguageModel.get_peft_model(
			
@@ -167,13 +167,13 @@ class ModelTrainer:
 
				         print(f"Reserved memory: {torch.cuda.memory_reserved()}")
			
 
				         print(f"Allocated memory: {torch.cuda.memory_allocated()}")
			
 
				 
			
 
				-        # # 启用 pin_memory  2025年3月10日未能验证通过
			
 
				-        # train_loader = torch.utils.data.DataLoader(
			
 
				-        #     train_dataset, batch_size=1, shuffle=True, pin_memory=True  
			
 
				-        # )
			
 
				+        # 启用 pin_memory  2025年3月10日未能验证通过
			
 
				+        train_loader = torch.utils.data.DataLoader(
			
 
				+            train_dataset, batch_size=1, shuffle=True, pin_memory=True  
			
 
				+        )
			
 
				         
			
 
				-        # # 释放未使用的显存
			
 
				-        # torch.cuda.empty_cache()
			
 
				+        # 释放未使用的显存
			
 
				+        torch.cuda.empty_cache()
			
 
				 
			
 
				         training_args = GRPOConfig(
			
 
				             use_vllm = False, # use vLLM for fast inference!
			
@@ -189,7 +189,7 @@ class ModelTrainer:
 
				             fp16 = not is_bfloat16_supported(),
			
 
				             per_device_train_batch_size = 1,
			
 
				             gradient_accumulation_steps = 1, # Increase to 4 for smoother training
			
 
				-            num_generations = 8, # 8 # 每次生成 输出 个数
			
 
				+            num_generations = 4, # 8 # 每次生成 输出 个数
			
 
				             max_prompt_length = 256, # 256 # 输入提示的最大长度
			
 
				             max_completion_length = 200,# 200 # 生成内容的最大长度
			
 
				             num_train_epochs = 1, # Set to 1 for a full training run
			
@@ -230,7 +230,7 @@ if __name__ == "__main__":
 
				     # 配置参数
			
 
				     model_name = os.path.join('..', 'models', 'pretrained', 'DeepSeek-R1-Distill-Qwen-1.5B')
			
 
				     # model_name: 预训练模型的路径
			
 
				-    max_seq_length = 6144  # 单次会话（single session） 的最大 token 长度，一个token大约3-4 字节（Byte）
			
 
				+    max_seq_length = 512  # 单次会话（single session） 的最大 token 长度，一个token大约3-4 字节（Byte）
			
 
				     dtype = torch.float16  # 数据类型
			
 
				     load_in_4bit = True  # 是否以4位精度加载模型
			
 
				     lora_rank=64
			
@@ -242,10 +242,10 @@ if __name__ == "__main__":
 
				     try:
			
 
				         # 设置环境变量
			
 
				         # 单机多卡
			
 
				-        # os.environ['RANK'] = '0' # 第一张卡的 rank
			
 
				-        # os.environ['WORLD_SIZE'] = '1'  # 总共有 1 张卡
			
 
				-        # os.environ['MASTER_ADDR'] = 'localhost'
			
 
				-        # os.environ['MASTER_PORT'] = '12345'
			
 
				+        os.environ['RANK'] = '0' # 第一张卡的 rank
			
 
				+        os.environ['WORLD_SIZE'] = '1'  # 总共有 1 张卡
			
 
				+        os.environ['MASTER_ADDR'] = 'localhost'
			
 
				+        os.environ['MASTER_PORT'] = '12345'
			
 
				         # 多机多卡
			
 
				         # export RANK=0  # 第一台机器的 rank
			
 
				         # export WORLD_SIZE=4  # 总共有 4 台机器
			
@@ -270,7 +270,7 @@ if __name__ == "__main__":
 
				         save_path = os.path.join('..', 'models', 'trained', 'DeepSeek-R1-Distill-Qwen-1.5B-GRPO')
			
 
				         trainer.save_model(model, tokenizer, save_path)
			
 
				     finally:
			
 
				-        # # 确保进程组被销毁
			
 
				-        # if dist.is_initialized():
			
 
				-        #     dist.destroy_process_group()
			
 
				+        # 确保进程组被销毁
			
 
				+        if dist.is_initialized():
			
 
				+            dist.destroy_process_group()
			
 
				         print("train finally")