Browse Source

修改train_model_grpo.py代码,验证GRPO训练模型,使用显卡共享内存 pin_memory=True 验证失败——内存

zhouyang.xie 3 months ago
parent
commit
0e6c18290b
1 changed file with 3 additions and 3 deletions
  1. 3 3
      src/train_model_grpo.py

+ 3 - 3
src/train_model_grpo.py

@@ -144,9 +144,9 @@ class ModelTrainer:
         # 加载训练集和测试集
         train_dataset = load_dataset("json", data_files={"train": train_data_path}, split="train")
 
-        train_loader = torch.utils.data.DataLoader(
-            train_dataset, batch_size=1, shuffle=True, pin_memory=True  # 启用 pin_memory
-        )
+        # train_loader = torch.utils.data.DataLoader(
+        #     train_dataset, batch_size=1, shuffle=True, pin_memory=True  # 启用 pin_memory  2025年3月7日未能验证通过
+        # )
 
         # train_data_path: 训练数据路径,格式为 JSONL
         return train_dataset