@@ -209,7 +209,7 @@ class ModelTrainer:
correctness_reward_func,
],
args = training_args,
- train_dataset = dataset,
+ train_dataset = train_dataset,
)
# 训练模型