|
@@ -131,8 +131,8 @@ class ModelTrainer:
|
|
|
self.xmlcount_reward_func,
|
|
|
self.soft_format_reward_func,
|
|
|
# self.strict_format_reward_func,
|
|
|
- # self.int_reward_func,
|
|
|
- # self.correctness_reward_func,
|
|
|
+ self.int_reward_func,
|
|
|
+ self.correctness_reward_func,
|
|
|
self.strict_format_reward_func,
|
|
|
self.semantic_correctness_reward_func,
|
|
|
self.reasoning_quality_reward_func,
|
|
@@ -364,7 +364,7 @@ if __name__ == "__main__":
|
|
|
|
|
|
# 使用文件初始化方法 2025-3-11 成功验证支持windows
|
|
|
init_method = f'env://' # env:// # 文件路径需要所有进程都能访问
|
|
|
- # dist.init_process_group(backend=backend, init_method=init_method)
|
|
|
+ dist.init_process_group(backend=backend, init_method=init_method)
|
|
|
|
|
|
print(f"Initialized distributed training with backend: {backend}")
|
|
|
|