@@ -127,7 +127,7 @@ class UnslothAlignPropConfig(AlignPropConfig):
)
def __init__(
self,
- exp_name = 'train_model_grpo_original',
+ exp_name = 'train_model_grpo_v1.1',
run_name = '',
seed = 3407,
log_with = None,
@@ -143,7 +143,7 @@ class UnslothDDPOConfig(DDPOConfig):