Browse source code

Add sample dataset - gsm8k grade-school math

root 9 months ago
parent
commit
5e21073349
35 changed files with 3536 additions and 108 deletions
  1. +3331 -0    data/backup/gsm8k_dataset_for_train.jsonl
  2. +8 -1       src/unsloth_compiled_cache/UnslothAlignPropTrainer.py
  3. +7 -0       src/unsloth_compiled_cache/UnslothBCOTrainer.py
  4. +7 -0       src/unsloth_compiled_cache/UnslothCPOTrainer.py
  5. +8 -1       src/unsloth_compiled_cache/UnslothDDPOTrainer.py
  6. +7 -0       src/unsloth_compiled_cache/UnslothDPOTrainer.py
  7. +7 -0       src/unsloth_compiled_cache/UnslothGKDTrainer.py
  8. +17 -4      src/unsloth_compiled_cache/UnslothGRPOTrainer.py
  9. +7 -0       src/unsloth_compiled_cache/UnslothKTOTrainer.py
  10. +7 -0      src/unsloth_compiled_cache/UnslothNashMDTrainer.py
  11. +7 -0      src/unsloth_compiled_cache/UnslothORPOTrainer.py
  12. +7 -0      src/unsloth_compiled_cache/UnslothOnlineDPOTrainer.py
  13. +7 -0      src/unsloth_compiled_cache/UnslothPPOTrainer.py
  14. +7 -0      src/unsloth_compiled_cache/UnslothPRMTrainer.py
  15. +7 -0      src/unsloth_compiled_cache/UnslothRLOOTrainer.py
  16. +7 -0      src/unsloth_compiled_cache/UnslothRewardTrainer.py
  17. +81 -102   src/unsloth_compiled_cache/UnslothSFTTrainer.py
  18. +7 -0      src/unsloth_compiled_cache/UnslothXPOTrainer.py
  19. BIN        src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc
  20. BIN        src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc
  21. BIN        src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc
  22. BIN        src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc
  23. BIN        src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc
  24. BIN        src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc
  25. BIN        src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc
  26. BIN        src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc
  27. BIN        src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc
  28. BIN        src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc
  29. BIN        src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc
  30. BIN        src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc
  31. BIN        src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc
  32. BIN        src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc
  33. BIN        src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc
  34. BIN        src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc
  35. BIN        src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc

File diff suppressed because it is too large
+ 3331 - 0
data/backup/gsm8k_dataset_for_train.jsonl
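
The diff for this file is suppressed, but per the file list above it adds 3331 JSONL lines of GSM8K grade-school math samples for training. A minimal sketch of loading it with Hugging Face datasets; the commit does not show the record fields, so they should be inspected rather than assumed:

from datasets import load_dataset

# Each line of the backup file is one JSON record; load_dataset("json", ...)
# turns the whole file into a Dataset without further preprocessing.
dataset = load_dataset(
    "json",
    data_files="data/backup/gsm8k_dataset_for_train.jsonl",
    split="train",
)
print(dataset.column_names)  # check the actual field names before training
print(dataset[0])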


+ 8 - 1
src/unsloth_compiled_cache/UnslothAlignPropTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn
@@ -120,7 +127,7 @@ class UnslothAlignPropConfig(AlignPropConfig):
     )
     def __init__(
         self,
-        exp_name = 'inference',
+        exp_name = 'train_model_grpo_original',
         run_name = '',
         seed = 3407,
         log_with = None,

+ 7 - 0
src/unsloth_compiled_cache/UnslothBCOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothCPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 8 - 1
src/unsloth_compiled_cache/UnslothDDPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn
@@ -136,7 +143,7 @@ class UnslothDDPOConfig(DDPOConfig):
     )
     def __init__(
         self,
-        exp_name = 'inference',
+        exp_name = 'train_model_grpo_original',
         run_name = '',
         seed = 3407,
         log_with = None,

+ 7 - 0
src/unsloth_compiled_cache/UnslothDPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothGKDTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 17 - 4
src/unsloth_compiled_cache/UnslothGRPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn
@@ -113,7 +120,7 @@ class UnslothEfficientGRPO(torch.autograd.Function):
             fullgraph = True,
             options = torch_compile_options,
         )
-        
+
         grad_inputs_chunks = torch.chunk(grad_inputs,        chunks = n_chunks, dim = 0)
         new_hidden_states  = torch.chunk(_new_hidden_states, chunks = n_chunks, dim = 0)
         old_hidden_states  = torch.chunk(_old_hidden_states, chunks = n_chunks, dim = 0)
@@ -1082,14 +1089,20 @@ class _UnslothGRPOTrainer(Trainer):
                 self, _input_ids, logits_to_keep, completion_mask, advantages,
                 n_chunks = self.args.unsloth_num_chunks,
             )
-        
+
         # Log the metrics
         # completion_length = self.accelerator.gather_for_metrics(completion_mask.sum(1)).float().mean().item()
-        self._metrics["completion_length"].append(completion_length.item())
 
         # mean_kl = ((per_token_kl * completion_mask).sum(dim=1) / completion_mask.sum(dim=1)).mean()
         # self._metrics["kl"].append(self.accelerator.gather_for_metrics(mean_kl).mean().item())
-        self._metrics["kl"].append(mean_kl.item())
+
+        if "train" in self._metrics:
+            mode = "eval" if self.control.should_evaluate else "train"
+            self._metrics[mode]["completion_length"].append(completion_length.item())
+            self._metrics[mode]["kl"].append(mean_kl.item())
+        else:
+            self._metrics["completion_length"].append(completion_length.item())
+            self._metrics["kl"].append(mean_kl.item())
         return loss
 
     def prediction_step(self, model, inputs, prediction_loss_only, ignore_keys: Optional[list[str]] = None):
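
The patched logging above works with both metric layouts: an older flat defaultdict(list) and a newer dict keyed by "train"/"eval" mode, selected via self.control.should_evaluate. A standalone sketch of that fallback, assuming the nested layout is a plain dict keyed by mode (exact TRL internals may differ):

from collections import defaultdict

def log_metric(metrics, name, value, should_evaluate=False):
    # Newer layout: {"train": defaultdict(list), "eval": defaultdict(list)}
    if "train" in metrics:
        mode = "eval" if should_evaluate else "train"
        metrics[mode][name].append(value)
    # Older layout: a single flat defaultdict(list)
    else:
        metrics[name].append(value)

new_style = {"train": defaultdict(list), "eval": defaultdict(list)}
old_style = defaultdict(list)
log_metric(new_style, "kl", 0.01)  # lands in new_style["train"]["kl"]
log_metric(old_style, "kl", 0.01)  # lands in old_style["kl"]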

+ 7 - 0
src/unsloth_compiled_cache/UnslothKTOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothNashMDTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothORPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothOnlineDPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothPPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothPRMTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothRLOOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothRewardTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 81 - 102
src/unsloth_compiled_cache/UnslothSFTTrainer.py

@@ -1,8 +1,15 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn
 from torch.nn import functional as F
-from trl.trainer.sft_trainer import (Any, AutoModelForCausalLM, AutoTokenizer, BaseImageProcessor, Callable, ConstantLengthDataset, DataCollator, DataCollatorForLanguageModeling, Dataset, EvalPrediction, FeatureExtractionMixin, IterableDataset, Optional, PartialState, PeftConfig, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SFTConfig, SFTTrainer, Trainer, TrainerCallback, TrainingArguments, Type, Union, dataclasses, defaultdict, deprecate_kwarg, generate_model_card, get_comet_experiment_url, get_peft_model, is_conversational, is_liger_kernel_available, is_peft_available, is_wandb_available, maybe_apply_chat_template, maybe_convert_to_chatml, nn, os, pack_examples, peft, peft_module_casting_to_bf16, prepare_model_for_kbit_training, torch, transformers, version, warnings, os)
+from trl.trainer.sft_trainer import (Any, AutoModelForCausalLM, AutoTokenizer, BaseImageProcessor, Callable, ConstantLengthDataset, DataCollator, DataCollatorForLanguageModeling, Dataset, EvalPrediction, FeatureExtractionMixin, IterableDataset, Optional, PeftConfig, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SFTConfig, SFTTrainer, Trainer, TrainerCallback, TrainingArguments, Type, Union, dataclasses, defaultdict, deprecate_kwarg, generate_model_card, get_comet_experiment_url, get_peft_model, is_liger_kernel_available, is_peft_available, is_wandb_available, nn, os, pack_examples, peft, peft_module_casting_to_bf16, prepare_model_for_kbit_training, torch, transformers, version, warnings, Callable, ConstantLengthDataset, DataCollator, Dataset, IterableDataset, Optional, Union, os, pack_examples, transformers, os)
 
 
 import os
@@ -611,117 +618,89 @@ class _UnslothSFTTrainer(Trainer):
     def _prepare_dataset(
         self,
         dataset: Union[Dataset, IterableDataset],
-        processing_class: Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin],
-        args: SFTConfig,
+        processing_class,
+        args,
         packing: bool,
         formatting_func: Optional[Callable[[dict], str]],
         dataset_name: str,
     ) -> Union[Dataset, IterableDataset]:
-        # Convert the dataset to an IterableDataset if it is a ConstantLengthDataset
-        if isinstance(dataset, ConstantLengthDataset):
-            return dataset
-
-        # If the dataset is already preprocessed (tokenized), skip the processing steps.
-        column_names = list(next(iter(dataset)).keys())
-        is_processed = "input_ids" in column_names
-
-        # Build the kwargs for the `map` function
+        # All Unsloth Zoo code licensed under LGPLv3
+        if isinstance(dataset, ConstantLengthDataset): return dataset
+    
         map_kwargs = {}
-        if isinstance(dataset, Dataset):  # IterableDataset does not support num_proc
-            map_kwargs["num_proc"] = args.dataset_num_proc
-
-        with PartialState().local_main_process_first():
-            # Apply the formatting function if any
-            if formatting_func is not None and is_processed:
-                warnings.warn(
-                    "You passed a dataset that is already processed (contains an `input_ids` field) together with a "
-                    "formatting function. Therefore `formatting_func` will be ignored. Either remove the "
-                    "`formatting_func` or pass a dataset that is not already processed.",
-                    UserWarning,
+        use_desc = isinstance(dataset, Dataset)
+    
+        # Get max length
+        max_seq_length = getattr(args, "max_length", 0)
+        if max_seq_length == 0: max_seq_length = getattr(args, "max_seq_length", 0)
+        if max_seq_length == 0: max_seq_length = getattr(self, "max_seq_length", 0)
+        if max_seq_length == 0: max_seq_length = getattr(self, "max_seq", 0)
+        dataset_text_field = getattr(args, "dataset_text_field", "text")
+        do_truncation = max_seq_length != 0
+        do_formatting_func = False
+    
+        # Check if already tokenized so skip
+        from transformers import DataCollatorForSeq2Seq
+        column_names = set(next(iter(dataset)).keys())
+        if "input_ids" in column_names:
+            # Most likely forgot data collator!
+            from transformers import DataCollatorForSeq2Seq
+            self.data_collator = DataCollatorForSeq2Seq(processing_class)
+            return dataset
+        elif dataset_text_field not in column_names:
+            do_formatting_func = True
+            if formatting_func is None:
+                raise RuntimeError("Unsloth: You must specify a `formatting_func`")
+        pass
+    
+        # Check double BOS tokens
+        if do_formatting_func:
+            test_text = formatting_func(dataset[0])
+            if not isinstance(test_text, list):
+                raise ValueError(
+                    "Unsloth: The `formatting_func` should return a list of processed strings."
                 )
-
-            if formatting_func is not None and not is_processed:
-                if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                    map_kwargs["desc"] = f"Applying formatting function to {dataset_name} dataset"
-
-                batched = isinstance(formatting_func(next(iter(dataset))), list)
-
-                def _func(example):
-                    return {"text": formatting_func(example)}
-
-                dataset = dataset.map(_func, batched=batched, **map_kwargs)
-
-            # If the dataset is prompt-completion, convert it to language modeling type
-            if "prompt" in dataset.column_names and "completion" in dataset.column_names:
-                key = "messages" if is_conversational(dataset[0]) else "text"
-
-                def concat_prompt_completion(example):
-                    return {key: example["prompt"] + example["completion"]}
-
-                dataset = dataset.map(concat_prompt_completion, remove_columns=["prompt", "completion"])
-
-            # Convert the dataset to ChatML if needed
-            if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                map_kwargs["desc"] = f"Converting {dataset_name} dataset to ChatML"
-            dataset = dataset.map(
-                maybe_convert_to_chatml,
-                remove_columns="conversations" if "conversations" in dataset.column_names else None,
-                **map_kwargs,
+            test_text = test_text[0]
+        else:
+            test_text = dataset[0][dataset_text_field]
+        chat_template = getattr(processing_class, 'chat_template', None)
+        chat_template = '' if chat_template is None else chat_template
+        add_special_tokens = True
+    
+        if getattr(processing_class, 'bos_token', None) is not None:
+            if test_text.startswith(processing_class.bos_token) or processing_class.bos_token in chat_template:
+                add_special_tokens = False
+                print("Unsloth: We found double BOS tokens - we shall remove one automatically.")
+        pass
+    
+        # Create tokenize function
+        def _tokenize(example):
+            return processing_class(
+                example[dataset_text_field] if not do_formatting_func else formatting_func(example),
+                truncation = do_truncation,
+                max_length = max_seq_length,
+                return_token_type_ids = False,
+                add_special_tokens = add_special_tokens,
             )
-
-            # Apply the chat template if needed
-            if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                map_kwargs["desc"] = f"Applying chat template to {dataset_name} dataset"
-            dataset = dataset.map(
-                maybe_apply_chat_template,
-                fn_kwargs={"tokenizer": processing_class},
-                remove_columns="messages" if "messages" in dataset.column_names else None,  # renamed to "text"
+        pass
+    
+        map_kwargs["num_proc"] = getattr(args, "dataset_num_proc", 2)
+        if use_desc: map_kwargs["desc"] = f'Tokenizing to ["{dataset_text_field}"]'
+        dataset = dataset.map(_tokenize, batched = True, **map_kwargs)
+    
+        if packing:
+            if max_seq_length == 0:
+                raise ValueError("When packing is enabled, `max_seq_length` can't be `None`.")
+    
+            if use_desc: map_kwargs["desc"] = f"Packing {dataset_name} dataset"
+            dataset = dataset.select_columns("input_ids").map(
+                pack_examples,
+                batched = True,
+                fn_kwargs = {"seq_length": max_seq_length,},
                 **map_kwargs,
             )
-
-            # Tokenize the dataset if needed
-            if not is_processed:
-                if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                    map_kwargs["desc"] = f"Tokenizing {dataset_name} dataset"
-
-                def tokenize(example, processing_class, dataset_text_field):
-                    return processing_class(example[dataset_text_field])
-
-                dataset = dataset.map(
-                    tokenize,
-                    fn_kwargs={"processing_class": processing_class, "dataset_text_field": args.dataset_text_field},
-                    **map_kwargs,
-                )
-
-            # Pack or truncate
-            if packing:
-                if args.max_seq_length is None:
-                    raise ValueError("When packing is enabled, `max_seq_length` can't be `None`.")
-                if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                    map_kwargs["desc"] = f"Packing {dataset_name} dataset"
-                dataset = dataset.select_columns("input_ids")
-                dataset = dataset.map(
-                    pack_examples, batched=True, fn_kwargs={"seq_length": args.max_seq_length}, **map_kwargs
-                )
-            elif args.max_seq_length is not None:
-                if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                    map_kwargs["desc"] = f"Truncating {dataset_name} dataset"
-
-                def truncate(example, max_seq_length):
-                    return {key: example[key][:max_seq_length] for key in ["input_ids", "attention_mask"]}
-
-                dataset = dataset.map(
-                    truncate,
-                    fn_kwargs={"max_seq_length": args.max_seq_length},
-                    **map_kwargs,
-                )
-
-            # For Liger kernel, ensure only input_ids is present
-            if args.use_liger:
-                dataset = dataset.select_columns("input_ids")
-
         return dataset
-
+    
     def compute_loss(self, model, inputs, return_outputs = False, num_items_in_batch = None):
         outputs = super().compute_loss(
             model,
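
Two pieces of the rewritten _prepare_dataset above are easy to miss: the max-length fallback chain (args.max_length, then args.max_seq_length, then the trainer's max_seq_length/max_seq, with 0 meaning "no truncation") and the double-BOS check that disables add_special_tokens when the sample text or chat template already carries a BOS token. A standalone sketch of both, assuming objects with the attribute names used in the diff:

def resolve_max_seq_length(args, trainer):
    # First non-zero value wins, mirroring the fallback chain above.
    for obj, name in ((args, "max_length"), (args, "max_seq_length"),
                      (trainer, "max_seq_length"), (trainer, "max_seq")):
        value = getattr(obj, name, 0) or 0
        if value:
            return value
    return 0  # 0 means truncation is disabled

def should_add_special_tokens(tokenizer, sample_text):
    # Skip add_special_tokens when the text or chat template already
    # contains the BOS token, otherwise tokenization would prepend a second one.
    bos = getattr(tokenizer, "bos_token", None)
    if bos is None:
        return True
    chat_template = getattr(tokenizer, "chat_template", None) or ""
    return not (sample_text.startswith(bos) or bos in chat_template)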

+ 7 - 0
src/unsloth_compiled_cache/UnslothXPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

BIN
src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc


Some files were not shown because too many files changed in this diff