Browse source code

Add sample dataset - gsm8k grade-school math

root 9 months ago
parent
commit
5e21073349
35 changed files with 3536 additions and 108 deletions
  1. +3331 -0    data/backup/gsm8k_dataset_for_train.jsonl
  2. +8 -1       src/unsloth_compiled_cache/UnslothAlignPropTrainer.py
  3. +7 -0       src/unsloth_compiled_cache/UnslothBCOTrainer.py
  4. +7 -0       src/unsloth_compiled_cache/UnslothCPOTrainer.py
  5. +8 -1       src/unsloth_compiled_cache/UnslothDDPOTrainer.py
  6. +7 -0       src/unsloth_compiled_cache/UnslothDPOTrainer.py
  7. +7 -0       src/unsloth_compiled_cache/UnslothGKDTrainer.py
  8. +17 -4      src/unsloth_compiled_cache/UnslothGRPOTrainer.py
  9. +7 -0       src/unsloth_compiled_cache/UnslothKTOTrainer.py
  10. +7 -0      src/unsloth_compiled_cache/UnslothNashMDTrainer.py
  11. +7 -0      src/unsloth_compiled_cache/UnslothORPOTrainer.py
  12. +7 -0      src/unsloth_compiled_cache/UnslothOnlineDPOTrainer.py
  13. +7 -0      src/unsloth_compiled_cache/UnslothPPOTrainer.py
  14. +7 -0      src/unsloth_compiled_cache/UnslothPRMTrainer.py
  15. +7 -0      src/unsloth_compiled_cache/UnslothRLOOTrainer.py
  16. +7 -0      src/unsloth_compiled_cache/UnslothRewardTrainer.py
  17. +81 -102   src/unsloth_compiled_cache/UnslothSFTTrainer.py
  18. +7 -0      src/unsloth_compiled_cache/UnslothXPOTrainer.py
  19. BIN        src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc
  20. BIN        src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc
  21. BIN        src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc
  22. BIN        src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc
  23. BIN        src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc
  24. BIN        src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc
  25. BIN        src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc
  26. BIN        src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc
  27. BIN        src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc
  28. BIN        src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc
  29. BIN        src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc
  30. BIN        src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc
  31. BIN        src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc
  32. BIN        src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc
  33. BIN        src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc
  34. BIN        src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc
  35. BIN        src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc

File diff suppressed because it is too large
+ 3331 - 0
data/backup/gsm8k_dataset_for_train.jsonl
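
The diff for this file is suppressed, but per the file list above it adds 3331 JSONL lines of GSM8K grade-school math samples for training. A minimal sketch of loading it with Hugging Face datasets; the commit does not show the record fields, so they should be inspected rather than assumed:

from datasets import load_dataset

# Each line of the backup file is one JSON record; load_dataset("json", ...)
# turns the whole file into a Dataset without further preprocessing.
dataset = load_dataset(
    "json",
    data_files="data/backup/gsm8k_dataset_for_train.jsonl",
    split="train",
)
print(dataset.column_names)  # check the actual field names before training
print(dataset[0])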


+ 8 - 1
src/unsloth_compiled_cache/UnslothAlignPropTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn
@@ -120,7 +127,7 @@ class UnslothAlignPropConfig(AlignPropConfig):
     )
     def __init__(
         self,
-        exp_name = 'inference',
+        exp_name = 'train_model_grpo_original',
         run_name = '',
         seed = 3407,
         log_with = None,

+ 7 - 0
src/unsloth_compiled_cache/UnslothBCOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothCPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 8 - 1
src/unsloth_compiled_cache/UnslothDDPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn
@@ -136,7 +143,7 @@ class UnslothDDPOConfig(DDPOConfig):
     )
     def __init__(
         self,
-        exp_name = 'inference',
+        exp_name = 'train_model_grpo_original',
         run_name = '',
         seed = 3407,
         log_with = None,

+ 7 - 0
src/unsloth_compiled_cache/UnslothDPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothGKDTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 17 - 4
src/unsloth_compiled_cache/UnslothGRPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn
@@ -113,7 +120,7 @@ class UnslothEfficientGRPO(torch.autograd.Function):
             fullgraph = True,
             options = torch_compile_options,
         )
-        
+
         grad_inputs_chunks = torch.chunk(grad_inputs,        chunks = n_chunks, dim = 0)
         new_hidden_states  = torch.chunk(_new_hidden_states, chunks = n_chunks, dim = 0)
         old_hidden_states  = torch.chunk(_old_hidden_states, chunks = n_chunks, dim = 0)
@@ -1082,14 +1089,20 @@ class _UnslothGRPOTrainer(Trainer):
                 self, _input_ids, logits_to_keep, completion_mask, advantages,
                 n_chunks = self.args.unsloth_num_chunks,
             )
-        
+
         # Log the metrics
         # completion_length = self.accelerator.gather_for_metrics(completion_mask.sum(1)).float().mean().item()
-        self._metrics["completion_length"].append(completion_length.item())
 
         # mean_kl = ((per_token_kl * completion_mask).sum(dim=1) / completion_mask.sum(dim=1)).mean()
         # self._metrics["kl"].append(self.accelerator.gather_for_metrics(mean_kl).mean().item())
-        self._metrics["kl"].append(mean_kl.item())
+
+        if "train" in self._metrics:
+            mode = "eval" if self.control.should_evaluate else "train"
+            self._metrics[mode]["completion_length"].append(completion_length.item())
+            self._metrics[mode]["kl"].append(mean_kl.item())
+        else:
+            self._metrics["completion_length"].append(completion_length.item())
+            self._metrics["kl"].append(mean_kl.item())
         return loss
 
     def prediction_step(self, model, inputs, prediction_loss_only, ignore_keys: Optional[list[str]] = None):
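
The patched logging above works with both metric layouts: an older flat defaultdict(list) and a newer dict keyed by "train"/"eval" mode, selected via self.control.should_evaluate. A standalone sketch of that fallback, assuming the nested layout is a plain dict keyed by mode (exact TRL internals may differ):

from collections import defaultdict

def log_metric(metrics, name, value, should_evaluate=False):
    # Newer layout: {"train": defaultdict(list), "eval": defaultdict(list)}
    if "train" in metrics:
        mode = "eval" if should_evaluate else "train"
        metrics[mode][name].append(value)
    # Older layout: a single flat defaultdict(list)
    else:
        metrics[name].append(value)

new_style = {"train": defaultdict(list), "eval": defaultdict(list)}
old_style = defaultdict(list)
log_metric(new_style, "kl", 0.01)  # lands in new_style["train"]["kl"]
log_metric(old_style, "kl", 0.01)  # lands in old_style["kl"]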

+ 7 - 0
src/unsloth_compiled_cache/UnslothKTOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothNashMDTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothORPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothOnlineDPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothPPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothPRMTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothRLOOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothRewardTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 81 - 102
src/unsloth_compiled_cache/UnslothSFTTrainer.py

@@ -1,8 +1,15 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn
 from torch.nn import functional as F
-from trl.trainer.sft_trainer import (Any, AutoModelForCausalLM, AutoTokenizer, BaseImageProcessor, Callable, ConstantLengthDataset, DataCollator, DataCollatorForLanguageModeling, Dataset, EvalPrediction, FeatureExtractionMixin, IterableDataset, Optional, PartialState, PeftConfig, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SFTConfig, SFTTrainer, Trainer, TrainerCallback, TrainingArguments, Type, Union, dataclasses, defaultdict, deprecate_kwarg, generate_model_card, get_comet_experiment_url, get_peft_model, is_conversational, is_liger_kernel_available, is_peft_available, is_wandb_available, maybe_apply_chat_template, maybe_convert_to_chatml, nn, os, pack_examples, peft, peft_module_casting_to_bf16, prepare_model_for_kbit_training, torch, transformers, version, warnings, os)
+from trl.trainer.sft_trainer import (Any, AutoModelForCausalLM, AutoTokenizer, BaseImageProcessor, Callable, ConstantLengthDataset, DataCollator, DataCollatorForLanguageModeling, Dataset, EvalPrediction, FeatureExtractionMixin, IterableDataset, Optional, PeftConfig, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SFTConfig, SFTTrainer, Trainer, TrainerCallback, TrainingArguments, Type, Union, dataclasses, defaultdict, deprecate_kwarg, generate_model_card, get_comet_experiment_url, get_peft_model, is_liger_kernel_available, is_peft_available, is_wandb_available, nn, os, pack_examples, peft, peft_module_casting_to_bf16, prepare_model_for_kbit_training, torch, transformers, version, warnings, Callable, ConstantLengthDataset, DataCollator, Dataset, IterableDataset, Optional, Union, os, pack_examples, transformers, os)
 
 
 import os
@@ -611,117 +618,89 @@ class _UnslothSFTTrainer(Trainer):
     def _prepare_dataset(
         self,
         dataset: Union[Dataset, IterableDataset],
-        processing_class: Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin],
-        args: SFTConfig,
+        processing_class,
+        args,
         packing: bool,
         formatting_func: Optional[Callable[[dict], str]],
         dataset_name: str,
     ) -> Union[Dataset, IterableDataset]:
-        # Convert the dataset to an IterableDataset if it is a ConstantLengthDataset
-        if isinstance(dataset, ConstantLengthDataset):
-            return dataset
-
-        # If the dataset is already preprocessed (tokenized), skip the processing steps.
-        column_names = list(next(iter(dataset)).keys())
-        is_processed = "input_ids" in column_names
-
-        # Build the kwargs for the `map` function
+        # All Unsloth Zoo code licensed under LGPLv3
+        if isinstance(dataset, ConstantLengthDataset): return dataset
+    
         map_kwargs = {}
-        if isinstance(dataset, Dataset):  # IterableDataset does not support num_proc
-            map_kwargs["num_proc"] = args.dataset_num_proc
-
-        with PartialState().local_main_process_first():
-            # Apply the formatting function if any
-            if formatting_func is not None and is_processed:
-                warnings.warn(
-                    "You passed a dataset that is already processed (contains an `input_ids` field) together with a "
-                    "formatting function. Therefore `formatting_func` will be ignored. Either remove the "
-                    "`formatting_func` or pass a dataset that is not already processed.",
-                    UserWarning,
+        use_desc = isinstance(dataset, Dataset)
+    
+        # Get max length
+        max_seq_length = getattr(args, "max_length", 0)
+        if max_seq_length == 0: max_seq_length = getattr(args, "max_seq_length", 0)
+        if max_seq_length == 0: max_seq_length = getattr(self, "max_seq_length", 0)
+        if max_seq_length == 0: max_seq_length = getattr(self, "max_seq", 0)
+        dataset_text_field = getattr(args, "dataset_text_field", "text")
+        do_truncation = max_seq_length != 0
+        do_formatting_func = False
+    
+        # Check if already tokenized so skip
+        from transformers import DataCollatorForSeq2Seq
+        column_names = set(next(iter(dataset)).keys())
+        if "input_ids" in column_names:
+            # Most likely forgot data collator!
+            from transformers import DataCollatorForSeq2Seq
+            self.data_collator = DataCollatorForSeq2Seq(processing_class)
+            return dataset
+        elif dataset_text_field not in column_names:
+            do_formatting_func = True
+            if formatting_func is None:
+                raise RuntimeError("Unsloth: You must specify a `formatting_func`")
+        pass
+    
+        # Check double BOS tokens
+        if do_formatting_func:
+            test_text = formatting_func(dataset[0])
+            if not isinstance(test_text, list):
+                raise ValueError(
+                    "Unsloth: The `formatting_func` should return a list of processed strings."
                 )
-
-            if formatting_func is not None and not is_processed:
-                if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                    map_kwargs["desc"] = f"Applying formatting function to {dataset_name} dataset"
-
-                batched = isinstance(formatting_func(next(iter(dataset))), list)
-
-                def _func(example):
-                    return {"text": formatting_func(example)}
-
-                dataset = dataset.map(_func, batched=batched, **map_kwargs)
-
-            # If the dataset is prompt-completion, convert it to language modeling type
-            if "prompt" in dataset.column_names and "completion" in dataset.column_names:
-                key = "messages" if is_conversational(dataset[0]) else "text"
-
-                def concat_prompt_completion(example):
-                    return {key: example["prompt"] + example["completion"]}
-
-                dataset = dataset.map(concat_prompt_completion, remove_columns=["prompt", "completion"])
-
-            # Convert the dataset to ChatML if needed
-            if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                map_kwargs["desc"] = f"Converting {dataset_name} dataset to ChatML"
-            dataset = dataset.map(
-                maybe_convert_to_chatml,
-                remove_columns="conversations" if "conversations" in dataset.column_names else None,
-                **map_kwargs,
+            test_text = test_text[0]
+        else:
+            test_text = dataset[0][dataset_text_field]
+        chat_template = getattr(processing_class, 'chat_template', None)
+        chat_template = '' if chat_template is None else chat_template
+        add_special_tokens = True
+    
+        if getattr(processing_class, 'bos_token', None) is not None:
+            if test_text.startswith(processing_class.bos_token) or processing_class.bos_token in chat_template:
+                add_special_tokens = False
+                print("Unsloth: We found double BOS tokens - we shall remove one automatically.")
+        pass
+    
+        # Create tokenize function
+        def _tokenize(example):
+            return processing_class(
+                example[dataset_text_field] if not do_formatting_func else formatting_func(example),
+                truncation = do_truncation,
+                max_length = max_seq_length,
+                return_token_type_ids = False,
+                add_special_tokens = add_special_tokens,
             )
-
-            # Apply the chat template if needed
-            if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                map_kwargs["desc"] = f"Applying chat template to {dataset_name} dataset"
-            dataset = dataset.map(
-                maybe_apply_chat_template,
-                fn_kwargs={"tokenizer": processing_class},
-                remove_columns="messages" if "messages" in dataset.column_names else None,  # renamed to "text"
+        pass
+    
+        map_kwargs["num_proc"] = getattr(args, "dataset_num_proc", 2)
+        if use_desc: map_kwargs["desc"] = f'Tokenizing to ["{dataset_text_field}"]'
+        dataset = dataset.map(_tokenize, batched = True, **map_kwargs)
+    
+        if packing:
+            if max_seq_length == 0:
+                raise ValueError("When packing is enabled, `max_seq_length` can't be `None`.")
+    
+            if use_desc: map_kwargs["desc"] = f"Packing {dataset_name} dataset"
+            dataset = dataset.select_columns("input_ids").map(
+                pack_examples,
+                batched = True,
+                fn_kwargs = {"seq_length": max_seq_length,},
                 **map_kwargs,
             )
-
-            # Tokenize the dataset if needed
-            if not is_processed:
-                if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                    map_kwargs["desc"] = f"Tokenizing {dataset_name} dataset"
-
-                def tokenize(example, processing_class, dataset_text_field):
-                    return processing_class(example[dataset_text_field])
-
-                dataset = dataset.map(
-                    tokenize,
-                    fn_kwargs={"processing_class": processing_class, "dataset_text_field": args.dataset_text_field},
-                    **map_kwargs,
-                )
-
-            # Pack or truncate
-            if packing:
-                if args.max_seq_length is None:
-                    raise ValueError("When packing is enabled, `max_seq_length` can't be `None`.")
-                if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                    map_kwargs["desc"] = f"Packing {dataset_name} dataset"
-                dataset = dataset.select_columns("input_ids")
-                dataset = dataset.map(
-                    pack_examples, batched=True, fn_kwargs={"seq_length": args.max_seq_length}, **map_kwargs
-                )
-            elif args.max_seq_length is not None:
-                if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                    map_kwargs["desc"] = f"Truncating {dataset_name} dataset"
-
-                def truncate(example, max_seq_length):
-                    return {key: example[key][:max_seq_length] for key in ["input_ids", "attention_mask"]}
-
-                dataset = dataset.map(
-                    truncate,
-                    fn_kwargs={"max_seq_length": args.max_seq_length},
-                    **map_kwargs,
-                )
-
-            # For Liger kernel, ensure only input_ids is present
-            if args.use_liger:
-                dataset = dataset.select_columns("input_ids")
-
         return dataset
-
+    
     def compute_loss(self, model, inputs, return_outputs = False, num_items_in_batch = None):
         outputs = super().compute_loss(
             model,
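
Two pieces of the rewritten _prepare_dataset above are easy to miss: the max-length fallback chain (args.max_length, then args.max_seq_length, then the trainer's max_seq_length/max_seq, with 0 meaning "no truncation") and the double-BOS check that disables add_special_tokens when the sample text or chat template already carries a BOS token. A standalone sketch of both, assuming objects with the attribute names used in the diff:

def resolve_max_seq_length(args, trainer):
    # First non-zero value wins, mirroring the fallback chain above.
    for obj, name in ((args, "max_length"), (args, "max_seq_length"),
                      (trainer, "max_seq_length"), (trainer, "max_seq")):
        value = getattr(obj, name, 0) or 0
        if value:
            return value
    return 0  # 0 means truncation is disabled

def should_add_special_tokens(tokenizer, sample_text):
    # Skip add_special_tokens when the text or chat template already
    # contains the BOS token, otherwise tokenization would prepend a second one.
    bos = getattr(tokenizer, "bos_token", None)
    if bos is None:
        return True
    chat_template = getattr(tokenizer, "chat_template", None) or ""
    return not (sample_text.startswith(bos) or bos in chat_template)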

+ 7 - 0
src/unsloth_compiled_cache/UnslothXPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

BIN
src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc


Some files were not shown because too many files changed in this diff