Browse source

Add example dataset: gsm8k (grade-school math)

root 9 months ago
parent
commit
5e21073349
35 changed files with 3536 additions and 108 deletions
  1. +3331 -0
      data/backup/gsm8k_dataset_for_train.jsonl
  2. +8 -1
      src/unsloth_compiled_cache/UnslothAlignPropTrainer.py
  3. +7 -0
      src/unsloth_compiled_cache/UnslothBCOTrainer.py
  4. +7 -0
      src/unsloth_compiled_cache/UnslothCPOTrainer.py
  5. +8 -1
      src/unsloth_compiled_cache/UnslothDDPOTrainer.py
  6. +7 -0
      src/unsloth_compiled_cache/UnslothDPOTrainer.py
  7. +7 -0
      src/unsloth_compiled_cache/UnslothGKDTrainer.py
  8. +17 -4
      src/unsloth_compiled_cache/UnslothGRPOTrainer.py
  9. +7 -0
      src/unsloth_compiled_cache/UnslothKTOTrainer.py
  10. +7 -0
      src/unsloth_compiled_cache/UnslothNashMDTrainer.py
  11. +7 -0
      src/unsloth_compiled_cache/UnslothORPOTrainer.py
  12. +7 -0
      src/unsloth_compiled_cache/UnslothOnlineDPOTrainer.py
  13. +7 -0
      src/unsloth_compiled_cache/UnslothPPOTrainer.py
  14. +7 -0
      src/unsloth_compiled_cache/UnslothPRMTrainer.py
  15. +7 -0
      src/unsloth_compiled_cache/UnslothRLOOTrainer.py
  16. +7 -0
      src/unsloth_compiled_cache/UnslothRewardTrainer.py
  17. +81 -102
      src/unsloth_compiled_cache/UnslothSFTTrainer.py
  18. +7 -0
      src/unsloth_compiled_cache/UnslothXPOTrainer.py
  19. binary
      src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc
  20. binary
      src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc
  21. binary
      src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc
  22. binary
      src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc
  23. binary
      src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc
  24. binary
      src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc
  25. binary
      src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc
  26. binary
      src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc
  27. binary
      src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc
  28. binary
      src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc
  29. binary
      src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc
  30. binary
      src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc
  31. binary
      src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc
  32. binary
      src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc
  33. binary
      src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc
  34. binary
      src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc
  35. binary
      src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc

File diff suppressed because the file is too large
+ 3331 - 0
data/backup/gsm8k_dataset_for_train.jsonl
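
The diff for the new training file is suppressed, but GSM8K-style records conventionally pair a word problem ("question") with a worked solution ("answer") whose last line carries the numeric result after "####". A minimal loading sketch in Python follows; the field names are an assumption, since the actual schema of gsm8k_dataset_for_train.jsonl is not shown here:

import json

# Hypothetical reader for a GSM8K-style JSONL file. Field names
# ("question", "answer") follow the upstream GSM8K convention and may
# differ from what this commit actually ships.
def iter_gsm8k(path):
    with open(path, encoding="utf-8") as f:
        for line in f:
            yield json.loads(line)

for record in iter_gsm8k("data/backup/gsm8k_dataset_for_train.jsonl"):
    question = record["question"]
    answer = record["answer"]                 # chain-of-thought solution text
    final = answer.split("####")[-1].strip()  # numeric answer, e.g. "42"
    break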


+ 8 - 1
src/unsloth_compiled_cache/UnslothAlignPropTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn
@@ -120,7 +127,7 @@ class UnslothAlignPropConfig(AlignPropConfig):
     )
     def __init__(
         self,
-        exp_name = 'inference',
+        exp_name = 'train_model_grpo_original',
         run_name = '',
         seed = 3407,
         log_with = None,

+ 7 - 0
src/unsloth_compiled_cache/UnslothBCOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothCPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 8 - 1
src/unsloth_compiled_cache/UnslothDDPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn
@@ -136,7 +143,7 @@ class UnslothDDPOConfig(DDPOConfig):
     )
     def __init__(
         self,
-        exp_name = 'inference',
+        exp_name = 'train_model_grpo_original',
         run_name = '',
         seed = 3407,
         log_with = None,

+ 7 - 0
src/unsloth_compiled_cache/UnslothDPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothGKDTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 17 - 4
src/unsloth_compiled_cache/UnslothGRPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn
@@ -113,7 +120,7 @@ class UnslothEfficientGRPO(torch.autograd.Function):
             fullgraph = True,
             options = torch_compile_options,
         )
-        
+
         grad_inputs_chunks = torch.chunk(grad_inputs,        chunks = n_chunks, dim = 0)
         new_hidden_states  = torch.chunk(_new_hidden_states, chunks = n_chunks, dim = 0)
         old_hidden_states  = torch.chunk(_old_hidden_states, chunks = n_chunks, dim = 0)
@@ -1082,14 +1089,20 @@ class _UnslothGRPOTrainer(Trainer):
                 self, _input_ids, logits_to_keep, completion_mask, advantages,
                 n_chunks = self.args.unsloth_num_chunks,
             )
-        
+
         # Log the metrics
         # completion_length = self.accelerator.gather_for_metrics(completion_mask.sum(1)).float().mean().item()
-        self._metrics["completion_length"].append(completion_length.item())
 
         # mean_kl = ((per_token_kl * completion_mask).sum(dim=1) / completion_mask.sum(dim=1)).mean()
         # self._metrics["kl"].append(self.accelerator.gather_for_metrics(mean_kl).mean().item())
-        self._metrics["kl"].append(mean_kl.item())
+
+        if "train" in self._metrics:
+            mode = "eval" if self.control.should_evaluate else "train"
+            self._metrics[mode]["completion_length"].append(completion_length.item())
+            self._metrics[mode]["kl"].append(mean_kl.item())
+        else:
+            self._metrics["completion_length"].append(completion_length.item())
+            self._metrics["kl"].append(mean_kl.item())
         return loss
 
     def prediction_step(self, model, inputs, prediction_loss_only, ignore_keys: Optional[list[str]] = None):
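
The metric-logging hunk above makes the trainer work with both TRL layouts: newer TRL versions key self._metrics by mode ("train" / "eval"), while older ones use a single flat dict. A standalone sketch of that branching (names assumed from the hunk, not the full trainer):

from collections import defaultdict

flat_metrics = defaultdict(list)                   # older TRL layout
nested_metrics = {"train": defaultdict(list),      # newer TRL layout
                  "eval": defaultdict(list)}

def log_metric(metrics, name, value, should_evaluate=False):
    if "train" in metrics:                         # nested: pick the mode first
        mode = "eval" if should_evaluate else "train"
        metrics[mode][name].append(value)
    else:                                          # flat: append directly
        metrics[name].append(value)

log_metric(flat_metrics, "kl", 0.01)
log_metric(nested_metrics, "kl", 0.01)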

+ 7 - 0
src/unsloth_compiled_cache/UnslothKTOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothNashMDTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothORPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothOnlineDPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothPPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothPRMTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothRLOOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 7 - 0
src/unsloth_compiled_cache/UnslothRewardTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

+ 81 - 102
src/unsloth_compiled_cache/UnslothSFTTrainer.py

@@ -1,8 +1,15 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn
 from torch.nn import functional as F
-from trl.trainer.sft_trainer import (Any, AutoModelForCausalLM, AutoTokenizer, BaseImageProcessor, Callable, ConstantLengthDataset, DataCollator, DataCollatorForLanguageModeling, Dataset, EvalPrediction, FeatureExtractionMixin, IterableDataset, Optional, PartialState, PeftConfig, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SFTConfig, SFTTrainer, Trainer, TrainerCallback, TrainingArguments, Type, Union, dataclasses, defaultdict, deprecate_kwarg, generate_model_card, get_comet_experiment_url, get_peft_model, is_conversational, is_liger_kernel_available, is_peft_available, is_wandb_available, maybe_apply_chat_template, maybe_convert_to_chatml, nn, os, pack_examples, peft, peft_module_casting_to_bf16, prepare_model_for_kbit_training, torch, transformers, version, warnings, os)
+from trl.trainer.sft_trainer import (Any, AutoModelForCausalLM, AutoTokenizer, BaseImageProcessor, Callable, ConstantLengthDataset, DataCollator, DataCollatorForLanguageModeling, Dataset, EvalPrediction, FeatureExtractionMixin, IterableDataset, Optional, PeftConfig, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SFTConfig, SFTTrainer, Trainer, TrainerCallback, TrainingArguments, Type, Union, dataclasses, defaultdict, deprecate_kwarg, generate_model_card, get_comet_experiment_url, get_peft_model, is_liger_kernel_available, is_peft_available, is_wandb_available, nn, os, pack_examples, peft, peft_module_casting_to_bf16, prepare_model_for_kbit_training, torch, transformers, version, warnings, Callable, ConstantLengthDataset, DataCollator, Dataset, IterableDataset, Optional, Union, os, pack_examples, transformers, os)
 
 
 import os
@@ -611,117 +618,89 @@ class _UnslothSFTTrainer(Trainer):
     def _prepare_dataset(
         self,
         dataset: Union[Dataset, IterableDataset],
-        processing_class: Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin],
-        args: SFTConfig,
+        processing_class,
+        args,
         packing: bool,
         formatting_func: Optional[Callable[[dict], str]],
         dataset_name: str,
     ) -> Union[Dataset, IterableDataset]:
-        # Convert the dataset to an IterableDataset if it is a ConstantLengthDataset
-        if isinstance(dataset, ConstantLengthDataset):
-            return dataset
-
-        # If the dataset is already preprocessed (tokenized), skip the processing steps.
-        column_names = list(next(iter(dataset)).keys())
-        is_processed = "input_ids" in column_names
-
-        # Build the kwargs for the `map` function
+        # All Unsloth Zoo code licensed under LGPLv3
+        if isinstance(dataset, ConstantLengthDataset): return dataset
+    
         map_kwargs = {}
-        if isinstance(dataset, Dataset):  # IterableDataset does not support num_proc
-            map_kwargs["num_proc"] = args.dataset_num_proc
-
-        with PartialState().local_main_process_first():
-            # Apply the formatting function if any
-            if formatting_func is not None and is_processed:
-                warnings.warn(
-                    "You passed a dataset that is already processed (contains an `input_ids` field) together with a "
-                    "formatting function. Therefore `formatting_func` will be ignored. Either remove the "
-                    "`formatting_func` or pass a dataset that is not already processed.",
-                    UserWarning,
+        use_desc = isinstance(dataset, Dataset)
+    
+        # Get max length
+        max_seq_length = getattr(args, "max_length", 0)
+        if max_seq_length == 0: max_seq_length = getattr(args, "max_seq_length", 0)
+        if max_seq_length == 0: max_seq_length = getattr(self, "max_seq_length", 0)
+        if max_seq_length == 0: max_seq_length = getattr(self, "max_seq", 0)
+        dataset_text_field = getattr(args, "dataset_text_field", "text")
+        do_truncation = max_seq_length != 0
+        do_formatting_func = False
+    
+        # Check if already tokenized so skip
+        from transformers import DataCollatorForSeq2Seq
+        column_names = set(next(iter(dataset)).keys())
+        if "input_ids" in column_names:
+            # Most likely forgot data collator!
+            from transformers import DataCollatorForSeq2Seq
+            self.data_collator = DataCollatorForSeq2Seq(processing_class)
+            return dataset
+        elif dataset_text_field not in column_names:
+            do_formatting_func = True
+            if formatting_func is None:
+                raise RuntimeError("Unsloth: You must specify a `formatting_func`")
+        pass
+    
+        # Check double BOS tokens
+        if do_formatting_func:
+            test_text = formatting_func(dataset[0])
+            if not isinstance(test_text, list):
+                raise ValueError(
+                    "Unsloth: The `formatting_func` should return a list of processed strings."
                 )
-
-            if formatting_func is not None and not is_processed:
-                if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                    map_kwargs["desc"] = f"Applying formatting function to {dataset_name} dataset"
-
-                batched = isinstance(formatting_func(next(iter(dataset))), list)
-
-                def _func(example):
-                    return {"text": formatting_func(example)}
-
-                dataset = dataset.map(_func, batched=batched, **map_kwargs)
-
-            # If the dataset is prompt-completion, convert it to language modeling type
-            if "prompt" in dataset.column_names and "completion" in dataset.column_names:
-                key = "messages" if is_conversational(dataset[0]) else "text"
-
-                def concat_prompt_completion(example):
-                    return {key: example["prompt"] + example["completion"]}
-
-                dataset = dataset.map(concat_prompt_completion, remove_columns=["prompt", "completion"])
-
-            # Convert the dataset to ChatML if needed
-            if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                map_kwargs["desc"] = f"Converting {dataset_name} dataset to ChatML"
-            dataset = dataset.map(
-                maybe_convert_to_chatml,
-                remove_columns="conversations" if "conversations" in dataset.column_names else None,
-                **map_kwargs,
+            test_text = test_text[0]
+        else:
+            test_text = dataset[0][dataset_text_field]
+        chat_template = getattr(processing_class, 'chat_template', None)
+        chat_template = '' if chat_template is None else chat_template
+        add_special_tokens = True
+    
+        if getattr(processing_class, 'bos_token', None) is not None:
+            if test_text.startswith(processing_class.bos_token) or processing_class.bos_token in chat_template:
+                add_special_tokens = False
+                print("Unsloth: We found double BOS tokens - we shall remove one automatically.")
+        pass
+    
+        # Create tokenize function
+        def _tokenize(example):
+            return processing_class(
+                example[dataset_text_field] if not do_formatting_func else formatting_func(example),
+                truncation = do_truncation,
+                max_length = max_seq_length,
+                return_token_type_ids = False,
+                add_special_tokens = add_special_tokens,
             )
-
-            # Apply the chat template if needed
-            if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                map_kwargs["desc"] = f"Applying chat template to {dataset_name} dataset"
-            dataset = dataset.map(
-                maybe_apply_chat_template,
-                fn_kwargs={"tokenizer": processing_class},
-                remove_columns="messages" if "messages" in dataset.column_names else None,  # renamed to "text"
+        pass
+    
+        map_kwargs["num_proc"] = getattr(args, "dataset_num_proc", 2)
+        if use_desc: map_kwargs["desc"] = f'Tokenizing to ["{dataset_text_field}"]'
+        dataset = dataset.map(_tokenize, batched = True, **map_kwargs)
+    
+        if packing:
+            if max_seq_length == 0:
+                raise ValueError("When packing is enabled, `max_seq_length` can't be `None`.")
+    
+            if use_desc: map_kwargs["desc"] = f"Packing {dataset_name} dataset"
+            dataset = dataset.select_columns("input_ids").map(
+                pack_examples,
+                batched = True,
+                fn_kwargs = {"seq_length": max_seq_length,},
                 **map_kwargs,
             )
-
-            # Tokenize the dataset if needed
-            if not is_processed:
-                if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                    map_kwargs["desc"] = f"Tokenizing {dataset_name} dataset"
-
-                def tokenize(example, processing_class, dataset_text_field):
-                    return processing_class(example[dataset_text_field])
-
-                dataset = dataset.map(
-                    tokenize,
-                    fn_kwargs={"processing_class": processing_class, "dataset_text_field": args.dataset_text_field},
-                    **map_kwargs,
-                )
-
-            # Pack or truncate
-            if packing:
-                if args.max_seq_length is None:
-                    raise ValueError("When packing is enabled, `max_seq_length` can't be `None`.")
-                if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                    map_kwargs["desc"] = f"Packing {dataset_name} dataset"
-                dataset = dataset.select_columns("input_ids")
-                dataset = dataset.map(
-                    pack_examples, batched=True, fn_kwargs={"seq_length": args.max_seq_length}, **map_kwargs
-                )
-            elif args.max_seq_length is not None:
-                if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
-                    map_kwargs["desc"] = f"Truncating {dataset_name} dataset"
-
-                def truncate(example, max_seq_length):
-                    return {key: example[key][:max_seq_length] for key in ["input_ids", "attention_mask"]}
-
-                dataset = dataset.map(
-                    truncate,
-                    fn_kwargs={"max_seq_length": args.max_seq_length},
-                    **map_kwargs,
-                )
-
-            # For Liger kernel, ensure only input_ids is present
-            if args.use_liger:
-                dataset = dataset.select_columns("input_ids")
-
         return dataset
-
+    
     def compute_loss(self, model, inputs, return_outputs = False, num_items_in_batch = None):
         outputs = super().compute_loss(
             model,
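
The hunk above replaces TRL's multi-stage _prepare_dataset (formatting function, prompt-completion concatenation, ChatML conversion, chat template, tokenize, then pack or truncate) with a single tokenizing map pass: the max length falls back through args.max_length, args.max_seq_length, self.max_seq_length, and self.max_seq; already-tokenized datasets short-circuit with a DataCollatorForSeq2Seq; and add_special_tokens is disabled when the text or chat template already carries the BOS token (the "double BOS" fix). A minimal usage sketch of that path, with a placeholder tokenizer and dataset:

from datasets import Dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder model
dataset = Dataset.from_dict({"text": ["Q: 2+2? A: 4", "Q: 3*3? A: 9"]})

# Mirror the double-BOS check: skip add_special_tokens when the text
# already begins with the tokenizer's BOS token.
bos = tokenizer.bos_token
add_special_tokens = not (bos is not None and dataset[0]["text"].startswith(bos))

encoded = dataset.map(
    lambda batch: tokenizer(
        batch["text"],
        truncation=True,
        max_length=512,
        return_token_type_ids=False,
        add_special_tokens=add_special_tokens,
    ),
    batched=True,
)
print(encoded[0]["input_ids"][:8])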

+ 7 - 0
src/unsloth_compiled_cache/UnslothXPOTrainer.py

@@ -1,3 +1,10 @@
+"""
+2025.3.3
+2025.3.5
+4.49.0
+0.15.2
+__UNSLOTH_VERSIONING__
+"""
 from torch import Tensor
 import torch
 import torch.nn as nn

binary
src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc


binary
src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc


Some files were not shown because too many files changed in this diff