소스 검색

遵循面向对象思想重构train_model_grpo.py

zhouyang.xie 2 달 전
부모
커밋
2275fcf164
22개의 변경된 파일, 23개의 추가 및 12개의 삭제
  1. 2 0
      conf/conf_train.yaml
  2. 6 4
      src/conf_train.py
  3. 13 6
      src/train_model_grpo_v1.py
  4. 1 1
      src/unsloth_compiled_cache/UnslothAlignPropTrainer.py
  5. 1 1
      src/unsloth_compiled_cache/UnslothDDPOTrainer.py
  6. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc
  7. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc
  8. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc
  9. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc
  10. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc
  11. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc
  12. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc
  13. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc
  14. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc
  15. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc
  16. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc
  17. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc
  18. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc
  19. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc
  20. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc
  21. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc
  22. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc

+ 2 - 0
conf/conf_train.yaml

@@ -3,10 +3,12 @@ model_name: "../models/pretrained/DeepSeek-R1-Distill-Qwen-1.5B"
 max_seq_length: 6144  # 单次会话的最大 token 长度
 dtype: "float16"  # 数据类型,可选 "float16" 或 "bfloat16"
 load_in_4bit: True  # 是否以4位精度加载模型
+fast_inference: False # Enable vLLM fast inference
 lora_rank: 64  # LoRA 的 rank 值
 gpu_memory_utilization: 0.6 # GPU VRAM 占用率
 
 # 训练配置
+use_vllm: False # use vLLM for fast inference!
 learning_rate: 5e-6  # 学习率
 adam_beta1: 0.9  # Adam 优化器的 beta1 参数
 adam_beta2: 0.99  # Adam 优化器的 beta2 参数

+ 6 - 4
src/conf_train.py

@@ -10,9 +10,11 @@ class Config:
     max_seq_length: int
     dtype: str
     load_in_4bit: bool
+    fast_inference: bool # Enable vLLM fast inference
     lora_rank: int
     gpu_memory_utilization: float
     
+    use_vllm:bool
     learning_rate: float
     adam_beta1: float
     adam_beta2: float
@@ -36,15 +38,15 @@ class Config:
     train_data_path: str
     save_path: str
 
-def load_config(config_path: str) -> Config:
+def load_config(config_path: str=f"../conf/conf_train.yaml") -> Config:
     """
     加载配置文件。
     :param config_path: 配置文件路径
     :return: 返回配置对象
     """
-    with open(config_path, 'r') as f:
+    with open(config_path, 'r', encoding='utf-8') as f:
         config_dict = yaml.safe_load(f)
     return Config(**config_dict)
 
-# 加载配置文件
-config = load_config(config_path=f"../conf/conf_train.yaml")
+# # 加载配置文件
+# config = load_config(config_path=f"../conf/conf_train.yaml")

+ 13 - 6
src/train_model_grpo_v1.py

@@ -14,11 +14,12 @@ class ModelTrainer:
         初始化 ModelTrainer 类,加载配置参数。
         :param config: 配置对象,包含模型训练所需的参数
         """
-        self.config = config
+        self.config:Config = config
         self.model_name = config.model_name
         self.max_seq_length = config.max_seq_length
         self.dtype = torch.float16 if config.dtype == "float16" else torch.bfloat16
         self.load_in_4bit = config.load_in_4bit
+        self.fast_inference=config.fast_inference
         self.lora_rank = config.lora_rank
         self.gpu_memory_utilization=config.gpu_memory_utilization
 
@@ -32,9 +33,9 @@ class ModelTrainer:
             max_seq_length=self.max_seq_length,
             load_in_4bit=self.load_in_4bit,
             dtype=self.dtype,
-            fast_inference=False,
+            fast_inference=self.fast_inference,
             max_lora_rank=self.lora_rank,
-            gpu_memory_utilization=0.6,
+            gpu_memory_utilization=config.gpu_memory_utilization,
         )
 
         model = model.to_empty(device='cuda')
@@ -91,7 +92,7 @@ class ModelTrainer:
         torch.cuda.empty_cache()
 
         training_args = GRPOConfig(
-            use_vllm=False,
+            use_vllm=self.config.use_vllm,
             learning_rate=self.config.learning_rate,
             adam_beta1=self.config.adam_beta1,
             adam_beta2=self.config.adam_beta2,
@@ -233,8 +234,14 @@ if __name__ == "__main__":
     os.environ['MASTER_PORT'] = '12345'
     
 
-    # 初始化进程组
-    dist.init_process_group(backend='nccl', init_method='env://')
+    # 根据操作系统选择后端
+    backend = 'gloo' if os.name == 'nt' else 'nccl'
+
+    # 使用文件初始化方法
+    init_method = 'file:///tmp/shared_file'  # 文件路径需要所有进程都能访问
+    dist.init_process_group(backend=backend, init_method=init_method)
+
+    print(f"Initialized distributed training with backend: {backend}")
 
     # 初始化 ModelTrainer
     trainer = ModelTrainer(config)

+ 1 - 1
src/unsloth_compiled_cache/UnslothAlignPropTrainer.py

@@ -120,7 +120,7 @@ class UnslothAlignPropConfig(AlignPropConfig):
     )
     def __init__(
         self,
-        exp_name = 'inference',
+        exp_name = 'train_model_grpo_v1',
         run_name = '',
         seed = 3407,
         log_with = None,

+ 1 - 1
src/unsloth_compiled_cache/UnslothDDPOTrainer.py

@@ -136,7 +136,7 @@ class UnslothDDPOConfig(DDPOConfig):
     )
     def __init__(
         self,
-        exp_name = 'inference',
+        exp_name = 'train_model_grpo_v1',
         run_name = '',
         seed = 3407,
         log_with = None,

BIN
src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc