Bladeren bron

修改 train_model_grpo.py 文件:开启 vLLM,观察能否解决损失值始终为 0 且无变化的问题

zhouyang.xie 4 maanden geleden
bovenliggende
commit
b4c422b4be
20 gewijzigde bestanden met 56 toevoegingen en 54 verwijderingen
  1. 1 1
      src/inference.py
  2. 19 19
      src/train_model_grpo.py
  3. 36 34
      src/train_model_grpo_v2.py
  4. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc
  5. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc
  6. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc
  7. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc
  8. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc
  9. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc
  10. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc
  11. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc
  12. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc
  13. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc
  14. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc
  15. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc
  16. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc
  17. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc
  18. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc
  19. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc
  20. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc

+ 1 - 1
src/inference.py

@@ -41,7 +41,7 @@ class ModelInference:
 
             # 生成模型的回复
             with torch.no_grad():
-                outputs = self.model.generate(**inputs, max_length=self.max_seq_length, pad_token_id=self.tokenizer.eos_token_id)
+                outputs = self.model.fast_generate(**inputs, max_length=self.max_seq_length, pad_token_id=self.tokenizer.eos_token_id)
             
             # 解码模型的输出
             model_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

+ 19 - 19
src/train_model_grpo.py

@@ -126,13 +126,13 @@ class ModelTrainer:
             gpu_memory_utilization=0.6, # 0.6 # Reduce if out of memory
         )
 
-        # # 将模型移动到设备上
-        # model = model.to_empty(device='cuda')  # 使用 to_empty 而不是 to
+        # 将模型移动到设备上
+        model = model.to_empty(device='cuda')  # 使用 to_empty 而不是 to
 
-        # # 初始化模型的权重
-        # for param in model.parameters():
-        #     if param.is_meta:
-        #         param.data = torch.randn_like(param)  # 随机初始化
+        # 初始化模型的权重
+        for param in model.parameters():
+            if param.is_meta:
+                param.data = torch.randn_like(param)  # 随机初始化
 
         # 添加 LoRA 适配器
         model = FastLanguageModel.get_peft_model(
@@ -167,13 +167,13 @@ class ModelTrainer:
         print(f"Reserved memory: {torch.cuda.memory_reserved()}")
         print(f"Allocated memory: {torch.cuda.memory_allocated()}")
 
-        # # 启用 pin_memory  2025年3月10日未能验证通过
-        # train_loader = torch.utils.data.DataLoader(
-        #     train_dataset, batch_size=1, shuffle=True, pin_memory=True  
-        # )
+        # 启用 pin_memory  2025年3月10日未能验证通过
+        train_loader = torch.utils.data.DataLoader(
+            train_dataset, batch_size=1, shuffle=True, pin_memory=True  
+        )
         
-        # # 释放未使用的显存
-        # torch.cuda.empty_cache()
+        # 释放未使用的显存
+        torch.cuda.empty_cache()
 
         training_args = GRPOConfig(
             use_vllm = False, # use vLLM for fast inference!
@@ -242,10 +242,10 @@ if __name__ == "__main__":
     try:
         # 设置环境变量
         # 单机多卡
-        # os.environ['RANK'] = '0' # 第一张卡的 rank
-        # os.environ['WORLD_SIZE'] = '1'  # 总共有 1 张卡
-        # os.environ['MASTER_ADDR'] = 'localhost'
-        # os.environ['MASTER_PORT'] = '12345'
+        os.environ['RANK'] = '0' # 第一张卡的 rank
+        os.environ['WORLD_SIZE'] = '1'  # 总共有 1 张卡
+        os.environ['MASTER_ADDR'] = 'localhost'
+        os.environ['MASTER_PORT'] = '12345'
         # 多机多卡
         # export RANK=0  # 第一台机器的 rank
         # export WORLD_SIZE=4  # 总共有 4 台机器
@@ -270,7 +270,7 @@ if __name__ == "__main__":
         save_path = os.path.join('..', 'models', 'trained', 'DeepSeek-R1-Distill-Qwen-1.5B-GRPO')
         trainer.save_model(model, tokenizer, save_path)
     finally:
-        # # 确保进程组被销毁
-        # if dist.is_initialized():
-        #     dist.destroy_process_group()
+        # 确保进程组被销毁
+        if dist.is_initialized():
+            dist.destroy_process_group()
         print("train finally")

+ 36 - 34
src/train_model_grpo_v2.py

@@ -114,31 +114,31 @@ class ModelTrainer:
         self.load_in_4bit = load_in_4bit         # load_in_4bit: 是否以4位精度加载模型,用于节省显存
         self.lora_rank=lora_rank  #Larger rank = smarter, but slower
 
-    def load_model(self):
+    def load_model(self,lora_rank=64):
         # 加载预训练模型和分词器
         model, tokenizer = FastLanguageModel.from_pretrained(
             model_name=self.model_name,
             max_seq_length=self.max_seq_length,
             load_in_4bit=self.load_in_4bit, # 值为True 以 4 bit量化进行微调,为False LoRA 16bit。这将内存使用量减少了 4 倍,使我们能够在免费的 16GB 内存 GPU 中实际进行微调。4 位量化本质上将权重转换为一组有限的数字以减少内存使用量。这样做的缺点是准确度会下降 1-2%。如果您想要这种微小的额外准确度,请在较大的 GPU(如 H100)上将其设置为 False。
             dtype=self.dtype,
-            fast_inference = True, # Enable vLLM fast inference
+            fast_inference = False, # Enable vLLM fast inference
             max_lora_rank = lora_rank,
-            gpu_memory_utilization=0.1, # 0.6 # Reduce if out of memory
+            gpu_memory_utilization=0.6, # 0.6 # Reduce if out of memory
         )
 
-        # 将模型移动到设备上
-        model = model.to_empty(device='cuda')  # 使用 to_empty 而不是 to
+        # # 将模型移动到设备上
+        # model = model.to_empty(device='cuda')  # 使用 to_empty 而不是 to
 
-        # 初始化模型的权重
-        for param in model.parameters():
-            if param.is_meta:
-                param.data = torch.randn_like(param)  # 随机初始化
+        # # 初始化模型的权重
+        # for param in model.parameters():
+        #     if param.is_meta:
+        #         param.data = torch.randn_like(param)  # 随机初始化
 
         # 添加 LoRA 适配器
         model = FastLanguageModel.get_peft_model(
             model,
             max_seq_length=self.max_seq_length,  # 最大上下文(序列)长度
-            r=8, # 16 # LoRA 的秩,控制适配器的复杂度
+            r=lora_rank,   # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
             target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                           "gate_proj", "up_proj", "down_proj"],  # 应用 LoRA 的目标模块
             lora_alpha=16,  # LoRA 的 alpha 参数,控制适配器的缩放
@@ -154,7 +154,8 @@ class ModelTrainer:
 
     def load_data(self, train_data_path):
         # 加载训练集和测试集
-        train_dataset = load_dataset("json", data_files={"train": train_data_path}, split="train")
+        with open(train_data_path, 'r') as f:
+            train_dataset = load_dataset("json", data_files={"train": train_data_path}, split="train")
 
         # train_data_path: 训练数据路径,格式为 JSONL
         return train_dataset
@@ -166,16 +167,16 @@ class ModelTrainer:
         print(f"Reserved memory: {torch.cuda.memory_reserved()}")
         print(f"Allocated memory: {torch.cuda.memory_allocated()}")
 
-        # 启用 pin_memory  2025年3月7日未能验证通过
-        train_loader = torch.utils.data.DataLoader(
-            train_dataset, batch_size=1, shuffle=True, pin_memory=True  
-        )
+        # # 启用 pin_memory  2025年3月10日未能验证通过
+        # train_loader = torch.utils.data.DataLoader(
+        #     train_dataset, batch_size=1, shuffle=True, pin_memory=True  
+        # )
         
-        # 释放未使用的显存
-        torch.cuda.empty_cache()
+        # # 释放未使用的显存
+        # torch.cuda.empty_cache()
 
         training_args = GRPOConfig(
-            use_vllm = True, # use vLLM for fast inference!
+            use_vllm = False, # use vLLM for fast inference!
             learning_rate = 5e-6,
             adam_beta1 = 0.9,
             adam_beta2 = 0.99,
@@ -188,12 +189,12 @@ class ModelTrainer:
             fp16 = not is_bfloat16_supported(),
             per_device_train_batch_size = 1,
             gradient_accumulation_steps = 1, # Increase to 4 for smoother training
-            num_generations = 128, # 256 # 每次生成  输出个数,值范围: 1 - 256
-            max_prompt_length = 128, # 256 # 输入提示的最大长度
-            max_completion_length = 128,# 200 # 生成内容的最大长度
+            num_generations = 8, # 8 # 每次生成 输出 个数
+            max_prompt_length = 256, # 256 # 输入提示的最大长度
+            max_completion_length = 200,# 200 # 生成内容的最大长度
             num_train_epochs = 1, # Set to 1 for a full training run
-            max_steps = 10,  # 250
-            save_steps = 10, # 250
+            max_steps = 250,  # 250
+            save_steps = 250, # 250
             max_grad_norm = 0.1,
             report_to = "none", # Can use Weights & Biases
             output_dir = os.path.join('..', 'models',"outputs"),
@@ -229,10 +230,10 @@ if __name__ == "__main__":
     # 配置参数
     model_name = os.path.join('..', 'models', 'pretrained', 'DeepSeek-R1-Distill-Qwen-1.5B')
     # model_name: 预训练模型的路径
-    max_seq_length = 512  # 单次会话(single session) 的最大 token 长度,一个token大约3-4 字节(Byte)
+    max_seq_length = 6144  # 单次会话(single session) 的最大 token 长度,一个token大约3-4 字节(Byte)
     dtype = torch.float16  # 数据类型
     load_in_4bit = True  # 是否以4位精度加载模型
-    lora_rank=16
+    lora_rank=64
 
     # 定义训练集和测试集路径
     train_data_path = os.path.join('..', 'data', 'processed', 'train.jsonl')
@@ -241,10 +242,10 @@ if __name__ == "__main__":
     try:
         # 设置环境变量
         # 单机多卡
-        os.environ['RANK'] = '0' # 第一张卡的 rank
-        os.environ['WORLD_SIZE'] = '1'  # 总共有 1 张卡
-        os.environ['MASTER_ADDR'] = 'localhost'
-        os.environ['MASTER_PORT'] = '12345'
+        # os.environ['RANK'] = '0' # 第一张卡的 rank
+        # os.environ['WORLD_SIZE'] = '1'  # 总共有 1 张卡
+        # os.environ['MASTER_ADDR'] = 'localhost'
+        # os.environ['MASTER_PORT'] = '12345'
         # 多机多卡
         # export RANK=0  # 第一台机器的 rank
         # export WORLD_SIZE=4  # 总共有 4 台机器
@@ -252,12 +253,12 @@ if __name__ == "__main__":
         # export MASTER_PORT=12345
 
         # 初始化进程组
-        dist.init_process_group(backend='nccl', init_method='env://')
+        # dist.init_process_group(backend='nccl', init_method='env://')
         # 初始化 ModelTrainer
         trainer = ModelTrainer(model_name, max_seq_length, dtype, load_in_4bit,lora_rank)
         
         # 加载模型和分词器
-        model, tokenizer = trainer.load_model()
+        model, tokenizer = trainer.load_model(lora_rank)
 
         # 加载数据集
         train_dataset = trainer.load_data(train_data_path)
@@ -269,6 +270,7 @@ if __name__ == "__main__":
         save_path = os.path.join('..', 'models', 'trained', 'DeepSeek-R1-Distill-Qwen-1.5B-GRPO')
         trainer.save_model(model, tokenizer, save_path)
     finally:
-        # 确保进程组被销毁
-        if dist.is_initialized():
-            dist.destroy_process_group()
+        # # 确保进程组被销毁
+        # if dist.is_initialized():
+        #     dist.destroy_process_group()
+        print("train finally")

BIN
src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc