
Modify train_model_grpo.py: enable vLLM to see whether it fixes the loss being stuck at 0 with no change

zhouyang.xie committed 3 months ago
parent commit 541bec600c
1 file changed, 5 insertions and 5 deletions
  1. src/train_model_grpo.py (+5 -5)
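The hunks below only raise the LoRA rank and shrink the generation count; the vLLM switch named in the commit message does not appear in this diff. As a point of reference, here is a minimal sketch of how vLLM-backed generation is commonly turned on when loading a model with Unsloth, assuming the fast_inference, max_lora_rank, and gpu_memory_utilization parameters of FastLanguageModel.from_pretrained; the model name is a placeholder, not this repo's model:

import torch
from unsloth import FastLanguageModel

# Sketch only: fast_inference=True is assumed to route generation through vLLM.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Qwen2.5-3B-Instruct",  # placeholder model name
    max_seq_length=6144,          # matches max_seq_length in __main__ below
    dtype=torch.float16,
    load_in_4bit=True,            # 4-bit loading, as in this script
    fast_inference=True,          # enable the vLLM generation backend
    max_lora_rank=64,             # must cover the LoRA r used later (64 here)
    gpu_memory_utilization=0.6,   # assumed knob: VRAM fraction reserved for vLLM
)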

src/train_model_grpo.py (+5 -5)

@@ -114,7 +114,7 @@ class ModelTrainer:
         self.load_in_4bit = load_in_4bit         # load_in_4bit: whether to load the model in 4-bit precision, to save GPU memory
         self.lora_rank = lora_rank  # Larger rank = smarter, but slower
 
-    def load_model(self):
+    def load_model(self,lora_rank=64):
         # Load the pretrained model and tokenizer
         model, tokenizer = FastLanguageModel.from_pretrained(
             model_name=self.model_name,
@@ -138,7 +138,7 @@ class ModelTrainer:
         model = FastLanguageModel.get_peft_model(
             model,
             max_seq_length=self.max_seq_length,  # maximum context (sequence) length
-            r=16,  # LoRA rank, controls the adapter's capacity
+            r=lora_rank,   # Choose any number > 0! Suggested 8, 16, 32, 64, 128
             target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                           "gate_proj", "up_proj", "down_proj"],  # modules to apply LoRA to
             lora_alpha=16,  # LoRA alpha, controls the adapter's scaling
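One side effect of this hunk worth noting: raising r from 16 to 64 while leaving lora_alpha at 16 also shrinks the adapter's effective scale, since standard (non-rsLoRA) PEFT scales the LoRA update by lora_alpha / r. A quick arithmetic sketch:

# Standard LoRA scales its weight update delta_W by lora_alpha / r.
# With lora_alpha pinned at 16, a bigger rank means a smaller scale factor.
lora_alpha = 16
for r in (8, 16, 32, 64, 128):
    print(f"r={r:<4} scaling = lora_alpha / r = {lora_alpha / r}")
# Before this commit: r=16 -> scaling 1.0. After: r=64 -> scaling 0.25.

Some setups bump lora_alpha alongside r to keep that ratio constant.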
@@ -189,7 +189,7 @@ class ModelTrainer:
             fp16 = not is_bfloat16_supported(),
             per_device_train_batch_size = 1,
             gradient_accumulation_steps = 1, # Increase to 4 for smoother training
-            num_generations = 256, # 256 # generate 4 outputs each time
+            num_generations = 8, # 8 # number of outputs generated per prompt
             max_prompt_length = 256, # 256 # maximum length of the input prompt
             max_completion_length = 200, # 200 # maximum length of the generated completion
             num_train_epochs = 1, # Set to 1 for a full training run
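Dropping num_generations from 256 to 8 shrinks the group of rollouts GRPO compares per prompt, and with it the generation-time memory, which grows roughly with num_generations * max_completion_length. For orientation, a minimal sketch of the config object this hunk edits, assuming TRL's GRPOConfig; the output directory is a placeholder:

from trl import GRPOConfig

# Sketch of the settings visible in this hunk; GRPO samples num_generations
# completions per prompt and uses their reward spread as the advantage baseline.
training_args = GRPOConfig(
    output_dir="outputs",            # placeholder path
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,   # increase to 4 for smoother training
    num_generations=8,               # rollouts per prompt; was 256 before this commit
    max_prompt_length=256,           # maximum length of the input prompt
    max_completion_length=200,       # maximum length of each generated completion
    num_train_epochs=1,              # set to 1 for a full training run
)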
@@ -233,7 +233,7 @@ if __name__ == "__main__":
     max_seq_length = 6144  # maximum token length for a single session; one token is roughly 3-4 characters of text
     dtype = torch.float16  # data type
     load_in_4bit = True  # whether to load the model in 4-bit precision
-    lora_rank=16
+    lora_rank=64
 
     # Define the training-set and test-set paths
     train_data_path = os.path.join('..', 'data', 'processed', 'train.jsonl')
@@ -258,7 +258,7 @@ if __name__ == "__main__":
         trainer = ModelTrainer(model_name, max_seq_length, dtype, load_in_4bit, lora_rank)
         
         # Load the model and tokenizer
-        model, tokenizer = trainer.load_model()
+        model, tokenizer = trainer.load_model(lora_rank)
 
         # Load the datasets
         train_dataset = trainer.load_data(train_data_path)
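A small tidy-up this commit leaves on the table: lora_rank is now passed twice, once to the ModelTrainer constructor (stored as self.lora_rank) and again to load_model(lora_rank). A hypothetical refactor, not part of this commit, that keeps a single source of truth:

class ModelTrainer:
    # Hypothetical refactor sketch: store lora_rank once, reuse it in load_model.
    def __init__(self, model_name, max_seq_length, dtype, load_in_4bit, lora_rank=64):
        self.model_name = model_name
        self.max_seq_length = max_seq_length
        self.dtype = dtype
        self.load_in_4bit = load_in_4bit
        self.lora_rank = lora_rank  # single source of truth for the LoRA rank

    def load_model(self):
        # The real method wraps FastLanguageModel.from_pretrained and
        # get_peft_model; here we only show where the stored rank plugs in.
        peft_kwargs = {"r": self.lora_rank, "lora_alpha": 16}
        return peft_kwargs

trainer = ModelTrainer("some-model", 6144, "float16", True, lora_rank=64)
assert trainer.load_model()["r"] == 64  # call site no longer repeats the rank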