Forráskód Böngészése

修改 train_model_grpo.py 文件:开启 vLLM,观察能否解决损失值为 0 且无变化的问题

zhouyang.xie 4 hónapja
szülő
commit
9fa7a6263a
18 módosított fájl, 7 hozzáadás és 4 törlés
  1. 7 4
      src/inference.py
  2. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc
  3. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc
  4. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc
  5. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc
  6. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc
  7. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc
  8. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc
  9. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc
  10. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc
  11. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc
  12. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc
  13. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc
  14. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc
  15. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc
  16. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc
  17. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc
  18. BIN
      src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc

+ 7 - 4
src/inference.py

@@ -10,15 +10,18 @@ class ModelInference:
         self.load_in_4bit = load_in_4bit
         self.model = None
         self.tokenizer = None
+        self.lora_rank=64
 
     def load_model(self):
         # 加载训练好的模型和分词器
         self.model, self.tokenizer = FastLanguageModel.from_pretrained(
             model_name=self.model_path,
             max_seq_length=self.max_seq_length,
+            load_in_4bit=self.load_in_4bit, # 值为True 以 4 bit量化进行微调,为False LoRA 16bit。这将内存使用量减少了 4 倍,使我们能够在免费的 16GB 内存 GPU 中实际进行微调。4 位量化本质上将权重转换为一组有限的数字以减少内存使用量。这样做的缺点是准确度会下降 1-2%。如果您想要这种微小的额外准确度,请在较大的 GPU(如 H100)上将其设置为 False。
             dtype=self.dtype,
-            # fast_inference = True,  # # 启用快速推理优化。须依赖vllm包
-            load_in_4bit=self.load_in_4bit,
+            fast_inference = False, # vLLM fast inference is disabled here; set to True to enable it (requires the vllm package)
+            max_lora_rank = self.lora_rank,
+            gpu_memory_utilization=0.6, # Reduce if out of memory
         )
         
         # 将模型设置为推理模式
@@ -38,10 +41,10 @@ class ModelInference:
             # 将用户输入编码为模型输入
             inputs = self.tokenizer(user_input, return_tensors="pt", max_length=self.max_seq_length, truncation=True)
             inputs = inputs.to("cuda")  # 将输入数据移动到GPU
-
+            
             # 生成模型的回复
             with torch.no_grad():
-                outputs = self.model.fast_generate(**inputs, max_length=self.max_seq_length, pad_token_id=self.tokenizer.eos_token_id)
+                outputs = self.model.generate(**inputs, max_length=self.max_seq_length, pad_token_id=self.tokenizer.eos_token_id)
             
             # 解码模型的输出
             model_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

BIN
src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc


BIN
src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc