4 maanden geleden · b4c422b4be
--- a/src/inference.py
+++ b/src/inference.py
@@ -41,7 +41,7 @@ class ModelInference:
 
				 
			
 
				             # 生成模型的回复
			
 
				             with torch.no_grad():
			
 
				-                outputs = self.model.generate(**inputs, max_length=self.max_seq_length, pad_token_id=self.tokenizer.eos_token_id)
			
 
				+                outputs = self.model.fast_generate(**inputs, max_length=self.max_seq_length, pad_token_id=self.tokenizer.eos_token_id)
			
 
				             
			
 
				             # 解码模型的输出
			
 
				             model_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
			
--- a/src/train_model_grpo.py
+++ b/src/train_model_grpo.py
@@ -126,13 +126,13 @@ class ModelTrainer:
 
				             gpu_memory_utilization=0.6, # 0.6 # Reduce if out of memory
			
 
				         )
			
 
				 
			
 
				-        # # 将模型移动到设备上
			
 
				-        # model = model.to_empty(device='cuda')  # 使用 to_empty 而不是 to
			
 
				+        # 将模型移动到设备上
			
 
				+        model = model.to_empty(device='cuda')  # 使用 to_empty 而不是 to
			
 
				 
			
 
				-        # # 初始化模型的权重
			
 
				-        # for param in model.parameters():
			
 
				-        #     if param.is_meta:
			
 
				-        #         param.data = torch.randn_like(param)  # 随机初始化
			
 
				+        # 初始化模型的权重
			
 
				+        for param in model.parameters():
			
 
				+            if param.is_meta:
			
 
				+                param.data = torch.randn_like(param)  # 随机初始化
			
 
				 
			
 
				         # 添加 LoRA 适配器
			
 
				         model = FastLanguageModel.get_peft_model(
			
@@ -167,13 +167,13 @@ class ModelTrainer:
 
				         print(f"Reserved memory: {torch.cuda.memory_reserved()}")
			
 
				         print(f"Allocated memory: {torch.cuda.memory_allocated()}")
			
 
				 
			
 
				-        # # 启用 pin_memory  2025年3月10日未能验证通过
			
 
				-        # train_loader = torch.utils.data.DataLoader(
			
 
				-        #     train_dataset, batch_size=1, shuffle=True, pin_memory=True  
			
 
				-        # )
			
 
				+        # 启用 pin_memory  2025年3月10日未能验证通过
			
 
				+        train_loader = torch.utils.data.DataLoader(
			
 
				+            train_dataset, batch_size=1, shuffle=True, pin_memory=True  
			
 
				+        )
			
 
				         
			
 
				-        # # 释放未使用的显存
			
 
				-        # torch.cuda.empty_cache()
			
 
				+        # 释放未使用的显存
			
 
				+        torch.cuda.empty_cache()
			
 
				 
			
 
				         training_args = GRPOConfig(
			
 
				             use_vllm = False, # use vLLM for fast inference!
			
@@ -242,10 +242,10 @@ if __name__ == "__main__":
 
				     try:
			
 
				         # 设置环境变量
			
 
				         # 单机多卡
			
 
				-        # os.environ['RANK'] = '0' # 第一张卡的 rank
			
 
				-        # os.environ['WORLD_SIZE'] = '1'  # 总共有 1 张卡
			
 
				-        # os.environ['MASTER_ADDR'] = 'localhost'
			
 
				-        # os.environ['MASTER_PORT'] = '12345'
			
 
				+        os.environ['RANK'] = '0' # 第一张卡的 rank
			
 
				+        os.environ['WORLD_SIZE'] = '1'  # 总共有 1 张卡
			
 
				+        os.environ['MASTER_ADDR'] = 'localhost'
			
 
				+        os.environ['MASTER_PORT'] = '12345'
			
 
				         # 多机多卡
			
 
				         # export RANK=0  # 第一台机器的 rank
			
 
				         # export WORLD_SIZE=4  # 总共有 4 台机器
			
@@ -270,7 +270,7 @@ if __name__ == "__main__":
 
				         save_path = os.path.join('..', 'models', 'trained', 'DeepSeek-R1-Distill-Qwen-1.5B-GRPO')
			
 
				         trainer.save_model(model, tokenizer, save_path)
			
 
				     finally:
			
 
				-        # # 确保进程组被销毁
			
 
				-        # if dist.is_initialized():
			
 
				-        #     dist.destroy_process_group()
			
 
				+        # 确保进程组被销毁
			
 
				+        if dist.is_initialized():
			
 
				+            dist.destroy_process_group()
			
 
				         print("train finally")
			
--- a/src/train_model_grpo_v2.py
+++ b/src/train_model_grpo_v2.py
@@ -114,31 +114,31 @@ class ModelTrainer:
 
				         self.load_in_4bit = load_in_4bit         # load_in_4bit: 是否以4位精度加载模型，用于节省显存
			
 
				         self.lora_rank=lora_rank  #Larger rank = smarter, but slower
			
 
				 
			
 
				-    def load_model(self):
			
 
				+    def load_model(self,lora_rank=64):
			
 
				         # 加载预训练模型和分词器
			
 
				         model, tokenizer = FastLanguageModel.from_pretrained(
			
 
				             model_name=self.model_name,
			
 
				             max_seq_length=self.max_seq_length,
			
 
				             load_in_4bit=self.load_in_4bit, # 值为True 以 4 bit量化进行微调，为False LoRA 16bit。这将内存使用量减少了 4 倍，使我们能够在免费的 16GB 内存 GPU 中实际进行微调。4 位量化本质上将权重转换为一组有限的数字以减少内存使用量。这样做的缺点是准确度会下降 1-2%。如果您想要这种微小的额外准确度，请在较大的 GPU（如 H100）上将其设置为 False。
			
 
				             dtype=self.dtype,
			
 
				-            fast_inference = True, # Enable vLLM fast inference
			
 
				+            fast_inference = False, # Enable vLLM fast inference
			
 
				             max_lora_rank = lora_rank,
			
 
				-            gpu_memory_utilization=0.1, # 0.6 # Reduce if out of memory
			
 
				+            gpu_memory_utilization=0.6, # 0.6 # Reduce if out of memory
			
 
				         )
			
 
				 
			
 
				-        # 将模型移动到设备上
			
 
				-        model = model.to_empty(device='cuda')  # 使用 to_empty 而不是 to
			
 
				+        # # 将模型移动到设备上
			
 
				+        # model = model.to_empty(device='cuda')  # 使用 to_empty 而不是 to
			
 
				 
			
 
				-        # 初始化模型的权重
			
 
				-        for param in model.parameters():
			
 
				-            if param.is_meta:
			
 
				-                param.data = torch.randn_like(param)  # 随机初始化
			
 
				+        # # 初始化模型的权重
			
 
				+        # for param in model.parameters():
			
 
				+        #     if param.is_meta:
			
 
				+        #         param.data = torch.randn_like(param)  # 随机初始化
			
 
				 
			
 
				         # 添加 LoRA 适配器
			
 
				         model = FastLanguageModel.get_peft_model(
			
 
				             model,
			
 
				             max_seq_length=self.max_seq_length,  # 最大上下文（序列）长度
			
 
				-            r=8, # 16 # LoRA 的秩，控制适配器的复杂度
			
 
				+            r=lora_rank,   # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
			
 
				             target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
			
 
				                           "gate_proj", "up_proj", "down_proj"],  # 应用 LoRA 的目标模块
			
 
				             lora_alpha=16,  # LoRA 的 alpha 参数，控制适配器的缩放
			
@@ -154,7 +154,8 @@ class ModelTrainer:
 
				 
			
 
				     def load_data(self, train_data_path):
			
 
				         # 加载训练集和测试集
			
 
				-        train_dataset = load_dataset("json", data_files={"train": train_data_path}, split="train")
			
 
				+        with open(train_data_path, 'r') as f:
			
 
				+            train_dataset = load_dataset("json", data_files={"train": train_data_path}, split="train")
			
 
				 
			
 
				         # train_data_path: 训练数据路径，格式为 JSONL
			
 
				         return train_dataset
			
@@ -166,16 +167,16 @@ class ModelTrainer:
 
				         print(f"Reserved memory: {torch.cuda.memory_reserved()}")
			
 
				         print(f"Allocated memory: {torch.cuda.memory_allocated()}")
			
 
				 
			
 
				-        # 启用 pin_memory  2025年3月7日未能验证通过
			
 
				-        train_loader = torch.utils.data.DataLoader(
			
 
				-            train_dataset, batch_size=1, shuffle=True, pin_memory=True  
			
 
				-        )
			
 
				+        # # 启用 pin_memory  2025年3月10日未能验证通过
			
 
				+        # train_loader = torch.utils.data.DataLoader(
			
 
				+        #     train_dataset, batch_size=1, shuffle=True, pin_memory=True  
			
 
				+        # )
			
 
				         
			
 
				-        # 释放未使用的显存
			
 
				-        torch.cuda.empty_cache()
			
 
				+        # # 释放未使用的显存
			
 
				+        # torch.cuda.empty_cache()
			
 
				 
			
 
				         training_args = GRPOConfig(
			
 
				-            use_vllm = True, # use vLLM for fast inference!
			
 
				+            use_vllm = False, # use vLLM for fast inference!
			
 
				             learning_rate = 5e-6,
			
 
				             adam_beta1 = 0.9,
			
 
				             adam_beta2 = 0.99,
			
@@ -188,12 +189,12 @@ class ModelTrainer:
 
				             fp16 = not is_bfloat16_supported(),
			
 
				             per_device_train_batch_size = 1,
			
 
				             gradient_accumulation_steps = 1, # Increase to 4 for smoother training
			
 
				-            num_generations = 128, # 256 # 每次生成  输出个数，值范围: 1 - 256
			
 
				-            max_prompt_length = 128, # 256 # 输入提示的最大长度
			
 
				-            max_completion_length = 128,# 200 # 生成内容的最大长度
			
 
				+            num_generations = 8, # 8 # 每次生成 输出 个数
			
 
				+            max_prompt_length = 256, # 256 # 输入提示的最大长度
			
 
				+            max_completion_length = 200,# 200 # 生成内容的最大长度
			
 
				             num_train_epochs = 1, # Set to 1 for a full training run
			
 
				-            max_steps = 10,  # 250
			
 
				-            save_steps = 10, # 250
			
 
				+            max_steps = 250,  # 250
			
 
				+            save_steps = 250, # 250
			
 
				             max_grad_norm = 0.1,
			
 
				             report_to = "none", # Can use Weights & Biases
			
 
				             output_dir = os.path.join('..', 'models',"outputs"),
			
@@ -229,10 +230,10 @@ if __name__ == "__main__":
 
				     # 配置参数
			
 
				     model_name = os.path.join('..', 'models', 'pretrained', 'DeepSeek-R1-Distill-Qwen-1.5B')
			
 
				     # model_name: 预训练模型的路径
			
 
				-    max_seq_length = 512  # 单次会话（single session） 的最大 token 长度，一个token大约3-4 字节（Byte）
			
 
				+    max_seq_length = 6144  # 单次会话（single session） 的最大 token 长度，一个token大约3-4 字节（Byte）
			
 
				     dtype = torch.float16  # 数据类型
			
 
				     load_in_4bit = True  # 是否以4位精度加载模型
			
 
				-    lora_rank=16
			
 
				+    lora_rank=64
			
 
				 
			
 
				     # 定义训练集和测试集路径
			
 
				     train_data_path = os.path.join('..', 'data', 'processed', 'train.jsonl')
			
@@ -241,10 +242,10 @@ if __name__ == "__main__":
 
				     try:
			
 
				         # 设置环境变量
			
 
				         # 单机多卡
			
 
				-        os.environ['RANK'] = '0' # 第一张卡的 rank
			
 
				-        os.environ['WORLD_SIZE'] = '1'  # 总共有 1 张卡
			
 
				-        os.environ['MASTER_ADDR'] = 'localhost'
			
 
				-        os.environ['MASTER_PORT'] = '12345'
			
 
				+        # os.environ['RANK'] = '0' # 第一张卡的 rank
			
 
				+        # os.environ['WORLD_SIZE'] = '1'  # 总共有 1 张卡
			
 
				+        # os.environ['MASTER_ADDR'] = 'localhost'
			
 
				+        # os.environ['MASTER_PORT'] = '12345'
			
 
				         # 多机多卡
			
 
				         # export RANK=0  # 第一台机器的 rank
			
 
				         # export WORLD_SIZE=4  # 总共有 4 台机器
			
@@ -252,12 +253,12 @@ if __name__ == "__main__":
 
				         # export MASTER_PORT=12345
			
 
				 
			
 
				         # 初始化进程组
			
 
				-        dist.init_process_group(backend='nccl', init_method='env://')
			
 
				+        # dist.init_process_group(backend='nccl', init_method='env://')
			
 
				         # 初始化 ModelTrainer
			
 
				         trainer = ModelTrainer(model_name, max_seq_length, dtype, load_in_4bit,lora_rank)
			
 
				         
			
 
				         # 加载模型和分词器
			
 
				-        model, tokenizer = trainer.load_model()
			
 
				+        model, tokenizer = trainer.load_model(lora_rank)
			
 
				 
			
 
				         # 加载数据集
			
 
				         train_dataset = trainer.load_data(train_data_path)
			
@@ -269,6 +270,7 @@ if __name__ == "__main__":
 
				         save_path = os.path.join('..', 'models', 'trained', 'DeepSeek-R1-Distill-Qwen-1.5B-GRPO')
			
 
				         trainer.save_model(model, tokenizer, save_path)
			
 
				     finally:
			
 
				-        # 确保进程组被销毁
			
 
				-        if dist.is_initialized():
			
 
				-            dist.destroy_process_group()
			
 
				+        # # 确保进程组被销毁
			
 
				+        # if dist.is_initialized():
			
 
				+        #     dist.destroy_process_group()
			
 
				+        print("train finally")
			
--- a/src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc