9 months ago · dff6b0d9c7
--- a/conf/conf_train.yaml
+++ b/conf/conf_train.yaml
@@ -1,15 +1,15 @@
 
				 # 模型配置
			
 
				 model_name: "../models/pretrained/DeepSeek-R1-Distill-Qwen-1.5B"
			
 
				-max_seq_length: 6144  # 2048 单次会话的最大 token 长度
			
 
				+max_seq_length: 768  # 2048 单次会话的最大 token 长度
			
 
				 dtype: "float16"  # 数据类型，可选 "float16" 或 "bfloat16"
			
 
				 load_in_4bit: True  # 是否以4位精度加载模型
			
 
				 fast_inference: False # Enable vLLM fast inference
			
 
				-lora_rank: 64  # LoRA 的 rank 值 Choose any number>0!suggested 8,16,32,64，128
			
 
				+lora_rank: 128  # LoRA 的 rank 值。Choose any number > 0! Suggested: 8, 16, 32, 64, 128
			
 
				 gpu_memory_utilization: 0.6 # GPU VRAM 占用率
			
 
				 
			
 
				 # 训练配置
			
 
				 use_vllm: False # use vLLM for fast inference!
			
 
				-learning_rate: 1e-5  # 5e-6 学习率 1e-4 (0.0001) to 5e-5 (0.00005).
			
 
				+learning_rate: 1e-4  # 5e-6 学习率 1e-4 (0.0001) to 5e-5 (0.00005).
			
 
				 adam_beta1: 0.9  # Adam 优化器的 beta1 参数
			
 
				 adam_beta2: 0.99  # Adam 优化器的 beta2 参数
			
 
				 weight_decay: 0.1  # 权重衰减，用于防止过拟合，设置为0.1
			
@@ -19,12 +19,12 @@ optim: "adamw_8bit"  # 优化器类型 , adamw_8bit为AdamW优化器，并启动
 
				 logging_steps: 1  # 日志记录步数
			
 
				 per_device_train_batch_size: 2  # 1 每个设备的训练批次（batch）大小
			
 
				 gradient_accumulation_steps: 4  # 1 梯度累积步数 ,用于在较小的batch size下模拟较大的batch
			
 
				-num_generations: 6  # 8 表示每次训练时生成的候选输出数量
			
 
				-max_prompt_length: 256  # 模型输入的最大长度
			
 
				-max_completion_length: 200  # 模型输入（生成）的最大长度
			
 
				+num_generations: 8  # 8 表示每次训练时生成的候选输出数量
			
 
				+max_prompt_length: 256  # 256 模型输入的最大长度
			
 
				+max_completion_length: 384  # 200 模型输出（生成）的最大长度
			
 
				 num_train_epochs: 1  # 训练轮数
			
 
				-max_steps: 250  # 训练的最大步数
			
 
				-save_steps: 250  # 保存模型的步数（多少步保存一次模型）
			
 
				+max_steps: 64  # 250 训练的最大步数
			
 
				+save_steps: 60  # 250 保存模型的步数（多少步保存一次模型）
			
 
				 max_grad_norm: 0.1  # 梯度裁剪的最大阈值，防止梯度爆炸
			
 
				 report_to: "none"  # 报告工具，报告内容如 Weights & Biases，设置为 none 表示不将训练结果报告到外部工具
			
 
				 output_dir: "../models/outputs"  # 输出目录
			
--- a/src/train_model_grpo_v1.1.py
+++ b/src/train_model_grpo_v1.1.py
@@ -158,8 +158,29 @@ class ModelTrainer:
 
				         :param tokenizer: 分词器
			
 
				         :param save_path: 保存路径
			
 
				         """
			
 
				+        """
			
 
				+        # Save to 8bit Q8_0
			
 
				+        if False: model.save_pretrained_gguf("model", tokenizer,)
			
 
				+        # Remember to go to https://huggingface.co/settings/tokens for a token!
			
 
				+        # And change hf to your username!
			
 
				+        if False: model.push_to_hub_gguf("hf/model", tokenizer, token = "")
			
 
				+
			
 
				+        # Save to 16bit GGUF
			
 
				+        if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "f16")
			
 
				+        if False: model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "f16", token = "")
			
 
				+
			
 
				+        # Save to q4_k_m GGUF
			
 
				+        if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")
			
 
				+        if False: model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "q4_k_m", token = "")
			
 
				+
			
 
				+        ###
			
 
				+
			
 
				+        model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) # save_method = "merged_4bit" Merge to 4bit ; save_method = "lora"  Just LoRA adapters ;
			
 
				+        """
			
 
				+
			
 
				         model.save_pretrained(save_path)
			
 
				         tokenizer.save_pretrained(save_path)
			
 
				+        
			
 
				         print(f"Model saved to {save_path}")
			
 
				     
			
 
				     @staticmethod