2 달 전 · 2275fcf164
--- a/conf/conf_train.yaml
+++ b/conf/conf_train.yaml
@@ -3,10 +3,12 @@ model_name: "../models/pretrained/DeepSeek-R1-Distill-Qwen-1.5B"
 
				 max_seq_length: 6144  # 单次会话的最大 token 长度
			
 
				 dtype: "float16"  # 数据类型，可选 "float16" 或 "bfloat16"
			
 
				 load_in_4bit: True  # 是否以4位精度加载模型
			
 
				+fast_inference: False  # Whether to enable vLLM fast inference (passed as fast_inference when the model is loaded)
			
 
				 lora_rank: 64  # LoRA 的 rank 值
			
 
				 gpu_memory_utilization: 0.6 # GPU VRAM 占用率
			
 
				 
			
 
				 # 训练配置
			
 
				+use_vllm: False  # Whether generation during training uses vLLM (passed to GRPOConfig as use_vllm)
			
 
				 learning_rate: 5e-6  # 学习率
			
 
				 adam_beta1: 0.9  # Adam 优化器的 beta1 参数
			
 
				 adam_beta2: 0.99  # Adam 优化器的 beta2 参数
			
--- a/src/conf_train.py
+++ b/src/conf_train.py
@@ -10,9 +10,11 @@ class Config:
 
				     max_seq_length: int
			
 
				     dtype: str
			
 
				     load_in_4bit: bool
			
 
				+    fast_inference: bool # Enable vLLM fast inference
			
 
				     lora_rank: int
			
 
				     gpu_memory_utilization: float
			
 
				     
			
 
				+    use_vllm:bool
			
 
				     learning_rate: float
			
 
				     adam_beta1: float
			
 
				     adam_beta2: float
			
@@ -36,15 +38,15 @@ class Config:
 
				     train_data_path: str
			
 
				     save_path: str
			
 
				 
			
 
				-def load_config(config_path: str) -> Config:
			
 
				+def load_config(config_path: str = "../conf/conf_train.yaml") -> Config:
			
 
				     """
			
 
				     Load the training configuration from a YAML file.
			
 
				     :param config_path: path to the YAML file; the default is a relative path, so it is resolved against the current working directory
			
 
				     :return: a Config object built from the top-level keys of the YAML file
			
 
				     """
			
 
				-    with open(config_path, 'r') as f:
			
 
				+    with open(config_path, 'r', encoding='utf-8') as f:
			
 
				         config_dict = yaml.safe_load(f)
			
 
				     return Config(**config_dict)
			
 
				 
			
 
				-# 加载配置文件
			
 
				-config = load_config(config_path=f"../conf/conf_train.yaml")
			
 
				+# # 加载配置文件
			
 
				+# config = load_config(config_path=f"../conf/conf_train.yaml")
			
--- a/src/train_model_grpo_v1.py
+++ b/src/train_model_grpo_v1.py
@@ -14,11 +14,12 @@ class ModelTrainer:
 
				         初始化 ModelTrainer 类，加载配置参数。
			
 
				         :param config: 配置对象，包含模型训练所需的参数
			
 
				         """
			
 
				-        self.config = config
			
 
				+        self.config:Config = config
			
 
				         self.model_name = config.model_name
			
 
				         self.max_seq_length = config.max_seq_length
			
 
				         self.dtype = torch.float16 if config.dtype == "float16" else torch.bfloat16
			
 
				         self.load_in_4bit = config.load_in_4bit
			
 
				+        self.fast_inference=config.fast_inference
			
 
				         self.lora_rank = config.lora_rank
			
 
				         self.gpu_memory_utilization=config.gpu_memory_utilization
			
 
				 
			
@@ -32,9 +33,9 @@ class ModelTrainer:
 
				             max_seq_length=self.max_seq_length,
			
 
				             load_in_4bit=self.load_in_4bit,
			
 
				             dtype=self.dtype,
			
 
				-            fast_inference=False,
			
 
				+            fast_inference=self.fast_inference,
			
 
				             max_lora_rank=self.lora_rank,
			
 
				-            gpu_memory_utilization=0.6,
			
 
				+            gpu_memory_utilization=config.gpu_memory_utilization,
			
 
				         )
			
 
				 
			
 
				         model = model.to_empty(device='cuda')
			
@@ -91,7 +92,7 @@ class ModelTrainer:
 
				         torch.cuda.empty_cache()
			
 
				 
			
 
				         training_args = GRPOConfig(
			
 
				-            use_vllm=False,
			
 
				+            use_vllm=self.config.use_vllm,
			
 
				             learning_rate=self.config.learning_rate,
			
 
				             adam_beta1=self.config.adam_beta1,
			
 
				             adam_beta2=self.config.adam_beta2,
			
@@ -233,8 +234,14 @@ if __name__ == "__main__":
 
				     os.environ['MASTER_PORT'] = '12345'
			
 
				     
			
 
				 
			
 
				-    # 初始化进程组
			
 
				-    dist.init_process_group(backend='nccl', init_method='env://')
			
 
				+    # Pick the backend by operating system: gloo on Windows, nccl elsewhere.
			
 
				+    backend = 'gloo' if os.name == 'nt' else 'nccl'
			
 
				+
			
 
				+    # File-based rendezvous: the file must be reachable by every process.
			
 
				+    # On Windows '/tmp' is not a reliable location, so build the URI from the
			
 
				+    # platform temp directory there; the POSIX path is left unchanged.
			
 
				+    if os.name == 'nt':
			
 
				+        import tempfile
			
 
				+        from pathlib import Path
			
 
				+        init_method = Path(tempfile.gettempdir(), 'shared_file').as_uri()
			
 
				+    else:
			
 
				+        init_method = 'file:///tmp/shared_file'
			
 
				+
			
 
				+    # Unlike env://, a file:// init method does not read rank/world size from
			
 
				+    # the environment, so they must be passed explicitly.
			
 
				+    # NOTE(review): defaults assume a single-process run when RANK/WORLD_SIZE
			
 
				+    # are not set -- confirm against how this script is launched.
			
 
				+    rank = int(os.environ.get('RANK', '0'))
			
 
				+    world_size = int(os.environ.get('WORLD_SIZE', '1'))
			
 
				+    dist.init_process_group(
			
 
				+        backend=backend,
			
 
				+        init_method=init_method,
			
 
				+        rank=rank,
			
 
				+        world_size=world_size,
			
 
				+    )
			
 
				+
			
 
				+    print(f"Initialized distributed training with backend: {backend}")
			
 
				 
			
 
				     # 初始化 ModelTrainer
			
 
				     trainer = ModelTrainer(config)
			
--- a/src/unsloth_compiled_cache/UnslothAlignPropTrainer.py
+++ b/src/unsloth_compiled_cache/UnslothAlignPropTrainer.py
@@ -120,7 +120,7 @@ class UnslothAlignPropConfig(AlignPropConfig):
 
				     )
			
 
				     def __init__(
			
 
				         self,
			
 
				-        exp_name = 'inference',
			
 
				+        exp_name = 'train_model_grpo_v1',
			
 
				         run_name = '',
			
 
				         seed = 3407,
			
 
				         log_with = None,
			
--- a/src/unsloth_compiled_cache/UnslothDDPOTrainer.py
+++ b/src/unsloth_compiled_cache/UnslothDDPOTrainer.py
@@ -136,7 +136,7 @@ class UnslothDDPOConfig(DDPOConfig):
 
				     )
			
 
				     def __init__(
			
 
				         self,
			
 
				-        exp_name = 'inference',
			
 
				+        exp_name = 'train_model_grpo_v1',
			
 
				         run_name = '',
			
 
				         seed = 3407,
			
 
				         log_with = None,
			
--- a/src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothAlignPropTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothBCOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothCPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothDDPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothDPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothGKDTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothGRPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothKTOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothNashMDTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothORPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothOnlineDPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothPPOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothPRMTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothRLOOTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothRewardTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothSFTTrainer.cpython-311.pyc
--- a/src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc
+++ b/src/unsloth_compiled_cache/__pycache__/UnslothXPOTrainer.cpython-311.pyc