
Refactor train_model_grpo.py following object-oriented design principles

zhouyang.xie 4 months ago
parent
commit
4bb9924e00
2 changed files with 20 additions and 12 deletions
  1. src/model_downloader.py (+2 -1)
  2. src/train_model_grpo_v1.1.py (+18 -11)

+ 2 - 1
src/model_downloader.py

@@ -10,7 +10,8 @@ from modelscope import snapshot_download
 # model_dir = snapshot_download('deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', cache_dir="../models/")
 # model_dir = snapshot_download('deepseek-ai/Janus-Pro-7B', cache_dir="../models/")
 
-model_dir = snapshot_download('AI-ModelScope/bert-base-uncased', cache_dir="../models/")
+# model_dir = snapshot_download('AI-ModelScope/bert-base-uncased', cache_dir="../models/")
+model_dir = snapshot_download('LLM-Research/longformer-base-4096', cache_dir="../models/")
 
 # Verify the SDK token
 # According to the model's uploader, the model supports Huawei Ascend 910
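
Worth noting: the new snapshot is a Longformer checkpoint, but train_model_grpo_v1.1.py below still loads it with BertTokenizer/BertModel, which expect a WordPiece vocab.txt that this checkpoint does not ship. A minimal sketch (not part of this commit) of loading the downloaded weights with the transformers Auto classes instead:

from transformers import AutoModel, AutoTokenizer

# Longformer uses a RoBERTa-style BPE tokenizer (vocab.json + merges.txt),
# so the Auto classes resolve the matching tokenizer/model pair for us.
model_path = "../models/LLM-Research/longformer-base-4096"  # cache path used above
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModel.from_pretrained(model_path)

# Longformer handles sequences up to 4096 tokens, so most answers fit unchunked.
inputs = tokenizer("a sample answer", return_tensors="pt", truncation=True, max_length=4096)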

+ 18 - 11
src/train_model_grpo_v1.1.py

@@ -27,8 +27,8 @@ class ModelTrainer:
         self.lora_rank = config.lora_rank
         self.gpu_memory_utilization = config.gpu_memory_utilization
         # Initialize the BERT model and tokenizer
-        self.tokenizer = BertTokenizer.from_pretrained(f'../models/AI-ModelScope/bert-base-uncased')
-        self.bert_model = BertModel.from_pretrained(f'../models/AI-ModelScope/bert-base-uncased')
+        self.tokenizer = BertTokenizer.from_pretrained(f'../models/LLM-Research/longformer-base-4096')
+        self.bert_model = BertModel.from_pretrained(f'../models/LLM-Research/longformer-base-4096')
 
     def load_model(self):
         """
@@ -178,15 +178,22 @@ class ModelTrainer:
         extracted_responses = [self.extract_xml_answer(r) for r in responses]
         scores = []
         for resp, ans in zip(extracted_responses, answer):
-            # Encode the generated answer and the reference answer
-            inputs_resp = self.tokenizer(resp, return_tensors='pt', padding=True, truncation=True)
-            inputs_ans = self.tokenizer(ans, return_tensors='pt', padding=True, truncation=True)
-            with torch.no_grad():
-                outputs_resp = self.bert_model(**inputs_resp).last_hidden_state.mean(dim=1)  # shape: (1, 768)
-                outputs_ans = self.bert_model(**inputs_ans).last_hidden_state.mean(dim=1)  # shape: (1, 768)
-            # Compute cosine similarity
-            similarity = self.cosine_similarity(outputs_resp.numpy(), outputs_ans.numpy())
-            scores.append(similarity)
+            # Process long texts in chunks
+            resp_chunks = [resp[i:i + 500] for i in range(0, len(resp), 500)]  # 500 characters per chunk
+            ans_chunks = [ans[i:i + 500] for i in range(0, len(ans), 500)]  # 500 characters per chunk
+            chunk_similarities = []
+            for resp_chunk, ans_chunk in zip(resp_chunks, ans_chunks):
+                # Encode the generated answer and the reference answer
+                inputs_resp = self.tokenizer(resp_chunk, return_tensors='pt', padding=True, truncation=True, max_length=512)
+                inputs_ans = self.tokenizer(ans_chunk, return_tensors='pt', padding=True, truncation=True, max_length=512)
+                with torch.no_grad():
+                    outputs_resp = self.bert_model(**inputs_resp).last_hidden_state.mean(dim=1)  # shape: (1, 768)
+                    outputs_ans = self.bert_model(**inputs_ans).last_hidden_state.mean(dim=1)  # shape: (1, 768)
+                # Compute cosine similarity
+                similarity = self.cosine_similarity(outputs_resp.numpy(), outputs_ans.numpy())
+                chunk_similarities.append(similarity)
+            # Average similarity across all chunks
+            scores.append(np.mean(chunk_similarities))
         return scores
     
     def combined_reward_func(self, prompts, completions, answer, **kwargs):
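
For reference, the chunk-and-average scoring added in this hunk boils down to the following self-contained sketch. It is an illustration, not the repo's exact code: the helper names (embed, chunked_similarity) are hypothetical, and it swaps the BertTokenizer/BertModel pair for the transformers Auto classes so the Longformer checkpoint loads cleanly.

import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer

MODEL_PATH = "../models/LLM-Research/longformer-base-4096"  # snapshot path from model_downloader.py
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
encoder = AutoModel.from_pretrained(MODEL_PATH)

def embed(text: str) -> np.ndarray:
    # Mean-pool the last hidden state into a single fixed-size vector.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        hidden = encoder(**inputs).last_hidden_state  # (1, seq_len, hidden_size)
    return hidden.mean(dim=1).squeeze(0).numpy()

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def chunked_similarity(resp: str, ans: str, chunk_size: int = 500) -> float:
    # Character-level chunking, per-chunk cosine similarity, then the mean.
    resp_chunks = [resp[i:i + chunk_size] for i in range(0, len(resp), chunk_size)]
    ans_chunks = [ans[i:i + chunk_size] for i in range(0, len(ans), chunk_size)]
    # zip() stops at the shorter list, so trailing chunks of the longer text are
    # dropped, matching the behaviour of the diff above.
    sims = [cosine_similarity(embed(r), embed(a)) for r, a in zip(resp_chunks, ans_chunks)]
    # np.mean of an empty list would be NaN; guard for empty inputs.
    return float(np.mean(sims)) if sims else 0.0

One caveat of the positional pairing: a response much longer than the reference is only scored on its first len(ans_chunks) chunks, so the extra material neither helps nor hurts the reward.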