@@ -27,8 +27,8 @@ class ModelTrainer:
         self.lora_rank = config.lora_rank
         self.gpu_memory_utilization = config.gpu_memory_utilization
         # Initialize the BERT model and tokenizer
-        self.tokenizer = BertTokenizer.from_pretrained(f'../models/AI-ModelScope/bert-base-uncased')
-        self.bert_model = BertModel.from_pretrained(f'../models/AI-ModelScope/bert-base-uncased')
+        self.tokenizer = AutoTokenizer.from_pretrained('../models/LLM-Research/longformer-base-4096')
+        self.bert_model = AutoModel.from_pretrained('../models/LLM-Research/longformer-base-4096')
 
     def load_model(self):
         """
@@ -178,15 +178,22 @@ class ModelTrainer:
         extracted_responses = [self.extract_xml_answer(r) for r in responses]
         scores = []
         for resp, ans in zip(extracted_responses, answer):
-            # Encode the generated answer and the reference answer
-            inputs_resp = self.tokenizer(resp, return_tensors='pt', padding=True, truncation=True)
-            inputs_ans = self.tokenizer(ans, return_tensors='pt', padding=True, truncation=True)
-            with torch.no_grad():
-                outputs_resp = self.bert_model(**inputs_resp).last_hidden_state.mean(dim=1)  # shape: (1, 768)
-                outputs_ans = self.bert_model(**inputs_ans).last_hidden_state.mean(dim=1)  # shape: (1, 768)
-            # Compute cosine similarity
-            similarity = self.cosine_similarity(outputs_resp.numpy(), outputs_ans.numpy())
-            scores.append(similarity)
+            # Process long text in chunks
+            resp_chunks = [resp[i:i + 500] for i in range(0, len(resp), 500)]  # 500 characters per chunk
+            ans_chunks = [ans[i:i + 500] for i in range(0, len(ans), 500)]  # 500 characters per chunk
+            chunk_similarities = []
+            for resp_chunk, ans_chunk in zip(resp_chunks, ans_chunks):
+                # Encode the generated answer and the reference answer
+                inputs_resp = self.tokenizer(resp_chunk, return_tensors='pt', padding=True, truncation=True, max_length=512)
+                inputs_ans = self.tokenizer(ans_chunk, return_tensors='pt', padding=True, truncation=True, max_length=512)
+                with torch.no_grad():
+                    outputs_resp = self.bert_model(**inputs_resp).last_hidden_state.mean(dim=1)  # shape: (1, 768)
+                    outputs_ans = self.bert_model(**inputs_ans).last_hidden_state.mean(dim=1)  # shape: (1, 768)
+                # Compute cosine similarity
+                similarity = self.cosine_similarity(outputs_resp.numpy(), outputs_ans.numpy())
+                chunk_similarities.append(similarity)
+            # Average the similarity across all chunks
+            scores.append(np.mean(chunk_similarities))
         return scores
 
     def combined_reward_func(self, prompts, completions, answer, **kwargs):
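
For reference, a self-contained sketch of the chunk-and-average scoring introduced above; the `cosine_similarity` helper below is a hypothetical stand-in for the class's own method, which is not part of this diff. Note that `zip` pairs chunks positionally and silently drops whatever the longer of the two texts has left over:

```python
# Standalone sketch of the chunked similarity reward, under the assumptions
# stated in the lead-in (model/tokenizer as in the diff; cosine_similarity
# is an assumed stand-in for ModelTrainer's helper).
import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer

MODEL_PATH = '../models/LLM-Research/longformer-base-4096'
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModel.from_pretrained(MODEL_PATH)

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    # Cosine similarity between two (1, hidden) mean-pooled embeddings.
    a, b = a.ravel(), b.ravel()
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def embed(text: str) -> np.ndarray:
    # Mean-pool the last hidden state into a (1, 768) vector, as in the diff.
    inputs = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    with torch.no_grad():
        return model(**inputs).last_hidden_state.mean(dim=1).numpy()

def chunked_similarity(resp: str, ans: str, chunk_size: int = 500) -> float:
    # Character-level chunking, mirroring the hunk above; zip stops at the
    # shorter chunk list, so trailing chunks of the longer text are ignored.
    resp_chunks = [resp[i:i + chunk_size] for i in range(0, len(resp), chunk_size)]
    ans_chunks = [ans[i:i + chunk_size] for i in range(0, len(ans), chunk_size)]
    sims = [cosine_similarity(embed(r), embed(a))
            for r, a in zip(resp_chunks, ans_chunks)]
    return float(np.mean(sims)) if sims else 0.0
```

Given the roughly 4096-token window of the new checkpoint, the 500-character chunks and `max_length=512` truncation are conservative; they mirror the diff as written, but larger chunks would exercise the Longformer's main advantage.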