zhangxiyi.amos committed on
Commit
5f7d877
·
1 Parent(s): f0986b2

fix: 移除平均池化

Browse files
Files changed (1) hide show
  1. app.py +4 -12
app.py CHANGED
@@ -18,14 +18,6 @@ config = AutoConfig.from_pretrained("Salesforce/codet5p-110m-embedding", trust_r
18
  tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5p-110m-embedding", trust_remote_code=True)
19
  model6 = AutoModel.from_pretrained("Salesforce/codet5p-110m-embedding", config=config, trust_remote_code=True)
20
 
21
- # 创建一个简单的平均池化函数来获取嵌入
22
- def mean_pooling(model_output, attention_mask):
23
- token_embeddings = model_output[0] # First element of model_output contains all token embeddings
24
- input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
25
- sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
26
- sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
27
- return sum_embeddings / sum_mask
28
-
29
  @spaces.GPU
30
  def generate(query1, query2, source_code):
31
  if len(query1) < 1:
@@ -47,10 +39,10 @@ def generate(query1, query2, source_code):
47
  # 特殊处理 Salesforce/codet5p-110m-embedding 模型
48
  inputs = tokenizer([query1, query2, source_code], padding=True, truncation=True, return_tensors="pt")
49
  with torch.no_grad():
50
- model_output = model6(**inputs)
51
- embeddings = mean_pooling(model_output, inputs['attention_mask'])
52
- score1 = cos_sim(embeddings[0].unsqueeze(0), embeddings[2].unsqueeze(0))
53
- score2 = cos_sim(embeddings[1].unsqueeze(0), embeddings[2].unsqueeze(0))
54
  results.append([model_names[-1], float(score1), float(score2)])
55
 
56
  return results
 
18
  tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5p-110m-embedding", trust_remote_code=True)
19
  model6 = AutoModel.from_pretrained("Salesforce/codet5p-110m-embedding", config=config, trust_remote_code=True)
20
 
 
 
 
 
 
 
 
 
21
  @spaces.GPU
22
  def generate(query1, query2, source_code):
23
  if len(query1) < 1:
 
39
  # 特殊处理 Salesforce/codet5p-110m-embedding 模型
40
  inputs = tokenizer([query1, query2, source_code], padding=True, truncation=True, return_tensors="pt")
41
  with torch.no_grad():
42
+ embeddings = model6(**inputs)[0] # 直接使用模型输出的嵌入
43
+
44
+ score1 = cos_sim(embeddings[0], embeddings[2])
45
+ score2 = cos_sim(embeddings[1], embeddings[2])
46
  results.append([model_names[-1], float(score1), float(score2)])
47
 
48
  return results