zhangxiyi.amos committed
Commit 5f7d877 · Parent(s): f0986b2

fix: remove mean pooling

app.py CHANGED
@@ -18,14 +18,6 @@ config = AutoConfig.from_pretrained("Salesforce/codet5p-110m-embedding", trust_remote_code=True)
 tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5p-110m-embedding", trust_remote_code=True)
 model6 = AutoModel.from_pretrained("Salesforce/codet5p-110m-embedding", config=config, trust_remote_code=True)
 
-# Create a simple mean pooling function to obtain embeddings
-def mean_pooling(model_output, attention_mask):
-    token_embeddings = model_output[0]  # First element of model_output contains all token embeddings
-    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
-    sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
-    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
-    return sum_embeddings / sum_mask
-
 @spaces.GPU
 def generate(query1, query2, source_code):
     if len(query1) < 1:
@@ -47,10 +39,10 @@ def generate(query1, query2, source_code):
     # Special handling for the Salesforce/codet5p-110m-embedding model
     inputs = tokenizer([query1, query2, source_code], padding=True, truncation=True, return_tensors="pt")
     with torch.no_grad():
-
-
-        score1 = cos_sim(embeddings[0]
-        score2 = cos_sim(embeddings[1]
+        embeddings = model6(**inputs)[0]  # use the embeddings output by the model directly
+
+        score1 = cos_sim(embeddings[0], embeddings[2])
+        score2 = cos_sim(embeddings[1], embeddings[2])
     results.append([model_names[-1], float(score1), float(score2)])
 
     return results
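For context, a minimal standalone sketch of the flow this commit switches to: the Salesforce/codet5p-110m-embedding checkpoint (loaded with trust_remote_code=True) is documented to return one pooled 256-dimensional embedding per input from its forward pass, which is why the handwritten mean_pooling helper could be dropped. The input strings below are illustrative placeholders, and torch.nn.functional.cosine_similarity stands in for the cos_sim helper that app.py uses but the diff does not show; this is an assumption-laden sketch, not the Space's exact code.

import torch
import torch.nn.functional as F
from transformers import AutoConfig, AutoModel, AutoTokenizer

checkpoint = "Salesforce/codet5p-110m-embedding"
config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
# Variable name model6 follows app.py
model6 = AutoModel.from_pretrained(checkpoint, config=config, trust_remote_code=True)

# Illustrative inputs (placeholders, not taken from the Space)
query1 = "sort a list of integers"
query2 = "reverse a string"
source_code = "def sort_numbers(xs):\n    return sorted(xs)"

inputs = tokenizer([query1, query2, source_code], padding=True, truncation=True, return_tensors="pt")
with torch.no_grad():
    # The checkpoint's remote code returns pooled embeddings directly,
    # one 256-d vector per input (expected shape here: (3, 256)),
    # so no mean pooling over token embeddings is needed.
    embeddings = model6(**inputs)

# Compare each query against the source code embedding;
# F.cosine_similarity is a stand-in for the app's cos_sim helper.
score1 = F.cosine_similarity(embeddings[0], embeddings[2], dim=0)
score2 = F.cosine_similarity(embeddings[1], embeddings[2], dim=0)
print(float(score1), float(score2))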