File size: 1,409 Bytes
3c32de9
 
 
3fc2ce9
3c32de9
 
 
 
 
 
 
 
 
 
e416868
3c32de9
 
e416868
 
3c32de9
3fc2ce9
3c32de9
2c526e8
3c32de9
 
2c526e8
3c32de9
 
 
0376d10
3c32de9
 
0376d10
3c32de9
0376d10
3c32de9
0376d10
3c32de9
 
 
 
0376d10
 
3c32de9
 
ebe2b18
3c32de9
ebe2b18
 
3c32de9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[
        0
    ]  
    input_mask_expanded = (
        attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    )
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
        input_mask_expanded.sum(1), min=1e-9
    )

def cosine_similarity(u, v):
    return F.cosine_similarity(u, v, dim=1)


def compare(text1, text2):

    sentences = [text1, text2]

    tokenizer = AutoTokenizer.from_pretrained("dmlls/all-mpnet-base-v2-negation")
    model = AutoModel.from_pretrained("dmlls/all-mpnet-base-v2-negation")

    encoded_input = tokenizer(
        sentences, padding=True, truncation=True, return_tensors="pt"
    )

    with torch.no_grad():
        model_output = model(**encoded_input)

    sentence_embeddings = mean_pooling(model_output, encoded_input["attention_mask"])

    sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

    similarity_score = cosine_similarity(
        sentence_embeddings[0].unsqueeze(0), sentence_embeddings[1].unsqueeze(0)
    )
    return similarity_score.item()


#------------------------------------------------------------
from fastapi import FastAPI

app = FastAPI()

@app.get("/")
def greet_json():
    return {"Hello": "World!"}