File size: 2,343 Bytes
19488e2
74c5a4a
be10d84
21edaab
 
74c5a4a
be10d84
21edaab
026c201
 
 
21edaab
 
0b2ff15
 
be10d84
6450f47
 
 
21edaab
 
026c201
be10d84
 
 
 
 
21edaab
 
026c201
 
 
 
 
 
21edaab
 
026c201
 
 
 
 
 
21edaab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b2ff15
 
 
6450f47
 
 
 
026c201
21edaab
 
 
 
 
026c201
0b2ff15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import spaces
import gradio as gr
from numpy.linalg import norm
from transformers import AutoModel
from sentence_transformers import SentenceTransformer

def cos_sim(a, b):
    """Return the cosine similarity of the vectors *a* and *b*.

    The result is the matrix product ``a @ b.T`` divided by the product of
    the two norms, so both arguments must support ``@`` and ``.T`` (e.g.
    NumPy arrays).

    There is no guard for zero-length vectors: if either norm is 0 the
    division is by zero.
    """
    return (a @ b.T) / (norm(a) * norm(b))

# The three Jina checkpoints are loaded through AutoModel with
# trust_remote_code=True; they are loaded in the order code, en, zh.
model1, model2, model3 = (
    AutoModel.from_pretrained(repo_id, trust_remote_code=True)
    for repo_id in (
        "jinaai/jina-embeddings-v2-base-code",
        "jinaai/jina-embeddings-v2-base-en",
        "jinaai/jina-embeddings-v2-base-zh",
    )
)

# The remaining two checkpoints are loaded as SentenceTransformer models.
model4, model5 = (
    SentenceTransformer(repo_id)
    for repo_id in (
        "aspire/acge_text_embedding",
        "intfloat/multilingual-e5-large",
    )
)

@spaces.GPU
def generate(input1, input2):
    """Score the similarity of two texts under every loaded embedding model.

    Args:
        input1: First text. When empty (or None) a sample question is used.
        input2: Second text. When empty (or None) a sample code snippet is
            used.

    Returns:
        A 5-tuple of cosine-similarity scores, one per model, in the order
        ``model1, model2, model3, model4, model5``.
    """
    # Fall back to the sample pair so a blank form still produces scores.
    if not input1:
        input1 = "How do I access the index while iterating over a sequence with a for loop?"
    if not input2:
        input2 = "# Use the built-in enumerator\nfor idx, x in enumerate(xs):\n    print(idx, x)"

    scores = []
    for model in (model1, model2, model3, model4, model5):
        # Each model embeds both texts in one call; row 0 is input1 and
        # row 1 is input2.
        embeddings = model.encode([input1, input2])
        scores.append(cos_sim(embeddings[0], embeddings[1]))

    return tuple(scores)

# Build and start the UI: two text inputs feed `generate`, and each of the
# five output boxes is labelled with the model whose score it shows.
gr.Interface(
    fn=generate,
    inputs=[
        gr.Text(label=field_label, placeholder=sample_text)
        for field_label, sample_text in (
            (
                "input1",
                "How do I access the index while iterating over a sequence with a for loop?",
            ),
            (
                "input2",
                "# Use the built-in enumerator\nfor idx, x in enumerate(xs):\n    print(idx, x)",
            ),
        )
    ],
    outputs=[
        gr.Text(label=model_id)
        for model_id in (
            "jinaai/jina-embeddings-v2-base-code",
            "jinaai/jina-embeddings-v2-base-en",
            "jinaai/jina-embeddings-v2-base-zh",
            "aspire/acge_text_embedding",
            "intfloat/multilingual-e5-large",
        )
    ],
).launch()