File size: 2,103 Bytes
90dfdae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from dataclasses import dataclass
from operator import add, sub

import gradio as gr

import numpy as np
from datasets import load_dataset
from sklearn.metrics.pairwise import cosine_similarity

from pyparsing import Word, alphas, Char, ParseException

# Grammar for word arithmetic: one word followed by any number of
# "<operator> <word>" pairs, e.g. "king - man + woman".
term = Word(alphas)
# Only characters that have an entry in `operations` count as operators.
# A space must not be listed here: it has no handler in `operations`, and
# whitespace between tokens is already skipped by the parser.
operator = Char("+-")

expression = term + (operator + term)[...]
# Maps each operator token to the function that combines two vectors.
operations = {"+": add, "-": sub}


def parse_expression(input):
    """Tokenize a word-arithmetic expression such as ``"king - man + woman"``.

    Args:
        input: The raw expression text.

    Returns:
        The pyparsing result for the module-level ``expression`` grammar:
        tokens alternating word, operator, word, ...

    Raises:
        gr.Error: If the text does not match the grammar in full. The message
            carries the character offset and pyparsing's description.
    """
    try:
        # parseAll=True turns trailing text the grammar cannot consume
        # (e.g. "king - man +" or "king man") into a syntax error instead of
        # silently evaluating only the leading part that happened to parse.
        return expression.parseString(input, parseAll=True)
    except ParseException as pe:
        raise gr.Error(f"Syntax error at {pe.loc}: {pe.msg}") from pe


def evaluate_expression(input):
    """Fold a parsed token sequence into a single vector, left to right.

    Args:
        input: Tokens alternating word, operator, word, ... as produced by
            ``parse_expression``.

    Returns:
        The vector of the first word with every following
        ``(operator, word)`` pair applied in order.
    """
    # The first token seeds the accumulator; the rest come in
    # (operator, word) pairs.
    remaining = input[1:]
    accumulated = word_to_vectors(input[0])

    for symbol, token in zip(remaining[::2], remaining[1::2]):
        combine = operations[symbol]
        accumulated = combine(accumulated, word_to_vectors(token))

    return accumulated


# Load the "train" split of the karmiq/glove dataset once, at import time,
# and keep it as a pandas DataFrame for word lookups.
dataset = load_dataset("karmiq/glove", split="train")
df = dataset.to_pandas()

# Index-aligned arrays built from the same DataFrame: all_words[i] is the
# word belonging to the embedding all_vectors[i].
all_words = df["word"].to_numpy()
# NOTE(review): presumably every embedding has the same length, so this is a
# 2-D array as cosine_similarity in get_results expects -- confirm.
all_vectors = np.array(df["embeddings"].to_list())


def word_to_vectors(word):
    """Look up the embedding vector for a single word.

    Args:
        word: Word to look up; compared for exact equality against the
            ``word`` column of ``df``.

    Returns:
        The ``embeddings`` value of the first matching row.

    Raises:
        gr.Error: If no row of ``df`` matches ``word``.
    """
    matches = df.loc[df["word"] == word, "embeddings"]
    if matches.empty:
        # Indexing the empty result would raise a bare IndexError; report the
        # unknown word the same way parse_expression reports syntax errors.
        raise gr.Error(f"Unknown word: {word}")
    return matches.to_numpy()[0]


def expression_to_vectors(input):
    """Parse an expression string and evaluate it to a single vector.

    Args:
        input: The raw expression text, e.g. ``"king - man + woman"``.

    Returns:
        Whatever ``evaluate_expression`` returns for the parsed tokens.
    """
    tokens = parse_expression(input)
    return evaluate_expression(tokens)


def get_results(expression):
    """Return the ten vocabulary words closest to an expression's vector.

    Args:
        expression: The raw expression text, e.g. ``"king - man + woman"``.

    Returns:
        A dict mapping word to cosine similarity, in descending order of
        similarity, holding at most ten entries. Words that appear in the
        expression itself are left out.

    Raises:
        gr.Error: Propagated from ``parse_expression`` on a syntax error.
    """
    tokens = parse_expression(expression)
    query_vector = evaluate_expression(tokens)
    # Words sit at the even positions of the token stream (word, op, word...).
    # Taking them from the parse result rather than from
    # `expression.split()` also excludes them when the expression is written
    # without spaces ("king-man+woman"). The set is built once instead of
    # once per vocabulary entry.
    excluded = set(tokens[::2])

    similarity_scores = cosine_similarity([query_vector], all_vectors)[0]
    ranked_indices = np.argsort(similarity_scores)[::-1]

    results = {}
    for i in ranked_indices:
        candidate = all_words[i]
        if candidate in excluded:
            continue
        results[candidate] = similarity_scores[i]
        # Stop as soon as enough results are collected instead of walking the
        # whole vocabulary and slicing afterwards.
        if len(results) == 10:
            break
    return results


# Sample expressions offered in the UI; the first one pre-fills the textbox.
examples = [
    "king - man + woman",
    "berlin - germany + france",
]

# UI layout: one row with two columns. The first column holds the expression
# textbox, the Run button and the clickable examples; the second column holds
# the result label.
with gr.Blocks() as app:
    with gr.Row():
        with gr.Column():
            # NOTE(review): this module-level name shadows the builtin `input`.
            input = gr.Textbox(value=examples[0], label="Expression")
            with gr.Row():
                btn = gr.Button("Run")
            with gr.Row():
                gr.Examples(examples, inputs=input)

        with gr.Column():
            output = gr.Label(label="Closest words")

    # Clicking Run passes the textbox contents to get_results and displays
    # the returned word -> score mapping in the label.
    btn.click(fn=get_results, inputs=input, outputs=output)

# Runs unconditionally whenever this module is executed or imported.
app.launch()