Spaces:
Runtime error
Runtime error
File size: 3,300 Bytes
90dfdae d8e3961 90dfdae 4dd63a4 90dfdae d8e3961 4dd63a4 90dfdae 52b4dce 90dfdae 475c76c 90dfdae 475c76c dedb273 475c76c b4d5bf8 52b4dce dedb273 52b4dce 90dfdae 52b4dce 475c76c 52b4dce 90dfdae b4d5bf8 90dfdae b4d5bf8 90dfdae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
from dataclasses import dataclass
from operator import add, sub
import gradio as gr
import numpy as np
from datasets import load_dataset
from sklearn.metrics.pairwise import cosine_similarity
from pyparsing import Word, alphas, Char, ParseException
# Grammar for expressions such as "king - man + woman": a word followed by
# zero or more (operator, word) pairs.
term = Word(alphas)
# Only "+" and "-" are valid operators. The charset must not contain a space:
# there is no entry for " " in `operations`.
operator = Char("+-")
expression = term + (operator + term)[...]

# Maps each operator token to the function that combines two vectors.
operations = {"+": add, "-": sub}
def parse_expression(input):
    """Parse an arithmetic word expression into its tokens.

    Args:
        input: Expression text, e.g. ``"king - man + woman"``.

    Returns:
        The pyparsing result for the module-level ``expression`` grammar:
        words and operators in the order they appear.

    Raises:
        gr.Error: If the text does not match the grammar, including when
            only a prefix of it matches.
    """
    try:
        # parseAll=True makes trailing text that the grammar cannot consume
        # an error instead of being silently dropped.
        return expression.parseString(input, parseAll=True)
    except ParseException as e:
        raise gr.Error(f"Parsing error: {e.msg} at position [{e.loc}].") from e
def evaluate_expression(input):
    """Fold a parsed expression into a single vector, left to right.

    Args:
        input: Sliceable token sequence that alternates word, operator,
            word, ... (words at even positions, operators at odd ones).

    Returns:
        The vector obtained by starting from the first word's vector and
        applying each operator with the vector of the word that follows it.
    """
    # Pair every operator (odd index) with the word right after it (even index).
    steps = zip(input[1::2], input[2::2])

    total = word_to_vectors(input[0])
    for symbol, operand in steps:
        combine = operations[symbol]
        total = combine(total, word_to_vectors(operand))
    return total
# Load the embeddings dataset once at import time and keep it as a DataFrame.
dataset = load_dataset("karmiq/glove", split="train")
df = dataset.to_pandas()

# Arrays built from the same rows of `df`: all_words[i] and all_vectors[i]
# come from row i.
all_vectors = np.array(df["embeddings"].tolist())
all_words = df["word"].to_numpy()
def word_to_vectors(word):
    """Look up the embedding stored for ``word``.

    Args:
        word: Value to match against the ``word`` column of ``df``.

    Returns:
        The ``embeddings`` entry of the first matching row.

    Raises:
        gr.Error: If no row of ``df`` has this word.
    """
    is_match = df["word"] == word
    matches = df.loc[is_match].embeddings.to_numpy()
    if len(matches) >= 1:
        return matches[0]
    raise gr.Error("Word not found in the dictionary.")
def expression_to_vectors(input):
    """Parse the expression text and evaluate it to a single vector.

    Args:
        input: Expression text passed to ``parse_expression``.

    Returns:
        Whatever ``evaluate_expression`` returns for the parsed tokens.
    """
    tokens = parse_expression(input)
    return evaluate_expression(tokens)
def get_results(expression):
    """Return the ten words most similar to the evaluated expression.

    Args:
        expression: Expression text such as ``"king - man + woman"``. It is
            lower-cased before parsing.

    Returns:
        A dict mapping word -> cosine similarity (as a built-in ``float``),
        in descending order of similarity, holding at most ten entries.
        Words used in the expression itself are left out.

    Raises:
        gr.Error: If the expression is empty or blank, cannot be parsed, or
            contains a word that is not in the vocabulary.
    """
    if not expression or not expression.strip():
        raise gr.Error("Please provide an expression.")

    tokens = parse_expression(expression.lower())
    query_vector = evaluate_expression(tokens)

    # Words sit at the even token positions. Taking them from the parse
    # result (rather than splitting the text on whitespace) also covers
    # inputs written without spaces, e.g. "king-man+woman".
    excluded = set(tokens[::2])

    similarity_scores = cosine_similarity([query_vector], all_vectors)[0]
    ranked_indices = np.argsort(similarity_scores)[::-1]

    results = {}
    for i in ranked_indices:
        word = all_words[i]
        if word in excluded or word in results:
            continue
        results[word] = float(similarity_scores[i])
        # Stop as soon as the top ten are collected instead of walking the
        # whole vocabulary.
        if len(results) == 10:
            break
    return results
# Sample expressions offered in the UI; the first one is also used as the
# initial value of the expression textbox.
examples = [
    "king - man + woman",
    "mother - woman + man",
    "berlin - germany + france",
    "saxophone - jazz + classical",
]
# Computed once at import time so the output label starts out populated.
initial_output = get_results(examples[0])
# Custom CSS handed to gr.Blocks below; overrides colors and font weight of
# the example buttons and the output label.
css = """
button.gallery-item { color: var(--body-text-color) !important; }
.output-class { color: var(--color-red-700) !important; }
.confidence-set .label .text { font-weight: var(--weight-medium); }
.confidence-set:hover .label { color: var(--color-red-700) !important; }
"""
# The output label is created up front so gr.Examples can reference it before
# it is placed in the layout; it is rendered explicitly in the right column.
output = gr.Label(label="Closest words", value=initial_output)

with gr.Blocks(
    css=css,
    theme=gr.themes.Monochrome(radius_size=gr.themes.sizes.radius_sm),
) as app:
    with gr.Row():
        # Left column: expression input, run button, description, examples.
        with gr.Column():
            # Named `expression_input` so the builtin `input` is not shadowed
            # at module level.
            expression_input = gr.Textbox(value=examples[0], label="Expression")
            with gr.Row():
                btn = gr.Button("Run", variant="primary")
            with gr.Row():
                gr.Markdown(
                    "Demonstration of computing cosine similarity of embeddings "
                    "from the [GloVe](https://nlp.stanford.edu/projects/glove/) dataset."
                )
            with gr.Row():
                gr.Examples(
                    examples,
                    inputs=expression_input,
                    cache_examples=True,
                    outputs=output,
                    fn=get_results,
                )
        # Right column: the closest-words label.
        with gr.Column():
            output.render()

    btn.click(fn=get_results, inputs=expression_input, outputs=output)

# Start the server only when run as a script, so importing this module
# (e.g. from tooling) does not launch it.
if __name__ == "__main__":
    app.launch()
|