Spaces:
Build error
Build error
Adding App
Browse files- app.py +79 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
from sentence_transformers.util import cos_sim
|
6 |
+
from sentence_transformers import SentenceTransformer
|
7 |
+
from bokeh.plotting import figure, output_notebook, show, save
|
8 |
+
from bokeh.io import output_file, show
|
9 |
+
from bokeh.models import ColumnDataSource, HoverTool
|
10 |
+
from sklearn.manifold import TSNE
|
11 |
+
|
12 |
+
|
13 |
+
# allow_output_mutation=True: the model is a large, effectively read-only
# object. Without this flag the legacy st.cache decorator hashes the returned
# value on every call to detect mutation, which is slow for a neural network
# and may fail on unhashable internals.
@st.cache(allow_output_mutation=True)
def load_model():
    """Load the sentence-embedding model once and reuse it across reruns.

    Returns:
        The ``SentenceTransformer`` instance for
        ``hackathon-pln-es/bertin-roberta-base-finetuning-esnli``, switched
        to evaluation mode via ``model.eval()``.
    """
    model = SentenceTransformer('hackathon-pln-es/bertin-roberta-base-finetuning-esnli')
    model.eval()
    return model
|
18 |
+
|
19 |
+
@st.cache
def load_plot_data():
    """Read the precomputed plot inputs from disk (cached by Streamlit).

    Returns:
        A ``(embs, data)`` tuple: the array stored in
        ``semeval2015-embs.npy`` and the table read from
        ``semeval2015-data.csv``.
        NOTE(review): the caller adds rows with ``sent`` and ``color`` keys
        and assigns one x/y coordinate per row, so the CSV presumably has
        those columns and one row per embedding -- confirm against the
        data files.
    """
    # The array is loaded first, then the CSV, matching the original order.
    return np.load('semeval2015-embs.npy'), pd.read_csv('semeval2015-data.csv')
|
24 |
+
|
25 |
+
# ---------------------------------------------------------------------------
# Streamlit page: takes two sentences, reports their cosine similarity and
# plots them alongside the precomputed reference embeddings in a 2-D t-SNE
# projection.
# ---------------------------------------------------------------------------
st.title("Sentence Embedding for Spanish with Bertin")
st.write("Sentence embedding for spanish trained on NLI. Used for Sentence Textual Similarity. Based on the model hackathon-pln-es/bertin-roberta-base-finetuning-esnli.")
st.write("Introduce two sentence to see their cosine similarity and a graph showing them in the embedding space.")
st.write("Authors: Anibal Pérez, Emilio Tomás Ariza, Lautaro Gesuelli y Mauricio Mazuecos.")

sent1 = st.text_area('Enter sentence 1')
sent2 = st.text_area('Enter sentence 2')

if st.button('Compute similarity'):
    if sent1 and sent2:
        # Encode both inputs in one batch and report their cosine similarity.
        model = load_model()
        encodings = model.encode([sent1, sent2])
        sim = cos_sim(encodings[0], encodings[1]).numpy().tolist()[0][0]
        st.text('Cosine Similarity: {0:.4f}'.format(sim))

        print('Generating visualization...')
        sentembs, data = load_plot_data()
        # Project reference embeddings and the two new ones together so they
        # share one 2-D space; the user sentences end up as the last two rows.
        X_embedded = TSNE(n_components=2, learning_rate='auto',
                          init='random').fit_transform(np.concatenate([sentembs, encodings], axis=0))

        # DataFrame.append was removed in pandas 2.0; pd.concat works on every
        # pandas version and adds both rows in a single copy. The result is a
        # new frame, so the cached reference table is never mutated.
        user_rows = pd.DataFrame([
            {'sent': sent1, 'color': '#F0E442'},  # sentence 1
            {'sent': sent2, 'color': '#D55E00'},  # sentence 2
        ])
        data = pd.concat([data, user_rows], ignore_index=True)
        data['x'] = X_embedded[:, 0]
        data['y'] = X_embedded[:, 1]

        source = ColumnDataSource(data)

        p = figure(title="Embeddings in space")
        p.circle(
            x='x',
            y='y',
            legend_label="Objects",
            color='color',
            fill_alpha=0.5,
            line_color="blue",
            size=14,
            source=source
        )
        # Show the sentence text when hovering over a point.
        p.add_tools(HoverTool(
            tooltips=[
                ('sent', '@sent')
            ],
            formatters={
                '@sent': 'printf'
            },
            mode='mouse'
        ))
        st.bokeh_chart(p, use_container_width=True)
    else:
        st.write('Missing a sentences')
|
78 |
+
|
79 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
sentence-transformers==2.2.0
|
2 |
+
transformers==4.17.0
|
3 |
+
torch==1.10.2
|
4 |
+
scikit-learn>=1.0
|
5 |
+
bokeh==2.4.1
|