Spaces:
Runtime error
Runtime error
File size: 2,023 Bytes
da676c8 40c9d2b da676c8 12094be da676c8 fa02d7f da676c8 12094be f089045 da676c8 fa02d7f 6dd0ae0 12094be 6dd0ae0 005c6a4 12094be da676c8 fa02d7f da676c8 1ef9e65 da676c8 6dd0ae0 12094be ac5b8a7 12094be fa02d7f f089045 fa02d7f 12094be fa02d7f 38a8bac 12094be fa02d7f 40c9d2b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import streamlit as st
import pandas as pd
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
# Sentence-embedding model used to score predicted completions against the
# user's semantic-history phrases (cosine similarity).
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
# NOTE(review): st.cache is deprecated in newer Streamlit releases;
# st.cache_resource is the modern equivalent — confirm the pinned version.
@st.cache(allow_output_mutation=True)
def get_model(model):
    """Return a cached fill-mask pipeline for the given model checkpoint.

    top_k=100 caps the number of candidate tokens returned per inference.
    """
    return pipeline("fill-mask", model=model, top_k=100)
# Multiplier applied to a predicted token's score when it appears in the
# user's history keywords, so history matches are ranked first.
HISTORY_WEIGHT = 100

st.caption("This is a simple auto-completion where the next token is predicted per probability and a weigh if appears in user's history")
history_keyword_text = st.text_input("Enter users's history keywords (optional, i.e., 'Gates')", value="")
text = st.text_input("Enter a text for auto completion...", value='Where is Bill')
semantic_text = st.text_input("Enter users's history semantic (optional, i.e., 'Microsoft')", value="Microsoft")
model = st.selectbox("choose a model", ["roberta-base", "bert-base-uncased"])
data_load_state = st.text('Loading model...')
nlp = get_model(model)

if text:
    data_load_state = st.text('Inference to model...')
    # Ask the fill-mask pipeline to complete "<text> <mask>"; returns a list
    # of candidate dicts with 'sequence', 'token_str', and 'score' keys.
    result = nlp(text + ' ' + nlp.tokenizer.mask_token)
    data_load_state.text('')
    # Fixes vs. original: encode with semantic_model (the SentenceTransformer),
    # not `model` (the checkpoint-name string); `result` is a list, so collect
    # each candidate's 'sequence'; `spllit` typo corrected to `split`.
    predicted_embeddings = semantic_model.encode(
        [r['sequence'] for r in result], convert_to_tensor=True)
    semantic_history_embeddings = semantic_model.encode(
        semantic_text.split(','), convert_to_tensor=True)
    # Shape: (num_predictions, num_history_phrases); original passed the
    # undefined names embeddings1/embeddings2.
    cosine_scores = util.cos_sim(predicted_embeddings, semantic_history_embeddings)
    for index, r in enumerate(result):
        # Score each prediction by its best similarity to ANY history phrase.
        # (Original used cosine_scores[index][index], which goes out of range
        # whenever the history-phrase count differs from the prediction count.)
        result[index]['score'] = float(cosine_scores[index].max())
        if r['token_str'].lower().strip() in history_keyword_text.lower().strip() and len(r['token_str'].lower().strip()) > 1:
            # Found in history keywords: boost the score.
            result[index]['score'] *= HISTORY_WEIGHT
    # Sort the (possibly boosted) results, best first.
    df = pd.DataFrame(result).sort_values(by='score', ascending=False)
    # Show the results as a table.
    st.table(df)