#import streamlit as st
#x = st.slider('Select a value')
#st.write(x, 'squared is', x * x)
import streamlit as st
from transformers import pipeline, AutoModelForMaskedLM, AutoTokenizer
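# The transformers "fill-mask" pipeline used below returns the most likely tokens for a masked position in the input.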
st.title("Latin text completion with Latin BERT")
st.write("Enter a text containing the [MASK] token to see the model's predictions.")
st.write("Example texts:")
st.write("Asdrubal, frater Annibalis, qui secundo Punico bello [MASK] ingentibus copiis ab Hispania veniens => cum")
st.write("hanno et mago qui [MASK] punico bello cornelium consulem aput liparas ceperunt => primo")
st.write("Lorem ipsum dolor sit amet, [MASK] adipiscing elit. => consectetur")
st.write("Populus Romanus cum Macedonibus [MASK] ter gessit => bellum")
input_text = st.text_input("Text:", value="Lorem ipsum dolor sit amet, [MASK] adipiscing elit.")
# Masked-language model based on Latin BERT (https://github.com/dbamman/latin-bert), loaded from a local copy.
# Alternative checkpoints, kept commented out for reference:
#modelname = "./models/latin_bert/"
#modelname_lv = "LuisAVasquez/simple-latin-bert-uncased"  # Hugging Face
modelname = "./models/bert-base-latin-uncased"
#tokenizer_roberta = AutoTokenizer.from_pretrained("pstroe/roberta-base-latin-cased3")
#model_roberta = AutoModelForMaskedLM.from_pretrained("pstroe/roberta-base-latin-cased3")
#fill_mask_roberta = pipeline("fill-mask", model=model_roberta, tokenizer=tokenizer_roberta)
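# ClassCat/roberta-base-latin-v2: a Latin RoBERTa checkpoint from the Hugging Face Hub, loaded as a second model for comparison.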
tokenizer_robertaclasscat = AutoTokenizer.from_pretrained("ClassCat/roberta-base-latin-v2")
model_robertaclasscat = AutoModelForMaskedLM.from_pretrained("ClassCat/roberta-base-latin-v2")
fill_mask_robertaclasscat = pipeline("fill-mask", model=model_robertaclasscat, tokenizer=tokenizer_robertaclasscat)
tokenizer = AutoTokenizer.from_pretrained(modelname)
model = AutoModelForMaskedLM.from_pretrained(modelname)
fill_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer)
#tokenizer_lv = AutoTokenizer.from_pretrained(modelname_lv)
#model_lv = AutoModelForMaskedLM.from_pretrained(modelname_lv)
#fill_mask_lv = pipeline("fill-mask", model=model_lv, tokenizer=tokenizer_lv)
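# The loaders above run again on every Streamlit rerun. A minimal sketch of caching them with
# st.cache_resource (hypothetical helper, not wired into the code below):
@st.cache_resource
def load_fill_mask(name: str):
    # Build a fill-mask pipeline once per model name and reuse it across reruns.
    tok = AutoTokenizer.from_pretrained(name)
    mdl = AutoModelForMaskedLM.from_pretrained(name)
    return pipeline("fill-mask", model=mdl, tokenizer=tok)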
if input_text:
    predictions = fill_mask(input_text)
    st.subheader("Prediction results with BERT:")
    for pred in predictions:
        st.write(f"**Word**: {pred['token_str']}, **Probability**: {pred['score']:.4f}, **Sequence**: {pred['sequence']}")
    input_text_roberta = input_text.replace("[MASK]", "<mask>")
    #predictions_roberta = fill_mask_roberta(input_text_roberta)
    #st.subheader("Prediction results with Roberta Base Latin Cased 3:")
    #for pred_roberta in predictions_roberta:
    #    st.write(f"**Word**: {pred_roberta['token_str']}, **Probability**: {pred_roberta['score']:.4f}, **Sequence**: {pred_roberta['sequence']}")
    predictions_robertaclasscat = fill_mask_robertaclasscat(input_text_roberta)
    st.subheader("Prediction results with RoBERTa:")
    for pred_robertaclasscat in predictions_robertaclasscat:
        st.write(f"**Word**: {pred_robertaclasscat['token_str']}, **Probability**: {pred_robertaclasscat['score']:.4f}, **Sequence**: {pred_robertaclasscat['sequence']}")