import streamlit as st
from transformers import pipeline, AutoModelForMaskedLM, AutoTokenizer
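# Streamlit demo: masked-token ("fill-mask") prediction for Latin, comparing two models.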
st.title("Completamento del testo in Latino con Latin BERT")
st.write("Inserisci un testo con il token [MASK] per vedere le previsioni del modello.")
st.write("Esempi di testo:");
st.write("Asdrubal, frater Annibalis, qui secundo Punico bello [MASK] ingentibus copiis ab Hispania veniens > cum");
st.write("hanno et mago qui [MASK] punico bello cornelium consulem aput liparas ceperunt > primo");
st.write("Lorem ipsum dolor sit amet, [MASK] adipiscing elit. > consectetur");
st.write("Populus Romanus cum Macedonibus [MASK] ter gessit => bellum");
input_text = st.text_input("Testo:", value="Lorem ipsum dolor sit amet, [MASK] adipiscing elit.")
# Model based on BERT: Latin BERT (https://github.com/dbamman/latin-bert), stored locally.
# Alternative checkpoints, kept for reference:
#modelname = "./models/latin_bert/"
# Hugging Face: LuisAVasquez/simple-latin-bert-uncased
#modelname_lv = "LuisAVasquez/simple-latin-bert-uncased"
modelname = "./models/bert-base-latin-uncased"
#tokenizer_roberta = AutoTokenizer.from_pretrained("pstroe/roberta-base-latin-cased3")
#model_roberta = AutoModelForMaskedLM.from_pretrained("pstroe/roberta-base-latin-cased3")
#fill_mask_roberta = pipeline("fill-mask", model=model_roberta, tokenizer=tokenizer_roberta)
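# Second active model: ClassCat/roberta-base-latin-v2, a Latin RoBERTa masked LM used for comparison.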
tokenizer_robertaclasscat = AutoTokenizer.from_pretrained("ClassCat/roberta-base-latin-v2")
model_robertaclasscat = AutoModelForMaskedLM.from_pretrained("ClassCat/roberta-base-latin-v2")
fill_mask_robertaclasscat = pipeline("fill-mask", model=model_robertaclasscat, tokenizer=tokenizer_robertaclasscat)
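# Primary model: a fill-mask pipeline over the local Latin BERT weights.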
tokenizer = AutoTokenizer.from_pretrained(modelname)
model = AutoModelForMaskedLM.from_pretrained(modelname)
fill_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer)
#tokenizer_lv = AutoTokenizer.from_pretrained(modelname_lv)
#model_lv = AutoModelForMaskedLM.from_pretrained(modelname_lv)
#fill_mask_lv = pipeline("fill-mask", model=model_lv, tokenizer=tokenizer_lv)
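# Note: every Streamlit rerun reloads both models from disk. A minimal sketch of a
# cached loader, assuming Streamlit >= 1.18 (st.cache_resource); load_fill_mask is
# a hypothetical helper, not part of the original script:
#
# @st.cache_resource
# def load_fill_mask(name: str):
#     tok = AutoTokenizer.from_pretrained(name)
#     mdl = AutoModelForMaskedLM.from_pretrained(name)
#     return pipeline("fill-mask", model=mdl, tokenizer=tok)
#
# fill_mask = load_fill_mask(modelname)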
if input_text:
    predictions = fill_mask(input_text)
    st.subheader("Prediction results with Latin BERT:")
    for pred in predictions:
        st.write(f"**Word**: {pred['token_str']}, **Probability**: {pred['score']:.4f}, **Sequence**: {pred['sequence']}")
    # RoBERTa tokenizers use <mask> rather than BERT's [MASK].
    input_text_roberta = input_text.replace("[MASK]", "<mask>")
    #predictions_roberta = fill_mask_roberta(input_text_roberta)
    #st.subheader("Prediction results with RoBERTa Base Latin Cased 3:")
    #for pred_roberta in predictions_roberta:
    #    st.write(f"**Word**: {pred_roberta['token_str']}, **Probability**: {pred_roberta['score']:.4f}, **Sequence**: {pred_roberta['sequence']}")
    predictions_robertaclasscat = fill_mask_robertaclasscat(input_text_roberta)
    st.subheader("Prediction results with RoBERTa (ClassCat):")
    for pred_robertaclasscat in predictions_robertaclasscat:
        st.write(f"**Word**: {pred_robertaclasscat['token_str']}, **Probability**: {pred_robertaclasscat['score']:.4f}, **Sequence**: {pred_robertaclasscat['sequence']}")