Spaces:
Sleeping
Sleeping
File size: 2,694 Bytes
21c571e 933d893 21c571e 0659652 c7ab302 21c571e 0659652 257c54d 0659652 ad7b7bc b5fe0df 92189ac 9eda48b 92189ac 1f69fb9 cfc942c ad7b7bc 6c48632 b5fe0df 21c571e ad7b7bc 9eda48b 0659652 9eda48b 0659652 7b22e2e 9eda48b 6c48632 dda8d7a ff83a69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import streamlit as st
from transformers import pipeline, AutoModelForMaskedLM, AutoTokenizer

# Page header and usage instructions (all UI text is in Italian).
st.title("Completamento del testo in Latino con Latin BERT")
st.write("Inserisci un testo con il token [MASK] per vedere le previsioni del modello.")

# Example sentences the user can copy into the input box below.
st.write("Esempi di testo:")
st.write("duces et reges carthaginiensivm hanno et mago qui [MASK] punico bello cornelium consulem aput liparas ceperunt")
st.write("hanno et mago qui [MASK] punico bello cornelium consulem aput liparas ceperunt")

# Free-text input; the default value already contains a [MASK] token so the
# app produces predictions on first load.
input_text = st.text_input("Testo:", value="Lorem ipsum dolor sit amet, [MASK] adipiscing elit.")
# Masked-language models compared by this app.
# Local checkpoint of the BERT model from https://github.com/dbamman/latin-bert.
modelname = "./models/bert-base-latin-uncased"
# Hugging Face Hub model.
modelname_lv = "LuisAVasquez/simple-latin-bert-uncased"


@st.cache_resource
def _load_fill_mask(name):
    """Build a fill-mask pipeline for model *name*.

    Cached with st.cache_resource so the tokenizer and model are loaded
    only once per process: without caching, Streamlit re-executes this
    script (and would reload all three models) on every widget interaction.
    """
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForMaskedLM.from_pretrained(name)
    return pipeline("fill-mask", model=model, tokenizer=tokenizer)


# Module-level pipeline names are part of the script's interface and are
# used by the prediction/display section below.
fill_mask_roberta = _load_fill_mask("pstroe/roberta-base-latin-cased3")
fill_mask = _load_fill_mask(modelname)
fill_mask_lv = _load_fill_mask(modelname_lv)
def _show_predictions(header, predictions):
    """Render one model's fill-mask predictions under *header*.

    Each prediction dict (as returned by a transformers fill-mask pipeline)
    provides 'token_str', 'score' and 'sequence'.
    """
    st.subheader(header)
    for pred in predictions:
        st.write(f"**Parola**: {pred['token_str']}, **Probabilità**: {pred['score']:.4f}, **Sequence**: {pred['sequence']}")


# Run all three models on the user's text and show their top predictions.
# The same rendering logic previously appeared three times, copy-pasted.
if input_text:
    _show_predictions("Risultati delle previsioni con Bert Base Latin Uncased:", fill_mask(input_text))
    _show_predictions("Risultati delle previsioni con Simple Latin Bert:", fill_mask_lv(input_text))
    _show_predictions("Risultati delle previsioni con Roberta Base Latin Cased 3:", fill_mask_roberta(input_text))
|