File size: 2,694 Bytes
21c571e
933d893
21c571e
 
0659652
c7ab302
21c571e
0659652
 
 
 
257c54d
 
 
 
 
 
0659652
 
ad7b7bc
b5fe0df
92189ac
9eda48b
92189ac
1f69fb9
cfc942c
ad7b7bc
6c48632
 
 
 
 
b5fe0df
 
21c571e
ad7b7bc
9eda48b
 
 
 
0659652
 
9eda48b
0659652
7b22e2e
9eda48b
 
 
 
6c48632
 
 
 
dda8d7a
ff83a69
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#import streamlit as st

#x = st.slider('Select a value')
#st.write(x, 'squared is', x * x)
import streamlit as st
from transformers import pipeline, AutoModelForMaskedLM, AutoTokenizer


# --- UI: title, instructions, and example inputs ---
st.title("Completamento del testo in Latino con Latin BERT")
st.write("Inserisci un testo con il token [MASK] per vedere le previsioni del modello.")

# Example sentences the user can paste into the input box below.
# (Each must contain the literal [MASK] token expected by fill-mask pipelines.)
st.write("Esempi di testo:")
st.write("duces et reges carthaginiensivm hanno et mago qui [MASK] punico bello cornelium consulem aput liparas ceperunt")
st.write("hanno et mago qui [MASK]  punico bello cornelium consulem aput liparas ceperunt")

# Free-text input; default value demonstrates the expected [MASK] placement.
input_text = st.text_input("Testo:", value="Lorem ipsum dolor sit amet, [MASK] adipiscing elit.")

# --- Model configuration ---
# Local checkpoint of Latin BERT (https://github.com/dbamman/latin-bert)
modelname = "./models/bert-base-latin-uncased"
# Hugging Face hub model: LuisAVasquez/simple-latin-bert-uncased
modelname_lv = "LuisAVasquez/simple-latin-bert-uncased"


@st.cache_resource
def _load_fill_mask(name: str):
    """Return a fill-mask pipeline for the checkpoint *name*.

    Cached with ``st.cache_resource`` so each (large) model is loaded once
    per server process instead of on every Streamlit rerun triggered by a
    widget interaction.
    """
    tok = AutoTokenizer.from_pretrained(name)
    mdl = AutoModelForMaskedLM.from_pretrained(name)
    return pipeline("fill-mask", model=mdl, tokenizer=tok)


# One pipeline per model; names kept for the prediction section below.
fill_mask = _load_fill_mask(modelname)
fill_mask_lv = _load_fill_mask(modelname_lv)
fill_mask_roberta = _load_fill_mask("pstroe/roberta-base-latin-cased3")

def _show_predictions(title: str, fill_fn, text: str) -> None:
    """Run the fill-mask pipeline *fill_fn* on *text* and render results under *title*."""
    st.subheader(title)
    for pred in fill_fn(text):
        # Each prediction dict carries the candidate token, its score, and the filled sequence.
        st.write(f"**Parola**: {pred['token_str']}, **Probabilità**: {pred['score']:.4f}, **Sequence**: {pred['sequence']}")


# Only run inference once the user has entered (non-empty) text.
if input_text:
    _show_predictions("Risultati delle previsioni con Bert Base Latin Uncased:", fill_mask, input_text)
    _show_predictions("Risultati delle previsioni con Simple Latin Bert:", fill_mask_lv, input_text)
    _show_predictions("Risultati delle previsioni con Roberta Base Latin Cased 3:", fill_mask_roberta, input_text)