File size: 3,379 Bytes
21c571e
933d893
21c571e
 
0659652
c7ab302
21c571e
0659652
 
 
 
257c54d
 
d6a4ed5
 
 
7888f10
 
 
 
0659652
 
ad7b7bc
b5fe0df
92189ac
d28466c
92189ac
1f69fb9
cfc942c
ad7b7bc
6c48632
d28466c
 
 
6c48632
7888f10
 
 
 
b5fe0df
 
21c571e
ad7b7bc
d28466c
 
 
9eda48b
0659652
 
d28466c
0659652
7b22e2e
35550d2
d28466c
 
 
 
651742a
d28466c
7888f10
 
dda8d7a
ff83a69
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#import streamlit as st

#x = st.slider('Select a value')
#st.write(x, 'squared is', x * x)
import streamlit as st
from transformers import pipeline, AutoModelForMaskedLM, AutoTokenizer


# --- Page header and example prompts -------------------------------------
st.title("Completamento del testo in Latino con Latin BERT")
st.write("Inserisci un testo con il token [MASK] per vedere le previsioni del modello.")


# Example inputs: the text before ">" / "=>" is the prompt, the word after
# it is the completion the model is expected to predict.
st.write("Esempi di testo:")
st.write("Asdrubal, frater Annibalis, qui secundo Punico bello [MASK] ingentibus copiis ab Hispania veniens > cum")
st.write("hanno et mago qui [MASK]  punico bello cornelium consulem aput liparas ceperunt > primo")
st.write("Lorem ipsum dolor sit amet, [MASK] adipiscing elit. > consectetur")
st.write("Populus Romanus cum Macedonibus [MASK] ter gessit => bellum")

# Free-text prompt; must contain the literal token "[MASK]".
input_text = st.text_input("Testo:", value="Lorem ipsum dolor sit amet, [MASK] adipiscing elit.")

# --- Model loading --------------------------------------------------------
# Latin BERT (https://github.com/dbamman/latin-bert), served from a local
# checkpoint directory.
# Alternative checkpoints previously tried (kept for reference):
#   - Hugging Face "LuisAVasquez/simple-latin-bert-uncased"
#   - "pstroe/roberta-base-latin-cased3"
modelname = "./models/bert-base-latin-uncased"


@st.cache_resource
def _load_fill_mask(model_id: str):
    """Build a fill-mask pipeline for *model_id* (local path or HF hub id).

    Cached with ``st.cache_resource`` so the tokenizer and model weights are
    loaded once per process instead of on every Streamlit rerun (each widget
    interaction re-executes the whole script).
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForMaskedLM.from_pretrained(model_id)
    return pipeline("fill-mask", model=model, tokenizer=tokenizer)


# Pipelines consumed by the prediction section below.
fill_mask_robertaclasscat = _load_fill_mask("ClassCat/roberta-base-latin-v2")
fill_mask = _load_fill_mask(modelname)

def _format_prediction(pred: dict) -> str:
    """Render one fill-mask prediction dict as a Markdown result line."""
    # Keys come straight from the transformers fill-mask pipeline output.
    return (
        f"**Parola**: {pred['token_str']}, "
        f"**Probabilità**: {pred['score']:.4f}, "
        f"**Sequence**: {pred['sequence']}"
    )


if input_text:
    # Latin BERT uses the literal "[MASK]" token from the input as-is.
    predictions = fill_mask(input_text)
    st.subheader("Risultati delle previsioni con Bert:")
    for pred in predictions:
        st.write(_format_prediction(pred))

    # RoBERTa tokenizers expect "<mask>" rather than BERT's "[MASK]".
    input_text_roberta = input_text.replace("[MASK]", "<mask>")
    predictions_robertaclasscat = fill_mask_robertaclasscat(input_text_roberta)
    st.subheader("Risultati delle previsioni con Roberta:")
    for pred_robertaclasscat in predictions_robertaclasscat:
        st.write(_format_prediction(pred_robertaclasscat))