File size: 813 Bytes
a28abbc
805d0ae
a28abbc
805d0ae
a28abbc
805d0ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a28abbc
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer

sentence = st.text_area("enter some text")

model = T5ForConditionalGeneration.from_pretrained("Unbabel/gec-t5_small")
tokenizer = T5Tokenizer.from_pretrained('t5-small')

tokenized_sentence = tokenizer('gec: ' + sentence, max_length=128, truncation=True, padding='max_length', return_tensors='pt')
corrected_sentence = tokenizer.decode(
    model.generate(
        input_ids = tokenized_sentence.input_ids,
        attention_mask = tokenized_sentence.attention_mask, 
        max_length=128,
        num_beams=5,
        early_stopping=True,
    )[0],
    skip_special_tokens=True, 
    clean_up_tokenization_spaces=True
)
print(corrected_sentence) # -> I like swimming.


#if text:
    #st.json(corrected_sentence)