import html

import streamlit as st
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

st.title("SpellCorrectorT5")
st.markdown('SpellCorrectorT5 is a fine-tuned version of **pre-trained t5-small model** modelled on randomly selected 50000 sentences modified by [imputing random noises/errors](./random_noiser.py) and trained using transformers. It not only looks for _spelling errors but also looks for the semantics_ in the sentence and suggest other possible words for the incorrect word.')
m_name = "vishnun/tinygram"


def _load_model(name):
    """Return ``(tokenizer, model)`` for the pretrained checkpoint *name*.

    The tokenizer is loaded first, then the seq2seq model, both via
    ``from_pretrained``.
    """
    return (
        AutoTokenizer.from_pretrained(name),
        AutoModelForSeq2SeqLM.from_pretrained(name),
    )


# Streamlit re-executes this script on every user interaction, so without a
# cache the tokenizer and model would be re-instantiated on each form submit.
# ``st.cache_resource`` keeps one shared instance alive across reruns; on
# Streamlit releases that predate it we fall back to loading uncached.
if hasattr(st, "cache_resource"):
    _load_model = st.cache_resource(show_spinner=False)(_load_model)

ttokenizer, tmodel = _load_model(m_name)
# Input form: pick one of the canned misspelled sentences or type a custom
# one, then press Submit.
form = st.form("T5-form")

examples = [
    "I will return it to yu once it is donr",
    "Iu is going to rain",
    "Wheir do you live?",
    "It wis great mieting with you all",
]

with form:
    chosen_example = st.selectbox(label="Choose an example", options=examples)
    st.write("(or)")
    # The text box is pre-filled with the selected example; whatever it holds
    # is the sentence that gets corrected.
    input_text = st.text_input(label='Enter your own sentence', value=chosen_example)
    submit = st.form_submit_button("Submit")

# Opening tag used to emphasise words the model changed or introduced.
_HIGHLIGHT_OPEN = '<span style="font-weight:bold; color:rgb(150,255,100);">'


def _highlight_changes(corrected, original):
    """Return *corrected* as an HTML fragment with new words highlighted.

    Both sentences are lower-cased and split on single spaces. Every word of
    *corrected* that does not occur anywhere in *original* is wrapped in a
    bold, coloured ``<span>``. The first word is capitalised. Each word is
    HTML-escaped because the result is rendered with
    ``unsafe_allow_html=True``.
    """
    # Build the lookup once instead of re-splitting the input for every word.
    known_words = set(original.lower().split(" "))
    pieces = []
    for position, word in enumerate(corrected.lower().split(" ")):
        # Capitalise before wrapping so a highlighted first word is still
        # shown with a leading capital.
        shown = html.escape(word.capitalize() if position == 0 else word)
        if word in known_words:
            pieces.append(shown)
        else:
            pieces.append(_HIGHLIGHT_OPEN + shown + "</span>")
    return " ".join(pieces)


if submit:
    input_ids = ttokenizer.encode(input_text, return_tensors='pt')

    # Sample two candidate corrections. For a seq2seq model, max_length caps
    # the generated (decoder) sequence at 50 tokens; the encoded input
    # sentence does not count towards it.
    outputs = tmodel.generate(
        input_ids,
        do_sample=True,
        max_length=50,
        top_p=0.999,
        top_k=45,
        num_return_sequences=2,
    )

    st.subheader("Most probable: ")

    for sequence in outputs:
        out_text = ttokenizer.decode(sequence, skip_special_tokens=True)
        # Plain corrected sentence first, then the same sentence with the
        # changed words highlighted, then a horizontal rule.
        st.success(out_text.capitalize())
        st.markdown(_highlight_changes(out_text, input_text), unsafe_allow_html=True)
        st.markdown("***", unsafe_allow_html=True)