# TiberiuCristianLeon's picture
# Update app.py
# 2db18a8 verified
# raw
# history blame
# 2.33 kB
import streamlit as st
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
# Streamlit front end for translating a line of text with either a T5
# checkpoint or a Helsinki-NLP (Marian) checkpoint for the chosen language pair.

# Create the app layout
st.title("Text Translation")
input_text = st.text_input("Enter text to translate:")

# Display name -> two-letter code used to build Helsinki-NLP checkpoint names.
langs = {"English": "en", "Romanian": "ro", "German": "de", "French": "fr", "Spanish": "es"}
# Options for the language select boxes (same order as `langs`).
options = list(langs)
models = ["t5-base", "t5-small", "t5-large", "Helsinki-NLP"]


@st.cache_resource
def _load_checkpoint(checkpoint, use_t5_classes):
    """Load and return ``(tokenizer, model)`` for one checkpoint name.

    Cached per checkpoint so the weights are not re-read on every script
    rerun (Streamlit re-executes the script on each widget interaction).
    Raises OSError when the checkpoint cannot be found or loaded.
    """
    if use_t5_classes:
        return (
            T5Tokenizer.from_pretrained(checkpoint),
            T5ForConditionalGeneration.from_pretrained(checkpoint),
        )
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )


def _resolve_model(model_choice, source_code, target_code):
    """Return ``(checkpoint_name, tokenizer, model)`` for the user's selection.

    For "Helsinki-NLP" the ``opus-mt`` checkpoint of the language pair is
    tried first and ``opus-tatoeba`` second; the last OSError is re-raised
    when neither can be loaded. Any other choice is loaded as a T5 checkpoint.
    """
    if model_choice != "Helsinki-NLP":
        t5_tokenizer, t5_model = _load_checkpoint(model_choice, True)
        return model_choice, t5_tokenizer, t5_model

    failure = None
    for family in ("opus-mt", "opus-tatoeba"):
        checkpoint = f"Helsinki-NLP/{family}-{source_code}-{target_code}"
        try:
            marian_tokenizer, marian_model = _load_checkpoint(checkpoint, False)
        except OSError as err:  # EnvironmentError is an alias of OSError
            failure = err
            continue
        return checkpoint, marian_tokenizer, marian_model
    raise failure


# Create the select boxes
sselected_language = st.selectbox("Select a source language:", options)
tselected_language = st.selectbox("Select a target language:", options)
sl = langs[sselected_language]
tl = langs[tselected_language]
model_name = st.selectbox("Select a model:", models)
st.session_state["sselected_language"] = sselected_language
st.session_state["tselected_language"] = tselected_language
st.session_state["model_name"] = model_name

# Remember the model family before `model_name` is replaced by the resolved
# checkpoint name; the prompt format below depends on it.
uses_helsinki = model_name == "Helsinki-NLP"
try:
    model_name, tokenizer, model = _resolve_model(model_name, sl, tl)
except OSError as err:
    # No usable checkpoint (e.g. identical source and target language):
    # report it in the page and end this run instead of crashing.
    st.error(
        f"Could not load a model for {sselected_language} - {tselected_language}: {err}"
    )
    st.stop()

st.write(
    "Selected language combination:",
    sselected_language,
    " - ",
    tselected_language,
    "Selected model:",
    model_name,
)
submit_button = st.button("Translate")
translated_textarea = st.text("")

# Handle the submit button click
if submit_button:
    if not input_text.strip():
        st.warning("Please enter some text to translate.")
    else:
        if uses_helsinki:
            # The checkpoint is already specific to the language pair, so it
            # receives only the source text; an instruction prefix would be
            # translated as part of the input.
            prompt = input_text
        else:
            # T5 task prefix, without any extra leading words.
            prompt = f"translate {sselected_language} to {tselected_language}: {input_text}"
        print(prompt)
        input_ids = tokenizer.encode(prompt, return_tensors="pt")
        # Perform translation
        output_ids = model.generate(input_ids)
        # Decode the translated text
        translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        # Display the translated text
        print(translated_text)
        st.write(
            f"Translated text from {sselected_language} to {tselected_language}:",
            translated_text,
        )
        translated_textarea = st.text(translated_text)