File size: 2,621 Bytes
0f80043 650aa08 3ddb276 167f186 e1dc136 497174a 4bd7b04 8877001 94e2261 44f3be7 650aa08 6572bd7 94e2261 0d15afd 61e85d4 582c23b 650aa08 007e517 2db18a8 007e517 2db18a8 582c23b 3afc8d3 167f186 3afc8d3 167f186 cd2dcf6 6377617 167f186 c1b6fa4 167f186 8f1fed8 167f186 5b332dd 4bd7b04 5b332dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import streamlit as st
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
# --- Static choices offered in the UI -------------------------------------
# Display names shown in the language pickers, in the order they appear.
options = ["German", "Romanian", "English", "French", "Spanish"]
# Display name -> two-letter code used to build Helsinki-NLP checkpoint names.
langs = {"English":"en", "Romanian":"ro", "German":"de", "French":"fr", "Spanish":"es"}
# Model families the user can choose from.
models = ["Helsinki-NLP", "t5-base", "t5-small", "t5-large"]

# --- Page header and free-text input --------------------------------------
st.title("Text Translation")
input_text = st.text_input("Enter text to translate:")

# --- Language pickers, side by side ---------------------------------------
scol, tcol = st.columns(2)
# Calling the widget on the column object places it inside that column.
sselected_language = scol.selectbox(
    "Source language:",
    options,
    index=0,
    placeholder="Select source language",
)
tselected_language = tcol.selectbox(
    "Target language:",
    options,
    index=1,
    placeholder="Select target language",
)

# --- Model picker (full width, below the two columns) ---------------------
model_name = st.selectbox(
    "Select a model:",
    models,
    index=0,
    placeholder="Select language model",
)

# Two-letter codes for the chosen source and target languages.
sl, tl = langs[sselected_language], langs[tselected_language]

# Mirror the current selections into the session state.
for _state_key, _state_value in (
    ("sselected_language", sselected_language),
    ("tselected_language", tselected_language),
    ("model_name", model_name),
):
    st.session_state[_state_key] = _state_value
@st.cache_resource
def _load_translator(checkpoint, use_t5):
    """Load and cache the tokenizer/model pair for ``checkpoint``.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the weights would be re-instantiated on each rerun.

    Args:
        checkpoint: Hugging Face model id passed to ``from_pretrained``.
        use_t5: When true, load with the T5-specific classes; otherwise use
            the generic ``Auto*`` seq2seq classes.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        OSError: Propagated from ``from_pretrained`` when the checkpoint
            cannot be loaded; failures are not cached.
    """
    if use_t5:
        return (
            T5Tokenizer.from_pretrained(checkpoint),
            T5ForConditionalGeneration.from_pretrained(checkpoint),
        )
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )


if model_name == 'Helsinki-NLP':
    # Try the opus-mt checkpoint first, then the opus-tatoeba one, for the
    # selected language pair.
    for _checkpoint in (
        f"Helsinki-NLP/opus-mt-{sl}-{tl}",
        f"Helsinki-NLP/opus-tatoeba-{sl}-{tl}",
    ):
        try:
            tokenizer, model = _load_translator(_checkpoint, False)
        except OSError:
            continue
        # Keep the full checkpoint id: later code checks the
        # 'Helsinki-NLP' prefix and displays the resolved name.
        model_name = _checkpoint
        break
    else:
        # Neither checkpoint could be loaded (for example when source and
        # target language are identical). Report it instead of crashing.
        st.error(
            f"No Helsinki-NLP model could be loaded for "
            f"{sselected_language} - {tselected_language}. "
            "Choose another language pair or a T5 model."
        )
        st.stop()
else:
    tokenizer, model = _load_translator(model_name, True)

st.write("Selected language combination:", sselected_language, " - ", tselected_language, "Selected model:", model_name)
submit_button = st.button("Translate")
translated_textarea = st.text("")

# Handle the submit button click
if submit_button:
    if not input_text.strip():
        # Nothing to translate: avoid running the model on an empty prompt.
        st.warning("Please enter some text to translate.")
    else:
        if model_name.startswith('Helsinki-NLP'):
            # The Helsinki-NLP checkpoint is already specific to one language
            # pair, so the raw text is the whole prompt.
            prompt = input_text
        else:
            # T5 checkpoints are steered with a task prefix.
            # NOTE(review): the stock T5 checkpoints may not cover every
            # language offered in the UI (e.g. Spanish) - confirm.
            prompt = f'translate {sselected_language} to {tselected_language}: {input_text}'
        print(prompt)
        input_ids = tokenizer.encode(prompt, return_tensors='pt')
        # Perform translation. An explicit budget is passed so longer
        # translations are not cut off by the library's default length cap.
        output_ids = model.generate(input_ids, max_new_tokens=512)
        # Decode the translated text
        translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        # Display the translated text
        print(translated_text)
        st.write(f"Translated text from {sselected_language} to {tselected_language} using {model_name}")
        translated_textarea = st.text(translated_text)