import streamlit as st
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM

# Create the app layout
st.title("Text Translation")
input_text = st.text_input("Enter text to translate:")

# Supported languages, their ISO codes, and the available models
options = ["German", "Romanian", "English", "French", "Spanish"]
langs = {"English": "en", "Romanian": "ro", "German": "de", "French": "fr", "Spanish": "es"}
models = ["Helsinki-NLP", "t5-base", "t5-small", "t5-large"]

# Create two columns and place the language select boxes side by side
scol, tcol = st.columns(2)
with scol:
    sselected_language = st.selectbox("Source language:", options, index=0,
                                      placeholder="Select source language")
with tcol:
    tselected_language = st.selectbox("Target language:", options, index=1,
                                      placeholder="Select target language")

model_name = st.selectbox("Select a model:", models, index=None,
                          placeholder="Select language model")

# Map the display names to ISO language codes
sl = langs[sselected_language]
tl = langs[tselected_language]

# Keep the current choices in session state so they survive reruns
st.session_state["sselected_language"] = sselected_language
st.session_state["tselected_language"] = tselected_language
st.session_state["model_name"] = model_name

# index=None means no model is preselected, so stop the script until one is chosen
if model_name is None:
    st.info("Please select a language model.")
    st.stop()

if model_name == "Helsinki-NLP":
    # Try the opus-mt checkpoint for the language pair first, then fall back to opus-tatoeba
    try:
        model_name = f"Helsinki-NLP/opus-mt-{sl}-{tl}"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    except EnvironmentError:
        model_name = f"Helsinki-NLP/opus-tatoeba-{sl}-{tl}"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
else:
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    model = T5ForConditionalGeneration.from_pretrained(model_name)

st.write("Selected language combination:", sselected_language, "-", tselected_language,
         "Selected model:", model_name)

submit_button = st.button("Translate")
translated_textarea = st.text("")

# Handle the submit button click
if submit_button:
    # The Marian (Helsinki-NLP) models translate the raw input directly;
    # the T5 models expect a task prefix such as "translate English to German: ..."
    if model_name.startswith("Helsinki-NLP"):
        prompt = input_text
    else:
        prompt = f"translate {sselected_language} to {tselected_language}: {input_text}"
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Perform translation
    output_ids = model.generate(input_ids)

    # Decode the translated text
    translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Display the translated text
    st.write(f"Translated text from {sselected_language} to {tselected_language} using {model_name}")
    translated_textarea = st.text(translated_text)
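
One practical refinement worth noting: Streamlit re-runs the whole script on every widget interaction, so the tokenizer and model above are re-instantiated on each rerun. A minimal sketch of caching the loading step with Streamlit's st.cache_resource is shown below; it assumes a recent Streamlit version, and the helper name load_model is illustrative rather than part of the app above.

import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

@st.cache_resource
def load_model(checkpoint: str):
    """Load and cache a tokenizer/model pair so reruns reuse the same objects."""
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    return tokenizer, model

# Example usage inside the app, with the checkpoint name built as in the listing above:
# tokenizer, model = load_model(f"Helsinki-NLP/opus-mt-{sl}-{tl}")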