File size: 2,621 Bytes
0f80043
650aa08
3ddb276
167f186
e1dc136
 
497174a
4bd7b04
8877001
94e2261
 
 
 
 
 
 
 
 
44f3be7
650aa08
6572bd7
 
94e2261
0d15afd
 
61e85d4
582c23b
650aa08
007e517
 
2db18a8
 
007e517
 
2db18a8
 
582c23b
 
 
3afc8d3
167f186
3afc8d3
167f186
 
 
cd2dcf6
 
 
 
6377617
 
167f186
c1b6fa4
167f186
8f1fed8
167f186
5b332dd
4bd7b04
5b332dd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import streamlit as st
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM

# --- Page header and free-text input -------------------------------------
st.title("Text Translation")
input_text = st.text_input("Enter text to translate:")

# Language names shown in the select boxes, in display order.
options = ["German", "Romanian", "English", "French", "Spanish"]
# Display name -> two-letter code used to build checkpoint names.
langs = {
    "English": "en",
    "Romanian": "ro",
    "German": "de",
    "French": "fr",
    "Spanish": "es",
}
# Model families offered to the user.
models = ["Helsinki-NLP", "t5-base", "t5-small", "t5-large"]

# Source and target pickers sit side by side; the model picker spans the
# full width underneath them.
scol, tcol = st.columns(2)
sselected_language = scol.selectbox(
    "Source language:", options, index=0, placeholder="Select source language"
)
tselected_language = tcol.selectbox(
    "Target language:", options, index=1, placeholder="Select target language"
)
model_name = st.selectbox(
    "Select a model:", models, index=0, placeholder="Select language model"
)

# Language codes for the current selection.
sl, tl = langs[sselected_language], langs[tselected_language]

# Mirror the current selections into the session state.
for _state_key, _state_value in (
    ("sselected_language", sselected_language),
    ("tselected_language", tselected_language),
    ("model_name", model_name),
):
    st.session_state[_state_key] = _state_value

@st.cache_resource(max_entries=3)
def _load_checkpoint(repo_id, use_t5_classes):
    """Load and cache the tokenizer/model pair for one checkpoint.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded objects avoids reloading the weights on each rerun.
    ``max_entries`` bounds how many loaded models are kept in memory.

    Args:
        repo_id: Checkpoint identifier passed to ``from_pretrained``.
        use_t5_classes: When true, load with ``T5Tokenizer`` and
            ``T5ForConditionalGeneration``; otherwise use the ``Auto*``
            seq2seq classes.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        EnvironmentError: Propagated from ``from_pretrained`` when the
            checkpoint cannot be loaded.
    """
    if use_t5_classes:
        return (
            T5Tokenizer.from_pretrained(repo_id),
            T5ForConditionalGeneration.from_pretrained(repo_id),
        )
    return (
        AutoTokenizer.from_pretrained(repo_id),
        AutoModelForSeq2SeqLM.from_pretrained(repo_id),
    )


# Checkpoints to try, in order. The Helsinki-NLP family has one checkpoint per
# language pair, with the "opus-tatoeba" naming used as a fallback.
_use_t5_classes = model_name != 'Helsinki-NLP'
if _use_t5_classes:
    _candidates = [model_name]
else:
    _candidates = [
        f"Helsinki-NLP/opus-mt-{sl}-{tl}",
        f"Helsinki-NLP/opus-tatoeba-{sl}-{tl}",
    ]

tokenizer = None
model = None
for _candidate in _candidates:
    try:
        tokenizer, model = _load_checkpoint(_candidate, _use_t5_classes)
    except EnvironmentError:
        # This checkpoint could not be loaded; try the next candidate.
        continue
    # Keep the resolved checkpoint name: the translate step checks its prefix.
    model_name = _candidate
    break

if model is None:
    # No candidate loaded (e.g. identical source/target language). Report it
    # in the page and end this run instead of raising an unhandled exception.
    st.error(
        f"No model could be loaded for {sselected_language} - {tselected_language} "
        f"(tried: {', '.join(_candidates)})."
    )
    st.stop()

st.write("Selected language combination:", sselected_language, " - ", tselected_language, "Selected model:", model_name)
submit_button = st.button("Translate")
translated_textarea = st.text("")

# Handle the submit button click
if submit_button:
    if not input_text.strip():
        # Nothing to translate; avoid running the model on an empty prompt.
        st.warning("Please enter some text to translate.")
    else:
        if model_name.startswith('Helsinki-NLP'):
            # The checkpoint is specific to one language pair, so the raw
            # text is the whole prompt.
            prompt = input_text
        else:
            # T5 checkpoints receive the task as a text prefix.
            # NOTE(review): the original T5 checkpoints were presumably only
            # trained on a few English -> X pairs; confirm before relying on
            # other combinations.
            prompt = f'translate {sselected_language} to {tselected_language}: {input_text}'
        print(prompt)
        input_ids = tokenizer.encode(prompt, return_tensors='pt')
        # Perform translation. An explicit token budget is passed because the
        # default generation length is short and truncates longer outputs.
        output_ids = model.generate(input_ids, max_new_tokens=512)
        # Decode the translated text
        translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        # Display the translated text
        print(translated_text)
        st.write(f"Translated text from {sselected_language} to {tselected_language} using {model_name}")
        translated_textarea = st.text(translated_text)