File size: 5,713 Bytes
56f497c 70ebea3 a103fac fe93b05 cae0132 56f497c cae0132 4596a5d 8b76c73 4894c8f 2da2e03 282f817 a5ff6b9 e13fba3 310e819 0b61062 70ebea3 56f497c 4596a5d 0b61062 56f497c 8b76c73 56f497c 8b76c73 e13fba3 8b76c73 310e819 282f817 cae0132 a103fac cae0132 2045817 60b55c4 310e819 6dc437d d2894fa cae0132 44aa6cb d2894fa cae0132 d2894fa 310e819 d2894fa 56f497c bf6322b 56f497c 72e8644 2045817 72e8644 60b55c4 4894c8f 4fd4915 b98b60d 56f497c 2169b22 56f497c e402119 56f497c 2169b22 56f497c 7b79ed9 2169b22 28c6232 56f497c ba3b9f6 56f497c 8b76c73 56f497c b98b60d 56f497c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import gradio as gr
import spaces
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import languagecodes
# Frequently used languages, pinned to the top of the dropdowns.
# The "-----" entry acts as a visual separator before the full ISO list.
favourite_langs = {"German": "de", "Romanian": "ro", "English": "en", "-----": "-----"}
# Full mapping of language name -> ISO code from the project-local module.
all_langs = languagecodes.iso_languages
# Language options as list, add favourite languages first
options = list(favourite_langs.keys())
options.extend(list(all_langs.keys()))
# Model choices offered in the UI; "Helsinki-NLP" is resolved at translate
# time to a concrete opus-mt/opus-tatoeba checkpoint for the language pair.
models = ["Helsinki-NLP", "t5-base", "t5-small", "t5-large",
"facebook/nllb-200-distilled-600M",
"facebook/nllb-200-distilled-1.3B",
"facebook/mbart-large-50-many-to-many-mmt"]
def model_to_cuda(model):
    """Return *model* moved to the GPU when CUDA is available, else unchanged on CPU."""
    if not torch.cuda.is_available():
        print("CUDA not available! Using CPU.")
        return model
    print("CUDA is available! Using GPU.")
    return model.to('cuda')
@spaces.GPU
def translate_text(input_text, sselected_language, tselected_language, model_name):
    """Translate *input_text* between two languages with the selected model.

    Args:
        input_text: Source-language text to translate.
        sselected_language: Source language display name (key of ``all_langs``).
        tselected_language: Target language display name (key of ``all_langs``).
        model_name: One of the entries in the module-level ``models`` list.

    Returns:
        Tuple of ``(translated_text, status_message)`` strings for the UI.
    """
    sl = all_langs[sselected_language]
    tl = all_langs[tselected_language]
    message_text = f'Translated from {sselected_language} to {tselected_language} with {model_name}'
    if model_name == "Helsinki-NLP":
        # Resolve the generic choice to a concrete per-pair checkpoint:
        # try opus-mt first, then fall back to opus-tatoeba.
        try:
            model_name = f"Helsinki-NLP/opus-mt-{sl}-{tl}"
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(model_name))
        except EnvironmentError:
            try:
                model_name = f"Helsinki-NLP/opus-tatoeba-{sl}-{tl}"
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(model_name))
            except EnvironmentError as error:
                # FIX: stringify the exception — the gradio Textbox output
                # expects a string, not an exception object.
                return f"Error finding model: {model_name}! Try other available language combination.", str(error)
    if 'nllb' in model_name:
        # NLLB uses its own language codes (e.g. "deu_Latn"); the pipeline
        # handles tokenization, generation and decoding in one call.
        tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang=languagecodes.nllb_language_codes[sselected_language])
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")
        translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=languagecodes.nllb_language_codes[sselected_language], tgt_lang=languagecodes.nllb_language_codes[tselected_language])
        translated_text = translator(input_text, max_length=512)
        return translated_text[0]['translation_text'], message_text
    if model_name.startswith('facebook/mbart-large'):
        from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
        model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
        tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
        # Translate source to target: mBART needs the source language set on
        # the tokenizer and the target forced as the first generated token.
        tokenizer.src_lang = languagecodes.mbart_large_languages[sselected_language]
        encoded = tokenizer(input_text, return_tensors="pt")
        generated_tokens = model.generate(
            **encoded,
            forced_bos_token_id=tokenizer.lang_code_to_id[languagecodes.mbart_large_languages[tselected_language]]
        )
        return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0], message_text
    if model_name.startswith('t5'):
        tokenizer = T5Tokenizer.from_pretrained(model_name)
        model = T5ForConditionalGeneration.from_pretrained(model_name, device_map="auto")
    # T5 expects a task prefix; Helsinki opus models translate the raw text.
    if model_name.startswith("Helsinki-NLP"):
        prompt = input_text
    else:
        prompt = f"translate {sselected_language} to {tselected_language}: {input_text}"
    # FIX: place the input tensor on the same device as the model — the
    # original left input_ids on CPU while the model could be on CUDA,
    # which raises a device-mismatch error in generate().
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(input_ids, max_length=512)
    translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Recompute the status with the resolved (concrete) model name.
    message_text = f'Translated from {sselected_language} to {tselected_language} with {model_name}'
    print(f'Translating from {sselected_language} to {tselected_language} with {model_name}:', f'{input_text} = {translated_text}', sep='\n')
    return translated_text, message_text
def swap_languages(src_lang, tgt_lang):
    """Return the two language selections in reversed order (dropdown swap)."""
    swapped = (tgt_lang, src_lang)
    return swapped
def create_interface():
    """Assemble and return the Gradio Blocks UI for the translation demo."""
    with gr.Blocks() as demo:
        gr.Markdown("## Machine Text Translation")

        with gr.Row():
            source_text = gr.Textbox(label="Enter text to translate:", placeholder="Type your text here...")

        with gr.Row():
            src_dropdown = gr.Dropdown(choices=options, value=options[0], label="Source language", interactive=True)
            tgt_dropdown = gr.Dropdown(choices=options, value=options[1], label="Target language", interactive=True)

        # One-click exchange of the two dropdown selections.
        swap_btn = gr.Button("Swap Languages")
        swap_btn.click(fn=swap_languages, inputs=[src_dropdown, tgt_dropdown], outputs=[src_dropdown, tgt_dropdown])

        model_dropdown = gr.Dropdown(choices=models, label="Select a model", value=models[4], interactive=True)
        run_btn = gr.Button("Translate")

        # Read-only output fields: the translation and a status/error line.
        output_text = gr.Textbox(label="Translated text:", placeholder="Display field for translation", interactive=False, show_copy_button=True)
        status_text = gr.Textbox(label="Messages:", placeholder="Display field for status and error messages", interactive=False)

        run_btn.click(
            translate_text,
            inputs=[source_text, src_dropdown, tgt_dropdown, model_dropdown],
            outputs=[output_text, status_text],
        )
    return demo
# Build the UI at import time (HF Spaces may look up `interface`), but only
# launch the server when this file is executed as a script.
interface = create_interface()

if __name__ == "__main__":
    interface.launch()