# Spaces: Running
| import gradio as gr | |
| import spaces | |
| import torch | |
| from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
| import languagecodes | |
# Languages listed first in the dropdowns. The "-----" entry is presumably a
# visual separator between the favourites and the full list (not a language).
favourite_langs = {"German": "de", "Romanian": "ro", "English": "en", "-----": "-----"}

# Mapping of language name -> language code supplied by the languagecodes module.
all_langs = languagecodes.iso_languages

# Dropdown choices: favourite names first, followed by every name in all_langs.
options = [*favourite_langs.keys(), *all_langs.keys()]

# Model identifiers offered in the model dropdown.
models = [
    "Helsinki-NLP",
    "t5-base",
    "t5-small",
    "t5-large",
    "facebook/nllb-200-distilled-600M",
    "facebook/nllb-200-distilled-1.3B",
    "facebook/mbart-large-50-many-to-many-mmt",
]
def model_to_cuda(model):
    """Return ``model``, moved to the GPU first when CUDA is available.

    Prints which device is being used. When CUDA is not available the
    model is returned untouched.
    """
    if not torch.cuda.is_available():
        print("CUDA not available! Using CPU.")
        return model

    model = model.to('cuda')
    print("CUDA is available! Using GPU.")
    return model
def translate_text(input_text, sselected_language, tselected_language, model_name):
    """Translate ``input_text`` with the selected model.

    Args:
        input_text: Text to translate.
        sselected_language: Source language name (a key of ``all_langs``).
        tselected_language: Target language name (a key of ``all_langs``).
        model_name: One of the entries in ``models``. ``"Helsinki-NLP"`` is
            resolved to a language-pair specific ``opus-mt`` / ``opus-tatoeba``
            checkpoint.

    Returns:
        A ``(translated_text, message_text)`` tuple for the two output
        textboxes. On failure the tuple carries an error description instead
        of raising, so the UI always receives two strings.
    """
    # Nothing to do for empty / whitespace-only input; avoid loading a model.
    if not input_text or not input_text.strip():
        return "", "Nothing to translate: the input text is empty."

    # The dropdowns also offer entries (e.g. the "-----" separator) that may
    # not be present in all_langs; report that instead of raising KeyError.
    try:
        sl = all_langs[sselected_language]
        tl = all_langs[tselected_language]
    except KeyError as error:
        return "", f"Unknown language selection: {error}. Please select a valid source and target language."

    # Built before model_name is possibly rewritten for Helsinki-NLP below.
    message_text = f'Translated from {sselected_language} to {tselected_language} with {model_name}'

    if model_name.startswith('facebook/nllb'):
        try:
            src_code = languagecodes.nllb_language_codes[sselected_language]
            tgt_code = languagecodes.nllb_language_codes[tselected_language]
        except KeyError as error:
            return "", f"Language {error} is not available for {model_name}."
        tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang=src_code)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")
        translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=src_code, tgt_lang=tgt_code)
        translated_text = translator(input_text, max_length=512)
        return translated_text[0]['translation_text'], message_text

    if model_name.startswith('facebook/mbart-large'):
        from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
        try:
            src_code = languagecodes.mbart_large_languages[sselected_language]
            tgt_code = languagecodes.mbart_large_languages[tselected_language]
        except KeyError as error:
            return "", f"Language {error} is not available for {model_name}."
        model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
        tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
        # translate source to target
        tokenizer.src_lang = src_code
        # Keep the inputs on the same device as the model.
        encoded = tokenizer(input_text, return_tensors="pt").to(model.device)
        generated_tokens = model.generate(
            **encoded,
            forced_bos_token_id=tokenizer.lang_code_to_id[tgt_code]
        )
        return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0], message_text

    if model_name == "Helsinki-NLP":
        # Try the opus-mt checkpoint first, then fall back to opus-tatoeba.
        last_error = None
        candidates = (
            f"Helsinki-NLP/opus-mt-{sl}-{tl}",
            f"Helsinki-NLP/opus-tatoeba-{sl}-{tl}",
        )
        for repo_id in candidates:
            try:
                tokenizer = AutoTokenizer.from_pretrained(repo_id)
                model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(repo_id))
            except EnvironmentError as error:
                # "error" is unbound after the except block, so keep a reference.
                last_error = error
                continue
            break
        else:
            # Neither checkpoint could be loaded; repo_id is the last one tried.
            return f"Error finding model: {repo_id}! Try other available language combination.", str(last_error)
        model_name = repo_id
        # These checkpoints are language-pair specific, so no task prefix is added.
        prompt = input_text
    elif model_name.startswith('t5'):
        tokenizer = T5Tokenizer.from_pretrained(model_name)
        model = T5ForConditionalGeneration.from_pretrained(model_name, device_map="auto")
        prompt = f"translate {sselected_language} to {tselected_language}: {input_text}"
    else:
        return "", f"Unsupported model: {model_name}"

    # The model may have been placed on the GPU (model_to_cuda / device_map="auto");
    # move the input ids to the same device to avoid a device-mismatch error.
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(input_ids, max_length=512)
    translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    print(f'Translating from {sselected_language} to {tselected_language} with {model_name}:', f'{input_text} = {translated_text}', sep='\n')
    return translated_text, message_text
# Swap the values of the two language dropdowns
def swap_languages(src_lang, tgt_lang):
    """Return the two given values in reversed order: ``(tgt_lang, src_lang)``."""
    swapped = (tgt_lang, src_lang)
    return swapped
def create_interface():
    """Build and return the Gradio Blocks UI for the translator.

    The UI has an input textbox, source/target language dropdowns with a
    swap button, a model dropdown, a translate button and two read-only
    textboxes for the translation and for status/error messages.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost; confirm which widgets belong to each gr.Row().
    with gr.Blocks() as demo:
        gr.Markdown("## Machine Text Translation")

        with gr.Row():
            source_box = gr.Textbox(
                label="Enter text to translate:",
                placeholder="Type your text here...",
            )

        with gr.Row():
            source_dropdown = gr.Dropdown(
                choices=options,
                value=options[0],
                label="Source language",
                interactive=True,
            )
            target_dropdown = gr.Dropdown(
                choices=options,
                value=options[1],
                label="Target language",
                interactive=True,
            )
            language_pair = [source_dropdown, target_dropdown]
            swap_btn = gr.Button("Swap Languages")
            # Clicking writes the swapped pair back into the same two dropdowns.
            swap_btn.click(fn=swap_languages, inputs=language_pair, outputs=language_pair)

        model_dropdown = gr.Dropdown(
            choices=models,
            label="Select a model",
            value=models[4],
            interactive=True,
        )
        translate_btn = gr.Button("Translate")

        result_box = gr.Textbox(
            label="Translated text:",
            placeholder="Display field for translation",
            interactive=False,
            show_copy_button=True,
        )
        status_box = gr.Textbox(
            label="Messages:",
            placeholder="Display field for status and error messages",
            interactive=False,
        )

        # translate_text returns (translation, message), matching the two outputs.
        translate_btn.click(
            translate_text,
            inputs=[source_box, source_dropdown, target_dropdown, model_dropdown],
            outputs=[result_box, status_box],
        )

    return demo
# Build the UI at module level and start the Gradio app.
interface = create_interface()
interface.launch()