Spaces:
Sleeping
Sleeping
File size: 2,565 Bytes
15678e0 82da87b 15678e0 82da87b d8c4e42 15678e0 82da87b d8c4e42 82da87b d8c4e42 15678e0 419f944 a26be94 82da87b 15678e0 419f944 15678e0 419f944 d8c4e42 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Hugging Face Hub checkpoints backing the two translation directions.
ENG_TO_DARIJA_CHECKPOINT = "lachkarsalim/Helsinki-translation-English_Moroccan-Arabic"
DARIJA_TO_MSA_CHECKPOINT = "Saidtaoussi/AraT5_Darija_to_MSA"

# English -> Moroccan Arabic (Darija): tokenizer and seq2seq model.
tokenizer_eng_to_darija = AutoTokenizer.from_pretrained(ENG_TO_DARIJA_CHECKPOINT)
model_eng_to_darija = AutoModelForSeq2SeqLM.from_pretrained(ENG_TO_DARIJA_CHECKPOINT)

# Moroccan Arabic (Darija) -> Modern Standard Arabic (MSA): tokenizer and seq2seq model.
tokenizer_darija_to_msa = AutoTokenizer.from_pretrained(DARIJA_TO_MSA_CHECKPOINT)
model_darija_to_msa = AutoModelForSeq2SeqLM.from_pretrained(DARIJA_TO_MSA_CHECKPOINT)
def translate_darija_to_msa(darija_text: str) -> str:
    """Translate Moroccan Arabic (Darija) text into Modern Standard Arabic.

    Args:
        darija_text: The Darija sentence to translate.

    Returns:
        The decoded output of the Darija-to-MSA model with special tokens
        removed, or an empty string when ``darija_text`` is ``None``, empty
        or whitespace-only.
    """
    # Nothing to translate: skip the model call instead of decoding whatever
    # the model would generate from an empty prompt.
    if darija_text is None or (isinstance(darija_text, str) and not darija_text.strip()):
        return ""

    inputs = tokenizer_darija_to_msa(darija_text, return_tensors="pt", padding=True)
    translated = model_darija_to_msa.generate(**inputs)
    # Only the first generated sequence is decoded and returned.
    return tokenizer_darija_to_msa.decode(translated[0], skip_special_tokens=True)
def translate_eng_to_darija(eng_text: str, direction: str = "eng_to_darija") -> str:
    """Translate English text into Moroccan Arabic (Darija).

    Args:
        eng_text: The sentence to translate.
        direction: Kept for backward compatibility. ``"eng_to_darija"`` is the
            only direction this function has a model for. Any other value is
            still run through the same English-to-Darija tokenizer and model
            (the output is unchanged from earlier versions), but a
            ``RuntimeWarning`` is emitted because no reverse checkpoint is
            loaded for this function.

    Returns:
        The decoded model output with special tokens removed, or an empty
        string when ``eng_text`` is ``None``, empty or whitespace-only.
    """
    # Nothing to translate: skip the model call instead of decoding whatever
    # the model would generate from an empty prompt.
    if eng_text is None or (isinstance(eng_text, str) and not eng_text.strip()):
        return ""

    if direction != "eng_to_darija":
        # Both directions used to be duplicated branches calling the very same
        # model; make the limitation visible instead of silently mislabelling
        # the result as a reverse translation.
        import warnings

        warnings.warn(
            f"direction={direction!r} is not supported; "
            "falling back to the English-to-Darija model",
            RuntimeWarning,
            stacklevel=2,
        )

    inputs = tokenizer_eng_to_darija(eng_text, return_tensors="pt", padding=True)
    translated = model_eng_to_darija.generate(**inputs)
    # Only the first generated sequence is decoded and returned.
    return tokenizer_eng_to_darija.decode(translated[0], skip_special_tokens=True)
def respond(message, translation_choice: str):
    """Route a request to the translator matching the chosen direction.

    Args:
        message: Text entered by the user.
        translation_choice: Either ``"Moroccan Arabic to MSA"`` or
            ``"English to Moroccan Arabic"``.

    Returns:
        The translated text produced by the selected translator.

    Raises:
        ValueError: If ``translation_choice`` is not one of the two labels
            above.
    """
    if translation_choice == "Moroccan Arabic to MSA":
        return translate_darija_to_msa(message)
    if translation_choice == "English to Moroccan Arabic":
        return translate_eng_to_darija(message, direction="eng_to_darija")
    # Fail loudly rather than implicitly returning None, which would surface
    # as a blank "translation" with no hint of what went wrong.
    raise ValueError(f"Unsupported translation direction: {translation_choice!r}")
# Input widgets, listed in the positional order `respond` receives them.
message_box = gr.Textbox(
    value="",
    label="Enter Your Text",
    placeholder="Type your sentence here...",
)
direction_dropdown = gr.Dropdown(
    label="Choose Translation Direction",
    choices=["English to Moroccan Arabic", "Moroccan Arabic to MSA"],
    value="English to Moroccan Arabic",
)

# Wire the widgets to `respond`; its return value is shown as plain text.
demo = gr.Interface(
    fn=respond,
    inputs=[message_box, direction_dropdown],
    outputs="text",
)

# Start serving the interface.
demo.launch()
|