import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the two translation models from the Hugging Face Hub.
# English to Moroccan Arabic (Darija):
tokenizer_eng_to_darija = AutoTokenizer.from_pretrained("lachkarsalim/Helsinki-translation-English_Moroccan-Arabic")
model_eng_to_darija = AutoModelForSeq2SeqLM.from_pretrained("lachkarsalim/Helsinki-translation-English_Moroccan-Arabic")

# Moroccan Arabic (Darija) to Modern Standard Arabic (MSA):
tokenizer_darija_to_msa = AutoTokenizer.from_pretrained("Saidtaoussi/AraT5_Darija_to_MSA")
model_darija_to_msa = AutoModelForSeq2SeqLM.from_pretrained("Saidtaoussi/AraT5_Darija_to_MSA")
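
# Optional: run the models on GPU when one is available. This block is an
# illustrative addition, not part of the original Space; both models also
# work on CPU, just more slowly. respond() moves the tokenized inputs onto
# each model's device, so generation stays consistent either way.
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
model_eng_to_darija.to(device)
model_darija_to_msa.to(device)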

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    translation_choice: str,
):
    """
    Translate the incoming message with the model matching the selected
    direction. Each message is translated independently: the chat history,
    system message, and sampling controls are part of the ChatInterface
    signature but are not consumed by these seq2seq translation models.
    """
    response = ""
    if translation_choice == "Moroccan Arabic to MSA":
        # Translate Moroccan Arabic (Darija) to Modern Standard Arabic.
        inputs = tokenizer_darija_to_msa(message, return_tensors="pt", padding=True)
        inputs = inputs.to(model_darija_to_msa.device)
        outputs = model_darija_to_msa.generate(
            **inputs, num_beams=5, max_length=512, early_stopping=True
        )
        response = tokenizer_darija_to_msa.decode(outputs[0], skip_special_tokens=True)
    elif translation_choice == "English to Moroccan Arabic":
        # Translate English to Moroccan Arabic (Darija).
        inputs = tokenizer_eng_to_darija(message, return_tensors="pt", padding=True)
        inputs = inputs.to(model_eng_to_darija.device)
        outputs = model_eng_to_darija.generate(
            **inputs, num_beams=5, max_length=512, early_stopping=True
        )
        response = tokenizer_eng_to_darija.decode(outputs[0], skip_special_tokens=True)
    return response
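
# Quick sanity check outside the UI (the example sentence is illustrative,
# not from the original app):
#
#   >>> respond("Hello, how are you?", [], "You are a friendly Chatbot.",
#   ...         512, 0.7, 0.95, "English to Moroccan Arabic")
#
# returns the Darija translation as a plain string.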

# Gradio chat UI. The system-message box and the sampling sliders come from
# the stock ChatInterface template; only the translation-direction dropdown
# changes what respond() returns.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        gr.Dropdown(
            label="Choose Translation Direction",
            choices=["English to Moroccan Arabic", "Moroccan Arabic to MSA"],
            value="English to Moroccan Arabic",
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()
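
# demo.launch() serves on http://127.0.0.1:7860 by default. Outside a
# Hugging Face Space you can pass launch(share=True) to request a temporary
# public link; inside a Space the plain call above is all that is needed.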