Ubuntu committed on
Commit
d8c4e42
·
1 Parent(s): a26be94

Remove max tokens slider and simplify interface

Browse files
Files changed (1) hide show
  1. app.py +33 -38
app.py CHANGED
@@ -1,55 +1,49 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
 
5
  # Load both translation models from Hugging Face
6
  # English to Moroccan Arabic (Darija)
7
- tokenizer_eng_to_darija = AutoTokenizer.from_pretrained("Saidtaoussi/AraT5_Darija_to_MSA")
8
- model_eng_to_darija = AutoModelForSeq2SeqLM.from_pretrained("Saidtaoussi/AraT5_Darija_to_MSA")
9
 
10
  # Moroccan Arabic (Darija) to Modern Standard Arabic (MSA)
11
- tokenizer_darija_to_msa = AutoTokenizer.from_pretrained("lachkarsalim/Helsinki-translation-English_Moroccan-Arabic")
12
- model_darija_to_msa = AutoModelForSeq2SeqLM.from_pretrained("lachkarsalim/Helsinki-translation-English_Moroccan-Arabic")
13
-
14
- def respond(
15
- message,
16
- history: list[tuple[str, str]],
17
- system_message,
18
- max_tokens,
19
- translation_choice: str,
20
- ):
21
- # Ensure there's no empty input
22
- if not message.strip():
23
- return "Error: Please enter a valid text to translate."
24
-
25
- # Initialize the response variable
26
- response = ""
27
-
 
 
 
 
 
 
 
 
 
28
  # Translate based on the user's choice
29
- try:
30
- if translation_choice == "Moroccan Arabic to MSA":
31
- # Translate Moroccan Arabic (Darija) to Modern Standard Arabic
32
- inputs = tokenizer_darija_to_msa(message, return_tensors="pt", padding=True)
33
- outputs = model_darija_to_msa.generate(inputs["input_ids"], num_beams=5, max_length=max_tokens, early_stopping=True)
34
- response = tokenizer_darija_to_msa.decode(outputs[0], skip_special_tokens=True)
35
-
36
- elif translation_choice == "English to Moroccan Arabic":
37
- # Translate English to Moroccan Arabic (Darija)
38
- inputs = tokenizer_eng_to_darija(message, return_tensors="pt", padding=True)
39
- outputs = model_eng_to_darija.generate(inputs["input_ids"], num_beams=5, max_length=max_tokens, early_stopping=True)
40
- response = tokenizer_eng_to_darija.decode(outputs[0], skip_special_tokens=True)
41
- except Exception as e:
42
- response = f"Error occurred: {str(e)}"
43
 
44
- return response
45
 
46
-
47
- # Gradio interface setup without pre-filled system message
48
  demo = gr.Interface(
49
  fn=respond,
50
  inputs=[
51
  gr.Textbox(value="", label="Enter Your Text", placeholder="Type your sentence here..."),
52
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
53
  gr.Dropdown(
54
  label="Choose Translation Direction",
55
  choices=["English to Moroccan Arabic", "Moroccan Arabic to MSA"],
@@ -61,3 +55,4 @@ demo = gr.Interface(
61
 
62
  # Launch the interface
63
  demo.launch()
 
 
1
  import gradio as gr
 
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
 
4
  # Load both translation models from Hugging Face
5
  # English to Moroccan Arabic (Darija)
6
+ tokenizer_eng_to_darija = AutoTokenizer.from_pretrained("lachkarsalim/Helsinki-translation-English_Moroccan-Arabic")
7
+ model_eng_to_darija = AutoModelForSeq2SeqLM.from_pretrained("lachkarsalim/Helsinki-translation-English_Moroccan-Arabic")
8
 
9
  # Moroccan Arabic (Darija) to Modern Standard Arabic (MSA)
10
+ tokenizer_darija_to_msa = AutoTokenizer.from_pretrained("Saidtaoussi/AraT5_Darija_to_MSA")
11
+ model_darija_to_msa = AutoModelForSeq2SeqLM.from_pretrained("Saidtaoussi/AraT5_Darija_to_MSA")
12
+
13
+ # Translation function for Darija to MSA
14
+ def translate_darija_to_msa(darija_text):
15
+ inputs = tokenizer_darija_to_msa(darija_text, return_tensors="pt", padding=True)
16
+ translated = model_darija_to_msa.generate(**inputs)
17
+ translated_text = tokenizer_darija_to_msa.decode(translated[0], skip_special_tokens=True)
18
+ return translated_text
19
+
20
+ # Translation function for English to Moroccan Arabic and vice versa
21
+ def translate_eng_to_darija(eng_text, direction="eng_to_darija"):
22
+ if direction == "eng_to_darija":
23
+ inputs = tokenizer_eng_to_darija(eng_text, return_tensors="pt", padding=True)
24
+ translated = model_eng_to_darija.generate(**inputs)
25
+ translated_text = tokenizer_eng_to_darija.decode(translated[0], skip_special_tokens=True)
26
+ else:
27
+ # Translate from Darija to English (reverse translation)
28
+ inputs = tokenizer_eng_to_darija(eng_text, return_tensors="pt", padding=True)
29
+ translated = model_eng_to_darija.generate(**inputs)
30
+ translated_text = tokenizer_eng_to_darija.decode(translated[0], skip_special_tokens=True)
31
+ return translated_text
32
+
33
+
34
+ # Gradio interface setup without max new tokens
35
+ def respond(message, translation_choice: str):
36
  # Translate based on the user's choice
37
+ if translation_choice == "Moroccan Arabic to MSA":
38
+ return translate_darija_to_msa(message)
39
+ elif translation_choice == "English to Moroccan Arabic":
40
+ return translate_eng_to_darija(message, direction="eng_to_darija")
 
 
 
 
 
 
 
 
 
 
41
 
 
42
 
 
 
43
  demo = gr.Interface(
44
  fn=respond,
45
  inputs=[
46
  gr.Textbox(value="", label="Enter Your Text", placeholder="Type your sentence here..."),
 
47
  gr.Dropdown(
48
  label="Choose Translation Direction",
49
  choices=["English to Moroccan Arabic", "Moroccan Arabic to MSA"],
 
55
 
56
  # Launch the interface
57
  demo.launch()
58
+