aminahmed78 committed on
Commit
9a374f9
·
verified ·
1 Parent(s): ea75107

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
3
+ from gtts import gTTS
4
+ import tempfile
5
+
6
+ # Function to initialize models with exception handling
7
def initialize_model():
    """Load the speech-recognition, translation and dialogue models.

    Returns:
        A 4-tuple ``(asr_model, translation_model, tokenizer, model)``.
        If any loading step raises, the error is printed and
        ``(None, None, None, None)`` is returned instead.
    """
    # Conversational checkpoint shared by the tokenizer and the causal LM.
    dialogue_checkpoint = "microsoft/DialoGPT-medium"

    try:
        # Voice-to-text.
        speech_recognizer = pipeline("automatic-speech-recognition", model="openai/whisper-medium")

        # Text translation (English -> multiple target languages).
        translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-mul")

        # Dialogue generation.
        dialogue_tokenizer = AutoTokenizer.from_pretrained(dialogue_checkpoint)
        dialogue_model = AutoModelForCausalLM.from_pretrained(dialogue_checkpoint)
    except Exception as e:
        print(f"Error initializing models: {e}")
        return None, None, None, None

    return speech_recognizer, translator, dialogue_tokenizer, dialogue_model
24
+
25
# Load every model once at import time. initialize_model() returns four Nones
# if any loading step raised, so each of these names may be None afterwards.
asr_model, translation_model, tokenizer, conversation_model = initialize_model()
27
+
28
# Sentence-initial target-language tokens required by the
# Helsinki-NLP/opus-mt-en-mul checkpoint, keyed by the two-letter codes
# offered in the UI dropdown.
_OPUS_TARGET_TOKENS = {
    "fr": ">>fra<<",
    "es": ">>spa<<",
    "de": ">>deu<<",
    "zh": ">>zho<<",
    "ur": ">>urd<<",
}


def chatbot_speech_to_speech(audio_input, target_language):
    """Answer a spoken message with a spoken reply in the same language.

    Pipeline: speech -> English text -> dialogue reply (English) ->
    reply translated to ``target_language`` -> synthesized MP3.

    Args:
        audio_input: Path to the recorded audio file (as produced by a
            ``gr.Audio(type="filepath")`` component).
        target_language: Two-letter code of the language the user speaks and
            wants to hear back: ``"en"`` or a key of ``_OPUS_TARGET_TOKENS``.

    Returns:
        Path to a temporary ``.mp3`` file containing the spoken reply. The
        file is not deleted automatically.

    Raises:
        gr.Error: If no audio or language was supplied, the language is not
            supported, or any processing step fails. Raising (instead of
            returning the message) keeps an error string from being handed to
            the audio output component as if it were a file path.
    """
    if not audio_input:
        raise gr.Error("No audio was provided. Please record a message first.")
    if not target_language:
        raise gr.Error("No language was selected. Please choose a language.")
    if target_language != "en" and target_language not in _OPUS_TARGET_TOKENS:
        raise gr.Error(f"Unsupported language: {target_language}")

    try:
        # Steps 1-2: speech to English text. The loaded translation model only
        # goes from English to other languages, so for non-English speech
        # Whisper's own "translate" task is used to obtain English directly.
        if target_language == "en":
            english_text = asr_model(audio_input)["text"]
        else:
            english_text = asr_model(
                audio_input,
                generate_kwargs={"task": "translate", "language": target_language},
            )["text"]

        # Step 3: generate the conversational reply with the dialogue model.
        # Note: max_length counts the prompt tokens as well as the reply.
        inputs = tokenizer.encode(english_text + tokenizer.eos_token, return_tensors='pt')
        response_ids = conversation_model.generate(inputs, max_length=100, pad_token_id=tokenizer.eos_token_id)
        # Keep only the newly generated tokens (everything after the prompt).
        response_text = tokenizer.decode(response_ids[:, inputs.shape[-1]:][0], skip_special_tokens=True)

        # Step 4: translate the reply into the target language. The en->mul
        # checkpoint selects its output language from a ">>id<<" prefix.
        if target_language == "en":
            final_response = response_text
        else:
            tagged_text = f"{_OPUS_TARGET_TOKENS[target_language]} {response_text}"
            final_response = translation_model(tagged_text)[0]['translation_text']

        # Step 5: text to speech. Reserve a temp path and close the handle
        # before gTTS writes to it, so no file descriptor is leaked.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            output_path = temp_file.name
        gTTS(final_response, lang=target_language).save(output_path)

        return output_path
    except Exception as e:
        raise gr.Error(f"Error in processing: {e}") from e
58
+
59
+ # Gradio Interface Function
60
def interface(audio, language):
    """Gradio callback: forward the inputs to ``chatbot_speech_to_speech``.

    Args:
        audio: Value of the audio input component.
        language: Value of the language dropdown.

    Returns:
        Whatever ``chatbot_speech_to_speech(audio, language)`` returns.
    """
    return chatbot_speech_to_speech(audio, language)
63
+
64
+ # Define the Gradio app with Blocks using the latest syntax
65
with gr.Blocks() as gradio_ui:
    gr.Markdown("# Multilingual Voice-to-Voice Chatbot for Kids")
    gr.Markdown("### Speak to the chatbot in your selected language and receive a spoken response.")

    # Inputs: the recording is handed to the callback as a file path.
    audio_input = gr.Audio(type="filepath", label="Record your message")
    # Pre-select a language explicitly so Submit has a language even if the
    # user never touches the dropdown, regardless of the library's default.
    language_dropdown = gr.Dropdown(
        choices=["en", "fr", "es", "de", "zh", "ur"],
        value="en",
        label="Select Language",
    )

    # Output: the callback's result is shown in an audio player.
    result_audio = gr.Audio(type="filepath", label="Chatbot Response")

    submit_btn = gr.Button("Submit")
    submit_btn.click(fn=interface, inputs=[audio_input, language_dropdown], outputs=result_audio)
77
+
78
+ # Launch the app
79
# initialize_model() signals failure by returning None for every model, so
# compare against None explicitly rather than relying on the truthiness of
# the loaded objects.
if all(
    component is not None
    for component in (asr_model, translation_model, tokenizer, conversation_model)
):
    gradio_ui.launch()
else:
    print("Error initializing one or more models. Please check your model configuration.")