Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
|
3 |
+
from gtts import gTTS
|
4 |
+
import tempfile
|
5 |
+
|
6 |
+
# Function to initialize models with exception handling
|
7 |
+
def initialize_model():
    """Load the speech-recognition, translation and dialogue models.

    Returns:
        A 4-tuple ``(asr_model, translation_model, tokenizer, model)``.
        If anything raises while loading, the error is printed and
        ``(None, None, None, None)`` is returned instead.
    """
    dialogue_checkpoint = "microsoft/DialoGPT-medium"
    try:
        # Voice-to-text
        speech_recognizer = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-medium",
        )
        # Text translation
        translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-mul")
        # Dialogue generation: tokenizer and causal LM share one checkpoint
        dialogue_tokenizer = AutoTokenizer.from_pretrained(dialogue_checkpoint)
        dialogue_model = AutoModelForCausalLM.from_pretrained(dialogue_checkpoint)
    except Exception as e:
        print(f"Error initializing models: {e}")
        return (None,) * 4
    return speech_recognizer, translator, dialogue_tokenizer, dialogue_model
|
24 |
+
|
25 |
+
# Initialize the models
|
26 |
+
# Runs once at import time; all four names are None if initialize_model() hit an error.
asr_model, translation_model, tokenizer, conversation_model = initialize_model()
|
27 |
+
|
28 |
+
def chatbot_speech_to_speech(audio_input, target_language):
    """Answer a spoken message with a spoken reply in the chosen language.

    Steps: transcribe the audio, translate the transcript to English when the
    selected language is not English, generate a reply with the dialogue
    model, translate the reply back, and synthesise it to an MP3 with gTTS.

    Args:
        audio_input: Audio handed to the ASR pipeline (the UI passes a file
            path).
        target_language: Language code selected in the UI, e.g. "en" or "fr".

    Returns:
        Path of a temporary ``.mp3`` file containing the spoken reply. The
        file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If any step fails. The result feeds an audio output that
            expects a file path, so the failure is raised for Gradio to
            display rather than returned as a message string.
    """
    try:
        needs_translation = target_language != "en"

        # Step 1: Convert Audio to Text
        text_input = asr_model(audio_input)["text"]

        # Step 2: Translate Text to English if the input language is not English
        # NOTE(review): the translation pipeline is loaded from an "en-mul"
        # checkpoint; confirm it can actually translate *into* English.
        if needs_translation:
            translated_text = translation_model(
                text_input, src_lang=target_language, tgt_lang="en"
            )[0]["translation_text"]
        else:
            translated_text = text_input

        # Step 3: Generate conversational response using the dialogue model
        inputs = tokenizer.encode(
            translated_text + tokenizer.eos_token, return_tensors="pt"
        )
        response_ids = conversation_model.generate(
            inputs, max_length=100, pad_token_id=tokenizer.eos_token_id
        )
        # Skip the first inputs.shape[-1] positions (the prompt) so only the
        # newly generated tokens are decoded.
        response_text = tokenizer.decode(
            response_ids[:, inputs.shape[-1]:][0], skip_special_tokens=True
        )

        # Step 4: Translate the response text back to the target language
        if needs_translation:
            final_response = translation_model(
                response_text, src_lang="en", tgt_lang=target_language
            )[0]["translation_text"]
        else:
            final_response = response_text

        # Step 5: Convert text to speech using gTTS
        tts = gTTS(final_response, lang=target_language)
        # Reserve a unique path, then close the handle before gTTS writes to
        # it: the open handle was previously leaked on every request.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            output_path = temp_file.name
        tts.save(output_path)

        return output_path
    except Exception as e:
        raise gr.Error(f"Error in processing: {e}") from e
|
58 |
+
|
59 |
+
# Gradio Interface Function
|
60 |
+
def interface(audio, language):
    """Gradio click handler: pass the recording and language to the chatbot.

    Returns whatever ``chatbot_speech_to_speech`` returns, unchanged.
    """
    return chatbot_speech_to_speech(audio, language)
|
63 |
+
|
64 |
+
# Define the Gradio app with Blocks using the latest syntax
|
65 |
+
with gr.Blocks() as gradio_ui:
    gr.Markdown("# Multilingual Voice-to-Voice Chatbot for Kids")
    gr.Markdown("### Speak to the chatbot in your selected language and receive a spoken response.")

    # The recording reaches the handler as a file path.
    audio_input = gr.Audio(type="filepath", label="Record your message")
    # Pre-select English explicitly: depending on the Gradio version an unset
    # dropdown may start empty and hand None to the handler as the language.
    language_dropdown = gr.Dropdown(
        choices=["en", "fr", "es", "de", "zh", "ur"],
        value="en",
        label="Select Language",
    )

    # The handler returns the path of the synthesised reply.
    result_audio = gr.Audio(type="filepath", label="Chatbot Response")

    submit_btn = gr.Button("Submit")
    submit_btn.click(
        fn=interface,
        inputs=[audio_input, language_dropdown],
        outputs=result_audio,
    )
|
77 |
+
|
78 |
+
# Launch the app
|
79 |
+
# initialize_model() signals failure by returning None for every model, so
# test for None explicitly instead of relying on the objects' truthiness.
if all(
    component is not None
    for component in (asr_model, translation_model, tokenizer, conversation_model)
):
    gradio_ui.launch()
else:
    print("Error initializing one or more models. Please check your model configuration.")
|