palbha committed
Commit 5c8bbca · verified · 1 Parent(s): 856548e

Create app.py

Files changed (1)
  1. app.py +176 -0
app.py ADDED
@@ -0,0 +1,176 @@
+ import os
+ import gradio as gr
+ import google.generativeai as genai
+ from gtts import gTTS
+ import tempfile
+ import time
+
+ # Configure the Gemini API
+ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")  # read the Gemini API key from an environment variable (e.g., a Space secret)
+ genai.configure(api_key=GOOGLE_API_KEY)
+
+ # Initialize the model
+ model = genai.GenerativeModel('gemini-pro')
+
+ def transcribe_audio(audio_path):
+     """
+     Placeholder for speech-to-text transcription.
+     For the free tier, we're returning a canned string instead of calling an STT service.
+     In a real application, you'd use a proper STT API here (see the sketch after this listing).
+     """
+     # For demonstration, we're returning a placeholder message
+     # In a real app, you would connect to a speech-to-text service
+     return "This is a placeholder for speech-to-text transcription. In a real application, this would be the transcribed text from your audio."
+
+ def text_to_speech(text):
+     """Convert text to speech using gTTS and return the path to the audio file"""
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
+         tts = gTTS(text=text, lang='en')
+         tts.save(fp.name)
+     return fp.name
+
+ def chat_with_gemini(user_input, history):
+     """
+     Process user input through the Gemini API and return the response
+     """
+     # Initialize conversation or continue existing one
+     if not history:
+         history = []
+         chat = model.start_chat(history=[])
+     else:
+         # Reconstruct the chat session with history
+         chat = model.start_chat(history=[
+             {"role": "user" if i % 2 == 0 else "model", "parts": [msg]}
+             for i, msg in enumerate(history)
+         ])
+
+     # Generate response
+     response = chat.send_message(user_input)
+     response_text = response.text
+
+     # Update history
+     history.append(user_input)
+     history.append(response_text)
+
+     # Generate audio response
+     audio_path = text_to_speech(response_text)
+
+     return response_text, history, audio_path
+
+ def process_audio(audio, history):
+     """Process audio input, convert to text, and get response"""
+     if audio is None:
+         return "No audio detected", history, None
+
+     # Convert audio to text
+     user_input = transcribe_audio(audio)
+
+     # Get response from Gemini
+     response_text, new_history, audio_path = chat_with_gemini(user_input, history)
+
+     return response_text, new_history, audio_path
+
+ def process_text(text_input, history):
+     """Process text input and get response"""
+     if not text_input.strip():
+         return "No input detected", history, None
+
+     # Get response from Gemini
+     response_text, new_history, audio_path = chat_with_gemini(text_input, history)
+
+     return response_text, new_history, audio_path
+
+ def display_history(history):
+     """Format the history for display"""
+     if not history:
+         return "No conversation history yet."
+
+     display_text = ""
+     for i in range(0, len(history), 2):
+         if i < len(history):
+             display_text += f"You: {history[i]}\n\n"
+         if i + 1 < len(history):
+             display_text += f"Assistant: {history[i+1]}\n\n"
+
+     return display_text
+
+ # Create the Gradio interface
+ with gr.Blocks(title="Gemini Audio Chatbot") as demo:
+     gr.Markdown("# Gemini Audio Chatbot")
+     gr.Markdown("Talk or type your message, and the assistant will respond with text and audio.")
+
+     # State for conversation history
+     history = gr.State([])
+
+     with gr.Row():
+         with gr.Column(scale=7):
+             # Chat history display
+             chat_display = gr.Markdown("No conversation history yet.")
+
+         with gr.Column(scale=3):
+             # Info and instructions
+             gr.Markdown("""
+             ## How to use:
+             1. Speak using the microphone or type your message
+             2. Wait for the assistant's response
+             3. The conversation history will be displayed on the left
+             """)
+
+     with gr.Row():
+         # Text input
+         text_input = gr.Textbox(
+             placeholder="Type your message here...",
+             label="Text Input"
+         )
+
+     with gr.Row():
+         # Audio input
+         audio_input = gr.Audio(
+             sources=["microphone"],
+             type="filepath",
+             label="Audio Input"
+         )
+
+     with gr.Row():
+         # Assistant's response
+         response_text = gr.Textbox(label="Assistant's Response")
+
+     with gr.Row():
+         # Audio output
+         audio_output = gr.Audio(label="Assistant's Voice")
+
+     # Buttons
+     with gr.Row():
+         clear_btn = gr.Button("Clear Conversation")
+
+     # Event handlers
+     text_input.submit(
+         process_text,
+         inputs=[text_input, history],
+         outputs=[response_text, history, audio_output]
+     ).then(
+         display_history,
+         inputs=[history],
+         outputs=[chat_display]
+     ).then(
+         lambda: "",
+         outputs=[text_input]
+     )
+
+     audio_input.change(
+         process_audio,
+         inputs=[audio_input, history],
+         outputs=[response_text, history, audio_output]
+     ).then(
+         display_history,
+         inputs=[history],
+         outputs=[chat_display]
+     )
+
+     clear_btn.click(
+         lambda: ([], "No conversation history yet.", "", None),
+         outputs=[history, chat_display, response_text, audio_output]
+     )
+
+ demo.launch()
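
Note on transcribe_audio: the committed function only returns a placeholder string. Below is a minimal sketch of what a real transcription step could look like, assuming the optional SpeechRecognition package and a WAV recording coming from the gr.Audio(type="filepath") microphone component; the dependency, error messages, and behaviour here are an illustration, not part of the committed file.

import speech_recognition as sr  # assumed extra dependency: pip install SpeechRecognition

def transcribe_audio(audio_path):
    """Transcribe a recorded WAV file via the free Google Web Speech endpoint."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:    # gr.Audio(type="filepath") passes in a file path
        audio_data = recognizer.record(source)  # read the whole recording into memory
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Sorry, I couldn't understand the audio."
    except sr.RequestError as exc:
        return f"Speech recognition service error: {exc}"

Swapping this in would leave the rest of process_audio unchanged, since it still receives a file path and returns plain text.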