palbha committed
Commit 9203946 (verified) · 1 Parent(s): 1131a4e

Create app.py

Files changed (1): app.py +161 -0
app.py ADDED
@@ -0,0 +1,161 @@
+import os
+import gradio as gr
+from google import genai
+from google.genai import types  # used to rebuild the chat history below
+from gtts import gTTS
+import tempfile
+
+# Configure the Gemini API
+GOOGLE_API_KEY = os.getenv("gemini_api")  # Ensure your API key is set in this environment variable
+client = genai.Client(api_key=GOOGLE_API_KEY)
+
+def transcribe_audio(audio_path):
+    """
+    Transcribe the audio file using the Gemini API.
+    """
+    try:
+        # Upload the audio file
+        uploaded_file = client.files.upload(file=audio_path)
+
+        # Send the file to Gemini for transcription
+        response = client.models.generate_content(
+            model='gemini-2.0-flash',
+            contents=['Transcribe the input audio & return only the transcription.', uploaded_file]
+        )
+
+        print("Transcription Response:", response.text)
+        return response.text
+
+    except Exception as e:
+        print("Error in transcription:", str(e))
+        return "Error in transcription"
+
+def text_to_speech(text):
+    """Convert text to speech using gTTS and return the path to the audio file."""
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
+        tts = gTTS(text=text, lang='en')
+        tts.save(fp.name)
+        return fp.name
+
+def chat_with_gemini(user_input, history):
+    """
+    Process user input through the Gemini API and return the response.
+    """
+    if history is None or not isinstance(history, list):  # Ensure history is initialized
+        history = []
+
+    # Initialize or continue the conversation. history stores (user, reply) text
+    # tuples rather than a chat object, so the session is rebuilt from those
+    # turns on every call instead of reusing history[-1] directly.
+    past_turns = []
+    for past_user, past_reply in history:
+        past_turns.append(types.Content(role="user", parts=[types.Part(text=past_user)]))
+        past_turns.append(types.Content(role="model", parts=[types.Part(text=past_reply)]))
+    chat = client.chats.create(model="gemini-2.0-flash", history=past_turns)
+
+    print("User input:", user_input)
+
+    # Generate response
+    response = chat.send_message(user_input)
+    response_text = response.text
+    print("Response text:", response_text)
+
+    # Append the new turn to the history
+    history.append((user_input, response_text))
+
+    # Generate audio response
+    audio_path = text_to_speech(response_text)
+
+    return response_text, history, audio_path
+
+def process_audio(audio, history):
+    """Process audio input, convert it to text, and get a response."""
+    if audio is None:
+        return "No audio detected", history, None
+
+    # Convert audio to text
+    user_input = transcribe_audio(audio)
+
+    # Get response from Gemini
+    response_text, new_history, audio_path = chat_with_gemini(user_input, history)
+
+    return response_text, new_history, audio_path
+
+def process_text(text_input, history):
+    """Process text input and get a response."""
+    if not text_input.strip():
+        return "No input detected", history, None
+
+    # Get response from Gemini
+    response_text, new_history, audio_path = chat_with_gemini(text_input, history)
+
+    return response_text, new_history, audio_path
+
+def display_history(history):
+    """Format the history for display."""
+    if not history:
+        return "No conversation history yet."
+
+    return "\n".join([f"You: {msg[0]}\nAssistant: {msg[1]}\n" for msg in history])
+
+# Create the Gradio interface
+with gr.Blocks(title="Gemini Audio Chatbot") as demo:
+    gr.Markdown("# Gemini Audio Chatbot")
+    gr.Markdown("Talk or type your message, and the assistant will respond with text and audio.")
+
+    # State for conversation history
+    history = gr.State([])  # Ensuring the history persists
+
+    with gr.Row():
+        with gr.Column(scale=7):
+            chat_display = gr.Markdown("No conversation history yet.")
+
+        with gr.Column(scale=3):
+            gr.Markdown("""
+            ## How to use:
+            1. Speak using the microphone or type your message
+            2. Wait for the assistant's response
+            3. The conversation history will be displayed on the left
+            """)
+
+    with gr.Row():
+        audio_input = gr.Audio(
+            sources=["microphone"],
+            type="filepath",
+            label="Audio Input"
+        )
+
+    with gr.Row():
+        text_input = gr.Textbox(label="Type your message here")
+
+    with gr.Row():
+        response_text = gr.Textbox(label="Assistant's Response")
+
+    with gr.Row():
+        audio_output = gr.Audio(label="Assistant's Voice")
+
+    # Buttons
+    with gr.Row():
+        clear_btn = gr.Button("Clear Conversation")
+
+    # Audio and text input handling
+    audio_input.change(
+        process_audio,
+        inputs=[audio_input, history],
+        outputs=[response_text, history, audio_output]
+    ).then(
+        display_history,
+        inputs=[history],
+        outputs=[chat_display]
+    )
+
+    text_input.submit(
+        process_text,
+        inputs=[text_input, history],
+        outputs=[response_text, history, audio_output]
+    ).then(
+        display_history,
+        inputs=[history],
+        outputs=[chat_display]
+    )
+
+    # Clear conversation
+    clear_btn.click(
+        lambda: ([], "No conversation history yet.", "", None),
+        outputs=[history, chat_display, response_text, audio_output]
+    )
+
+demo.launch()
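
For a quick check outside the Gradio UI, the sketch below (not part of the commit) runs the same Gemini chat plus gTTS round trip that app.py performs. It assumes the gemini_api environment variable holds a valid Gemini API key and that the google-genai and gTTS packages are installed; the prompt text and output path are illustrative only.

# Minimal sketch: Gemini chat + gTTS round trip without the Gradio UI
import os
import tempfile

from google import genai
from gtts import gTTS

client = genai.Client(api_key=os.getenv("gemini_api"))

# Ask the model for a short reply
chat = client.chats.create(model="gemini-2.0-flash")
reply = chat.send_message("Give me a one-sentence greeting.").text
print(reply)

# Convert the reply to speech and report where the MP3 was written
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
    gTTS(text=reply, lang="en").save(fp.name)
    print("Spoken reply written to:", fp.name)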