Update app.py
app.py
CHANGED
@@ -4,6 +4,7 @@ import requests
 import gradio as gr
 from huggingface_hub import InferenceClient
 from dataclasses import dataclass
+import speech_recognition as sr  # Import speech_recognition
 import pytesseract
 from PIL import Image
 
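The new import requires the SpeechRecognition package (PyPI name SpeechRecognition, import name speech_recognition), so it would also need to be listed in the Space's requirements.txt. A guarded import, sketched below and not part of this commit, would make a missing dependency fail loudly at startup:

# Sketch only: surface the missing dependency with a clear message.
try:
    import speech_recognition as sr
except ImportError as exc:
    raise ImportError(
        "SpeechRecognition is required for voice input; "
        "add 'SpeechRecognition' to requirements.txt"
    ) from exc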
@@ -215,18 +216,44 @@ class XylariaChat:
             prompt += f"<|assistant|>\n{msg['content']}<|end|>\n"
         prompt += "<|assistant|>\n"  # Start of assistant's turn
         return prompt
-
 
+    def recognize_speech(self, audio_file):
+        """
+        Transcribes audio to text using speech_recognition library.
+        """
+        recognizer = sr.Recognizer()
+
+        try:
+            with sr.AudioFile(audio_file) as source:
+                audio_data = recognizer.record(source)
+            text = recognizer.recognize_google(audio_data)  # Using Google Web Speech API
+            return text
+        except sr.UnknownValueError:
+            return "Could not understand audio"
+        except sr.RequestError:
+            return "Could not request results from Google Speech Recognition service"
+
     def create_interface(self):
-        def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
+        def streaming_response(message, chat_history, image_filepath, math_ocr_image_path, audio_file):
 
+            # Speech Recognition (if audio is uploaded)
+            if audio_file:
+                voice_message = self.recognize_speech(audio_file)
+                if not voice_message.startswith("Error"):
+                    message = voice_message  # Use transcribed text as the message
+
             ocr_text = ""
+            # OCR (with output size check)
             if math_ocr_image_path:
                 ocr_text = self.perform_math_ocr(math_ocr_image_path)
                 if ocr_text.startswith("Error"):
-                    # Handle OCR error
                     updated_history = chat_history + [[message, ocr_text]]
-                    yield "", updated_history, None, None
+                    yield "", updated_history, None, None, None
+                    return
+                elif len(ocr_text) > 500:  # Check if OCR output is too large
+                    ocr_text = "OCR output is too large to be processed."
+                    updated_history = chat_history + [[message, ocr_text]]
+                    yield "", updated_history, None, None, None
                     return
                 else:
                     message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
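One caveat in the new code path: streaming_response only discards a transcription when voice_message.startswith("Error"), but recognize_speech returns failure strings that begin with "Could not", so a failed transcription would slip through the check and be sent as the user's message. A sketch of the helper with failure strings that satisfy the caller's check (the "Error:" prefixes are my change, not the commit's):

import speech_recognition as sr

def recognize_speech(audio_file):
    """Transcribe an audio file; failure strings start with 'Error'."""
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_file) as source:
            audio_data = recognizer.record(source)  # read the whole file
        return recognizer.recognize_google(audio_data)  # Google Web Speech API
    except sr.UnknownValueError:
        return "Error: could not understand audio"
    except sr.RequestError:
        return "Error: could not reach the Google Speech Recognition service"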
@@ -242,7 +269,7 @@ class XylariaChat:
             if isinstance(response_stream, str):
                 # Return immediately with the error message
                 updated_history = chat_history + [[message, response_stream]]
-                yield "", updated_history, None, None
+                yield "", updated_history, None, None, None
                 return
 
             # Prepare for streaming response
@@ -258,12 +285,12 @@ class XylariaChat:
 
                     # Update the last message in chat history with partial response
                     updated_history[-1][1] = full_response
-                    yield "", updated_history, None, None
+                    yield "", updated_history, None, None, None
             except Exception as e:
                 print(f"Streaming error: {e}")
                 # Display error in the chat interface
                 updated_history[-1][1] = f"Error during response: {e}"
-                yield "", updated_history, None, None
+                yield "", updated_history, None, None, None
                 return
 
             # Update conversation history
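The extra None appended to every yield lines up with the fifth component (audio_input) added to the outputs lists further down: a generator wired to N outputs must yield N values per step, and yielding None clears that component (here, the uploaded image and recorded audio after sending). A minimal sketch of the pattern, with hypothetical names:

import gradio as gr

def stream(msg, history):
    # Append an empty assistant turn, then grow it step by step.
    history = history + [[msg, ""]]
    for ch in "ok":
        history[-1][1] += ch
        yield "", history  # exactly one value per declared output

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    txt = gr.Textbox()
    txt.submit(stream, inputs=[txt, chatbot], outputs=[txt, chatbot])

demo.launch()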
@@ -324,6 +351,16 @@ class XylariaChat:
                 transform: translateY(0);
             }
         }
+        /* Accordion Animation */
+        .gradio-accordion {
+            overflow: hidden;
+            transition: max-height 0.3s ease-in-out; /* Adjust duration as needed */
+            max-height: 0; /* Initially collapsed */
+        }
+
+        .gradio-accordion.open {
+            max-height: 500px; /* Adjust to expected max height of content */
+        }
         """
 
         with gr.Blocks(theme='soft', css=custom_css) as demo:
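The animation CSS keys off .gradio-accordion, an internal class name that Gradio does not guarantee across releases. Assuming a Gradio version where layout blocks accept elem_classes, attaching a custom class is more robust to upgrades; a sketch:

import gradio as gr

custom_css = """
.my-accordion { overflow: hidden; transition: max-height 0.3s ease-in-out; }
"""

with gr.Blocks(css=custom_css) as demo:
    # Style a class we own instead of Gradio's internal ".gradio-accordion".
    with gr.Accordion("Image Input", open=False, elem_classes="my-accordion"):
        gr.Markdown("accordion body")

demo.launch()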
@@ -336,7 +373,7 @@ class XylariaChat:
             )
 
             # Enhanced Image Upload Section
-            with gr.Accordion("Image Input", open=False):
+            with gr.Accordion("Image Input", open=False) as accordion:
                 with gr.Row(elem_classes="image-container"):  # Use a Row for side-by-side layout
                     with gr.Column(elem_classes="image-upload"):
                         img = gr.Image(
@@ -362,6 +399,12 @@ class XylariaChat:
                         placeholder="Type your message...",
                         container=False
                     )
+                with gr.Column(scale=1):
+                    audio_input = gr.Audio(
+                        source="microphone",
+                        type="filepath",
+                        label="Voice Input"
+                    )
                 btn = gr.Button("Send", scale=1)
 
             # Clear history and memory buttons
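gr.Audio(source="microphone", type="filepath") is the Gradio 3.x signature (Gradio 4 renamed source to sources=[...]); type="filepath" hands streaming_response a temporary audio file path, which is the shape recognize_speech expects. A minimal standalone sketch of the same wiring, with a stand-in transcriber:

import gradio as gr

def transcribe(audio_path):
    # Stand-in for XylariaChat.recognize_speech: just echo the temp-file path.
    return "" if audio_path is None else f"received: {audio_path}"

with gr.Blocks() as demo:
    audio = gr.Audio(source="microphone", type="filepath", label="Voice Input")
    out = gr.Textbox(label="Transcript")
    gr.Button("Transcribe").click(fn=transcribe, inputs=audio, outputs=out)

demo.launch()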
@@ -372,13 +415,13 @@ class XylariaChat:
             # Submit functionality with streaming and image support
             btn.click(
                 fn=streaming_response,
-                inputs=[txt, chatbot, img, math_ocr_img],
-                outputs=[txt, chatbot, img, math_ocr_img]
+                inputs=[txt, chatbot, img, math_ocr_img, audio_input],
+                outputs=[txt, chatbot, img, math_ocr_img, audio_input]
             )
             txt.submit(
                 fn=streaming_response,
-                inputs=[txt, chatbot, img, math_ocr_img],
-                outputs=[txt, chatbot, img, math_ocr_img]
+                inputs=[txt, chatbot, img, math_ocr_img, audio_input],
+                outputs=[txt, chatbot, img, math_ocr_img, audio_input]
             )
 
             # Clear conversation history
@@ -396,6 +439,21 @@ class XylariaChat:
                 outputs=[chatbot],
                 queue=False
             )
+
+            # Accordion animation JavaScript
+            demo.load(None, None, None, _js="""
+            () => {
+                const accordion = document.querySelector(".gradio-accordion");
+
+                if (accordion) {
+                    const accordionHeader = accordion.querySelector(".label-wrap");
+
+                    accordionHeader.addEventListener("click", () => {
+                        accordion.classList.toggle("open");
+                    });
+                }
+            }
+            """)
 
             # Ensure memory is cleared when the interface is closed
             demo.load(self.reset_conversation, None, None)