Reality123b committed on
Commit
8dca5f4
·
verified ·
1 Parent(s): 7ab4fbd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -12
app.py CHANGED
@@ -4,6 +4,7 @@ import requests
4
  import gradio as gr
5
  from huggingface_hub import InferenceClient
6
  from dataclasses import dataclass
 
7
  import pytesseract
8
  from PIL import Image
9
 
@@ -215,18 +216,44 @@ class XylariaChat:
215
  prompt += f"<|assistant|>\n{msg['content']}<|end|>\n"
216
  prompt += "<|assistant|>\n" # Start of assistant's turn
217
  return prompt
218
-
219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  def create_interface(self):
221
- def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
222
 
 
 
 
 
 
 
223
  ocr_text = ""
 
224
  if math_ocr_image_path:
225
  ocr_text = self.perform_math_ocr(math_ocr_image_path)
226
  if ocr_text.startswith("Error"):
227
- # Handle OCR error
228
  updated_history = chat_history + [[message, ocr_text]]
229
- yield "", updated_history, None, None
 
 
 
 
 
230
  return
231
  else:
232
  message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
@@ -242,7 +269,7 @@ class XylariaChat:
242
  if isinstance(response_stream, str):
243
  # Return immediately with the error message
244
  updated_history = chat_history + [[message, response_stream]]
245
- yield "", updated_history, None, None
246
  return
247
 
248
  # Prepare for streaming response
@@ -258,12 +285,12 @@ class XylariaChat:
258
 
259
  # Update the last message in chat history with partial response
260
  updated_history[-1][1] = full_response
261
- yield "", updated_history, None, None
262
  except Exception as e:
263
  print(f"Streaming error: {e}")
264
  # Display error in the chat interface
265
  updated_history[-1][1] = f"Error during response: {e}"
266
- yield "", updated_history, None, None
267
  return
268
 
269
  # Update conversation history
@@ -324,6 +351,16 @@ class XylariaChat:
324
  transform: translateY(0);
325
  }
326
  }
 
 
 
 
 
 
 
 
 
 
327
  """
328
 
329
  with gr.Blocks(theme='soft', css=custom_css) as demo:
@@ -336,7 +373,7 @@ class XylariaChat:
336
  )
337
 
338
  # Enhanced Image Upload Section
339
- with gr.Accordion("Image Input", open=False):
340
  with gr.Row(elem_classes="image-container"): # Use a Row for side-by-side layout
341
  with gr.Column(elem_classes="image-upload"):
342
  img = gr.Image(
@@ -362,6 +399,12 @@ class XylariaChat:
362
  placeholder="Type your message...",
363
  container=False
364
  )
 
 
 
 
 
 
365
  btn = gr.Button("Send", scale=1)
366
 
367
  # Clear history and memory buttons
@@ -372,13 +415,13 @@ class XylariaChat:
372
  # Submit functionality with streaming and image support
373
  btn.click(
374
  fn=streaming_response,
375
- inputs=[txt, chatbot, img, math_ocr_img],
376
- outputs=[txt, chatbot, img, math_ocr_img]
377
  )
378
  txt.submit(
379
  fn=streaming_response,
380
- inputs=[txt, chatbot, img, math_ocr_img],
381
- outputs=[txt, chatbot, img, math_ocr_img]
382
  )
383
 
384
  # Clear conversation history
@@ -396,6 +439,21 @@ class XylariaChat:
396
  outputs=[chatbot],
397
  queue=False
398
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
  # Ensure memory is cleared when the interface is closed
401
  demo.load(self.reset_conversation, None, None)
 
4
  import gradio as gr
5
  from huggingface_hub import InferenceClient
6
  from dataclasses import dataclass
7
+ import speech_recognition as sr # Import speech_recognition
8
  import pytesseract
9
  from PIL import Image
10
 
 
216
  prompt += f"<|assistant|>\n{msg['content']}<|end|>\n"
217
  prompt += "<|assistant|>\n" # Start of assistant's turn
218
  return prompt
 
219
 
220
def recognize_speech(self, audio_file):
    """
    Transcribe an audio file to text using the speech_recognition library.

    Args:
        audio_file: Path of the audio file to transcribe; it is passed
            unchanged to ``sr.AudioFile``.

    Returns:
        The transcribed text on success. On failure, a message that starts
        with "Error", so that callers checking ``startswith("Error")`` can
        tell a failed transcription apart from real user speech instead of
        forwarding the failure text as the user's message.
    """
    recognizer = sr.Recognizer()

    try:
        with sr.AudioFile(audio_file) as source:
            audio_data = recognizer.record(source)
        # Using Google Web Speech API
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # The service returned no usable transcription for this audio.
        return "Error: Could not understand audio"
    except sr.RequestError:
        # The service was unreachable or rejected the request.
        return "Error: Could not request results from Google Speech Recognition service"
    except (ValueError, OSError) as e:
        # sr.AudioFile raises ValueError for unsupported/corrupt files;
        # OSError covers a missing or unreadable path.
        return f"Error: Could not read audio file: {e}"
235
+
236
  def create_interface(self):
237
+ def streaming_response(message, chat_history, image_filepath, math_ocr_image_path, audio_file):
238
 
239
+ # Speech Recognition (if audio is uploaded)
240
+ if audio_file:
241
+ voice_message = self.recognize_speech(audio_file)
242
+ if not voice_message.startswith("Error"):
243
+ message = voice_message # Use transcribed text as the message
244
+
245
  ocr_text = ""
246
+ # OCR (with output size check)
247
  if math_ocr_image_path:
248
  ocr_text = self.perform_math_ocr(math_ocr_image_path)
249
  if ocr_text.startswith("Error"):
 
250
  updated_history = chat_history + [[message, ocr_text]]
251
+ yield "", updated_history, None, None, None
252
+ return
253
+ elif len(ocr_text) > 500: # Check if OCR output is too large
254
+ ocr_text = "OCR output is too large to be processed."
255
+ updated_history = chat_history + [[message, ocr_text]]
256
+ yield "", updated_history, None, None, None
257
  return
258
  else:
259
  message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
 
269
  if isinstance(response_stream, str):
270
  # Return immediately with the error message
271
  updated_history = chat_history + [[message, response_stream]]
272
+ yield "", updated_history, None, None, None
273
  return
274
 
275
  # Prepare for streaming response
 
285
 
286
  # Update the last message in chat history with partial response
287
  updated_history[-1][1] = full_response
288
+ yield "", updated_history, None, None, None
289
  except Exception as e:
290
  print(f"Streaming error: {e}")
291
  # Display error in the chat interface
292
  updated_history[-1][1] = f"Error during response: {e}"
293
+ yield "", updated_history, None, None, None
294
  return
295
 
296
  # Update conversation history
 
351
  transform: translateY(0);
352
  }
353
  }
354
+ /* Accordion Animation */
355
+ .gradio-accordion {
356
+ overflow: hidden;
357
+ transition: max-height 0.3s ease-in-out; /* Adjust duration as needed */
358
+ max-height: 0; /* Initially collapsed */
359
+ }
360
+
361
+ .gradio-accordion.open {
362
+ max-height: 500px; /* Adjust to expected max height of content */
363
+ }
364
  """
365
 
366
  with gr.Blocks(theme='soft', css=custom_css) as demo:
 
373
  )
374
 
375
  # Enhanced Image Upload Section
376
+ with gr.Accordion("Image Input", open=False) as accordion:
377
  with gr.Row(elem_classes="image-container"): # Use a Row for side-by-side layout
378
  with gr.Column(elem_classes="image-upload"):
379
  img = gr.Image(
 
399
  placeholder="Type your message...",
400
  container=False
401
  )
402
+ with gr.Column(scale=1):
403
+ audio_input = gr.Audio(
404
+ source="microphone",
405
+ type="filepath",
406
+ label="Voice Input"
407
+ )
408
  btn = gr.Button("Send", scale=1)
409
 
410
  # Clear history and memory buttons
 
415
  # Submit functionality with streaming and image support
416
  btn.click(
417
  fn=streaming_response,
418
+ inputs=[txt, chatbot, img, math_ocr_img, audio_input],
419
+ outputs=[txt, chatbot, img, math_ocr_img, audio_input]
420
  )
421
  txt.submit(
422
  fn=streaming_response,
423
+ inputs=[txt, chatbot, img, math_ocr_img, audio_input],
424
+ outputs=[txt, chatbot, img, math_ocr_img, audio_input]
425
  )
426
 
427
  # Clear conversation history
 
439
  outputs=[chatbot],
440
  queue=False
441
  )
442
+
443
+ # Accordion animation JavaScript
444
+ demo.load(None, None, None, _js="""
445
+ () => {
446
+ const accordion = document.querySelector(".gradio-accordion");
447
+
448
+ if (accordion) {
449
+ const accordionHeader = accordion.querySelector(".label-wrap");
450
+
451
+ accordionHeader.addEventListener("click", () => {
452
+ accordion.classList.toggle("open");
453
+ });
454
+ }
455
+ }
456
+ """)
457
 
458
  # Ensure memory is cleared when the interface is closed
459
  demo.load(self.reset_conversation, None, None)