Spaces:

Lap1official
/

API

Running

App Files Files Community

Reality123b committed on Dec 20, 2024

Commit

8dca5f4

verified ·

1 Parent(s): 7ab4fbd

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -12

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import requests
 import gradio as gr
 from huggingface_hub import InferenceClient
 from dataclasses import dataclass
 import pytesseract
 from PIL import Image
@@ -215,18 +216,44 @@ class XylariaChat:
                 prompt += f"<|assistant|>\n{msg['content']}<|end|>\n"
         prompt += "<|assistant|>\n"  # Start of assistant's turn
         return prompt
     def create_interface(self):
-        def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
             ocr_text = ""
             if math_ocr_image_path:
                 ocr_text = self.perform_math_ocr(math_ocr_image_path)
                 if ocr_text.startswith("Error"):
-                    # Handle OCR error
                     updated_history = chat_history + [[message, ocr_text]]
-                    yield "", updated_history, None, None
                     return
                 else:
                     message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
@@ -242,7 +269,7 @@ class XylariaChat:
             if isinstance(response_stream, str):
                 # Return immediately with the error message
                 updated_history = chat_history + [[message, response_stream]]
-                yield "", updated_history, None, None
                 return
             # Prepare for streaming response
@@ -258,12 +285,12 @@ class XylariaChat:
                         # Update the last message in chat history with partial response
                         updated_history[-1][1] = full_response
-                        yield "", updated_history, None, None
             except Exception as e:
                 print(f"Streaming error: {e}")
                 # Display error in the chat interface
                 updated_history[-1][1] = f"Error during response: {e}"
-                yield "", updated_history, None, None
                 return
             # Update conversation history
@@ -324,6 +351,16 @@ class XylariaChat:
                 transform: translateY(0);
             }
         }
         """
         with gr.Blocks(theme='soft', css=custom_css) as demo:
@@ -336,7 +373,7 @@ class XylariaChat:
                 )
                 # Enhanced Image Upload Section
-                with gr.Accordion("Image Input", open=False):
                     with gr.Row(elem_classes="image-container"):  # Use a Row for side-by-side layout
                         with gr.Column(elem_classes="image-upload"):
                             img = gr.Image(
@@ -362,6 +399,12 @@ class XylariaChat:
                             placeholder="Type your message...",
                             container=False
                         )
                     btn = gr.Button("Send", scale=1)
                 # Clear history and memory buttons
@@ -372,13 +415,13 @@ class XylariaChat:
                 # Submit functionality with streaming and image support
                 btn.click(
                     fn=streaming_response,
-                    inputs=[txt, chatbot, img, math_ocr_img],
-                    outputs=[txt, chatbot, img, math_ocr_img]
                 )
                 txt.submit(
                     fn=streaming_response,
-                    inputs=[txt, chatbot, img, math_ocr_img],
-                    outputs=[txt, chatbot, img, math_ocr_img]
                 )
                 # Clear conversation history
@@ -396,6 +439,21 @@ class XylariaChat:
                     outputs=[chatbot],
                     queue=False
                 )
                 # Ensure memory is cleared when the interface is closed
                 demo.load(self.reset_conversation, None, None)

 import gradio as gr
 from huggingface_hub import InferenceClient
 from dataclasses import dataclass
+import speech_recognition as sr  # Import speech_recognition
 import pytesseract
 from PIL import Image
                 prompt += f"<|assistant|>\n{msg['content']}<|end|>\n"
         prompt += "<|assistant|>\n"  # Start of assistant's turn
         return prompt
+    def recognize_speech(self, audio_file):
+        """
+        Transcribes audio to text using speech_recognition library.
+        """
+        recognizer = sr.Recognizer()
+        try:
+            with sr.AudioFile(audio_file) as source:
+                audio_data = recognizer.record(source)
+                text = recognizer.recognize_google(audio_data)  # Using Google Web Speech API
+                return text
+        except sr.UnknownValueError:
+            return "Could not understand audio"
+        except sr.RequestError:
+            return "Could not request results from Google Speech Recognition service"
     def create_interface(self):
+        def streaming_response(message, chat_history, image_filepath, math_ocr_image_path, audio_file):
+            # Speech Recognition (if audio is uploaded)
+            if audio_file:
+                voice_message = self.recognize_speech(audio_file)
+                if not voice_message.startswith("Error"):
+                    message = voice_message  # Use transcribed text as the message
             ocr_text = ""
+            # OCR (with output size check)
             if math_ocr_image_path:
                 ocr_text = self.perform_math_ocr(math_ocr_image_path)
                 if ocr_text.startswith("Error"):
                     updated_history = chat_history + [[message, ocr_text]]
+                    yield "", updated_history, None, None, None
+                    return
+                elif len(ocr_text) > 500:  # Check if OCR output is too large
+                    ocr_text = "OCR output is too large to be processed."
+                    updated_history = chat_history + [[message, ocr_text]]
+                    yield "", updated_history, None, None, None
                     return
                 else:
                     message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
             if isinstance(response_stream, str):
                 # Return immediately with the error message
                 updated_history = chat_history + [[message, response_stream]]
+                yield "", updated_history, None, None, None
                 return
             # Prepare for streaming response
                         # Update the last message in chat history with partial response
                         updated_history[-1][1] = full_response
+                        yield "", updated_history, None, None, None
             except Exception as e:
                 print(f"Streaming error: {e}")
                 # Display error in the chat interface
                 updated_history[-1][1] = f"Error during response: {e}"
+                yield "", updated_history, None, None, None
                 return
             # Update conversation history
                 transform: translateY(0);
             }
         }
+        /* Accordion Animation */
+        .gradio-accordion {
+            overflow: hidden;
+            transition: max-height 0.3s ease-in-out; /* Adjust duration as needed */
+            max-height: 0; /* Initially collapsed */
+        }
+        .gradio-accordion.open {
+            max-height: 500px; /* Adjust to expected max height of content */
+        }
         """
         with gr.Blocks(theme='soft', css=custom_css) as demo:
                 )
                 # Enhanced Image Upload Section
+                with gr.Accordion("Image Input", open=False) as accordion:
                     with gr.Row(elem_classes="image-container"):  # Use a Row for side-by-side layout
                         with gr.Column(elem_classes="image-upload"):
                             img = gr.Image(
                             placeholder="Type your message...",
                             container=False
                         )
+                    with gr.Column(scale=1):
+                        audio_input = gr.Audio(
+                            source="microphone",
+                            type="filepath",
+                            label="Voice Input"
+                        )
                     btn = gr.Button("Send", scale=1)
                 # Clear history and memory buttons
                 # Submit functionality with streaming and image support
                 btn.click(
                     fn=streaming_response,
+                    inputs=[txt, chatbot, img, math_ocr_img, audio_input],
+                    outputs=[txt, chatbot, img, math_ocr_img, audio_input]
                 )
                 txt.submit(
                     fn=streaming_response,
+                    inputs=[txt, chatbot, img, math_ocr_img, audio_input],
+                    outputs=[txt, chatbot, img, math_ocr_img, audio_input]
                 )
                 # Clear conversation history
                     outputs=[chatbot],
                     queue=False
                 )
+                # Accordion animation JavaScript
+                demo.load(None, None, None, _js="""
+                () => {
+                    const accordion = document.querySelector(".gradio-accordion");
+                    if (accordion) {
+                        const accordionHeader = accordion.querySelector(".label-wrap");
+                        accordionHeader.addEventListener("click", () => {
+                            accordion.classList.toggle("open");
+                        });
+                    }
+                }
+                """)
                 # Ensure memory is cleared when the interface is closed
                 demo.load(self.reset_conversation, None, None)