Update app.py

app.py CHANGED
@@ -4,8 +4,7 @@ import requests
 import gradio as gr
 from huggingface_hub import InferenceClient
 from dataclasses import dataclass
-import speech_recognition as sr
-import easyocr
+import pytesseract
 from PIL import Image
 
 @dataclass
@@ -35,8 +34,6 @@ class XylariaChat:
 
         self.system_prompt = """You are a helpful and harmless assistant. You are Xylaria developed by Sk Md Saad Amin . You should think step-by-step."""
 
-        self.reader = easyocr.Reader(['ch_sim','en'], gpu=False)
-
     def store_information(self, key, value):
         self.persistent_memory[key] = value
         return f"Stored: {key} = {value}"
@@ -88,8 +85,7 @@ class XylariaChat:
     def perform_math_ocr(self, image_path):
         try:
             img = Image.open(image_path)
-            result = self.reader.readtext(image_path)
-            text = ' '.join([item[1] for item in result])
+            text = pytesseract.image_to_string(img)
             return text.strip()
         except Exception as e:
             return f"Error during Math OCR: {e}"
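Note: pytesseract wraps the Tesseract command-line binary, so the Space also needs the system package installed (on Hugging Face Spaces that usually means listing tesseract-ocr in packages.txt; that file is not part of this commit). A minimal standalone sketch of the new OCR path:

    # Sketch of the swapped-in OCR backend; assumes the Tesseract binary
    # is available on PATH (e.g. via `tesseract-ocr` in packages.txt).
    import pytesseract
    from PIL import Image

    def perform_math_ocr(image_path):
        try:
            img = Image.open(image_path)
            # image_to_string shells out to Tesseract and returns plain text
            return pytesseract.image_to_string(img).strip()
        except Exception as e:
            return f"Error during Math OCR: {e}"

Unlike the removed easyocr path, this drops the ['ch_sim','en'] language pair; Tesseract defaults to English unless a lang argument is passed.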
@@ -156,37 +152,19 @@ class XylariaChat:
         prompt += "<|assistant|>\n"
         return prompt
 
-    def recognize_speech(self, audio_file):
-        recognizer = sr.Recognizer()
-
-        try:
-            with sr.AudioFile(audio_file) as source:
-                audio_data = recognizer.record(source)
-            text = recognizer.recognize_google(audio_data)
-            return text
-        except sr.UnknownValueError:
-            return "Could not understand audio"
-        except sr.RequestError:
-            return "Could not request results from Google Speech Recognition service"
-
     def create_interface(self):
-        def streaming_response(message, chat_history, image_filepath, math_ocr_image_path, audio_file):
-            if audio_file:
-                voice_message = self.recognize_speech(audio_file)
-                if not voice_message.startswith("Error"):
-                    message = voice_message
-
+        def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
            ocr_text = ""
            if math_ocr_image_path:
                ocr_text = self.perform_math_ocr(math_ocr_image_path)
                if ocr_text.startswith("Error"):
                    updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": ocr_text}]]
-                    yield "", updated_history, None, None, None
+                    yield "", updated_history, None, None
                    return
                elif len(ocr_text) > 500:
                    ocr_text = "OCR output is too large to be processed."
                    updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": ocr_text}]]
-                    yield "", updated_history, None, None, None
+                    yield "", updated_history, None, None
                    return
                else:
                    message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
@@ -198,7 +176,7 @@ class XylariaChat:
 
            if isinstance(response_stream, str):
                updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": response_stream}]]
-                yield "", updated_history, None, None, None
+                yield "", updated_history, None, None
                return
 
            full_response = ""
@@ -211,11 +189,11 @@
                    full_response += chunk_content
 
                    updated_history[-1][1]["content"] = full_response
-                    yield "", updated_history, None, None, None
+                    yield "", updated_history, None, None
            except Exception as e:
                print(f"Streaming error: {e}")
                updated_history[-1][1]["content"] = f"Error during response: {e}"
-                yield "", updated_history, None, None, None
+                yield "", updated_history, None, None
            return
 
            self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
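Note: a Gradio handler written as a generator must yield one value per component in its outputs list, so removing audio_input from the five-component outputs is what forces every yield in streaming_response down to four values. A minimal sketch of the contract (names follow this diff; the body is illustrative):

    def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
        # Exactly one value per output component:
        #   txt, chatbot, img, math_ocr_img
        yield "", chat_history, None, None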
@@ -313,12 +291,6 @@ class XylariaChat:
                    placeholder="Type your message...",
                    container=False
                )
-                with gr.Column(scale=1):
-                    audio_input = gr.Audio(
-                        sources=["microphone"],
-                        type="filepath",
-                        label="Voice Input"
-                    )
                btn = gr.Button("Send", scale=1)
 
            with gr.Row():
@@ -327,13 +299,13 @@ class XylariaChat:
 
        btn.click(
            fn=streaming_response,
-            inputs=[txt, chatbot, img, math_ocr_img, audio_input],
-            outputs=[txt, chatbot, img, math_ocr_img, audio_input]
+            inputs=[txt, chatbot, img, math_ocr_img],
+            outputs=[txt, chatbot, img, math_ocr_img]
        )
        txt.submit(
            fn=streaming_response,
-            inputs=[txt, chatbot, img, math_ocr_img, audio_input],
-            outputs=[txt, chatbot, img, math_ocr_img, audio_input]
+            inputs=[txt, chatbot, img, math_ocr_img],
+            outputs=[txt, chatbot, img, math_ocr_img]
        )
 
        clear.click(
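Note: for context, a runnable reduced sketch of how the four-slot handler wires into the Blocks layout after this commit (component construction is simplified relative to the real app, and the stub reply is illustrative):

    import gradio as gr

    def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
        # Append a stub [user, assistant] pair and clear the textbox.
        chat_history = (chat_history or []) + [[message, "stub reply"]]
        yield "", chat_history, None, None  # txt, chatbot, img, math_ocr_img

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        txt = gr.Textbox(placeholder="Type your message...", container=False)
        img = gr.Image(type="filepath")
        math_ocr_img = gr.Image(type="filepath", label="Math OCR Image")
        btn = gr.Button("Send")
        btn.click(
            fn=streaming_response,
            inputs=[txt, chatbot, img, math_ocr_img],
            outputs=[txt, chatbot, img, math_ocr_img],
        )
        txt.submit(
            fn=streaming_response,
            inputs=[txt, chatbot, img, math_ocr_img],
            outputs=[txt, chatbot, img, math_ocr_img],
        )

    demo.launch()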