Update app.py

app.py CHANGED
@@ -4,8 +4,7 @@ import requests
 import gradio as gr
 from huggingface_hub import InferenceClient
 from dataclasses import dataclass
-import speech_recognition as sr
-import easyocr
+import pytesseract
 from PIL import Image
 
 @dataclass
@@ -35,8 +34,6 @@ class XylariaChat:
 
         self.system_prompt = """You are a helpful and harmless assistant. You are Xylaria developed by Sk Md Saad Amin . You should think step-by-step."""
 
-        self.reader = easyocr.Reader(['ch_sim','en'], gpu=False)
-
     def store_information(self, key, value):
         self.persistent_memory[key] = value
         return f"Stored: {key} = {value}"
@@ -88,8 +85,7 @@ class XylariaChat:
     def perform_math_ocr(self, image_path):
         try:
             img = Image.open(image_path)
-            result = self.reader.readtext(image_path)
-            text = ' '.join([item[1] for item in result])
+            text = pytesseract.image_to_string(img)
             return text.strip()
         except Exception as e:
             return f"Error during Math OCR: {e}"
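Note: pytesseract wraps the Tesseract command-line binary, so the Space also needs the system package installed (on Hugging Face Spaces that usually means listing tesseract-ocr in packages.txt; that file is not part of this commit). A minimal standalone sketch of the new OCR path:

    # Sketch of the swapped-in OCR backend; assumes the Tesseract binary
    # is available on PATH (e.g. via `tesseract-ocr` in packages.txt).
    import pytesseract
    from PIL import Image

    def perform_math_ocr(image_path):
        try:
            img = Image.open(image_path)
            # image_to_string shells out to Tesseract and returns plain text
            return pytesseract.image_to_string(img).strip()
        except Exception as e:
            return f"Error during Math OCR: {e}"

Unlike the removed easyocr path, this drops the ['ch_sim','en'] language pair; Tesseract defaults to English unless a lang argument is passed.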
@@ -156,37 +152,19 @@ class XylariaChat:
         prompt += "<|assistant|>\n"
         return prompt
 
-    def recognize_speech(self, audio_file):
-        recognizer = sr.Recognizer()
-
-        try:
-            with sr.AudioFile(audio_file) as source:
-                audio_data = recognizer.record(source)
-            text = recognizer.recognize_google(audio_data)
-            return text
-        except sr.UnknownValueError:
-            return "Could not understand audio"
-        except sr.RequestError:
-            return "Could not request results from Google Speech Recognition service"
-
     def create_interface(self):
-        def streaming_response(message, chat_history, image_filepath, math_ocr_image_path, audio_file):
-            if audio_file:
-                voice_message = self.recognize_speech(audio_file)
-                if not voice_message.startswith("Error"):
-                    message = voice_message
-
+        def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
            ocr_text = ""
            if math_ocr_image_path:
                ocr_text = self.perform_math_ocr(math_ocr_image_path)
                if ocr_text.startswith("Error"):
                    updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": ocr_text}]]
-                    yield "", updated_history, None, None, None
+                    yield "", updated_history, None, None
                    return
                elif len(ocr_text) > 500:
                    ocr_text = "OCR output is too large to be processed."
                    updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": ocr_text}]]
-                    yield "", updated_history, None, None, None
+                    yield "", updated_history, None, None
                    return
                else:
                    message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
@@ -198,7 +176,7 @@ class XylariaChat:
 
            if isinstance(response_stream, str):
                updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": response_stream}]]
-                yield "", updated_history, None, None, None
+                yield "", updated_history, None, None
                return
 
            full_response = ""
@@ -211,11 +189,11 @@
                    full_response += chunk_content
 
                    updated_history[-1][1]["content"] = full_response
-                    yield "", updated_history, None, None, None
+                    yield "", updated_history, None, None
            except Exception as e:
                print(f"Streaming error: {e}")
                updated_history[-1][1]["content"] = f"Error during response: {e}"
-                yield "", updated_history, None, None, None
+                yield "", updated_history, None, None
            return
 
            self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
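Note: a Gradio handler written as a generator must yield one value per component in its outputs list, so removing audio_input from the five-component outputs is what forces every yield in streaming_response down to four values. A minimal sketch of the contract (names follow this diff; the body is illustrative):

    def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
        # Exactly one value per output component:
        #   txt, chatbot, img, math_ocr_img
        yield "", chat_history, None, None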
@@ -313,12 +291,6 @@ class XylariaChat:
                    placeholder="Type your message...",
                    container=False
                )
-                with gr.Column(scale=1):
-                    audio_input = gr.Audio(
-                        sources=["microphone"],
-                        type="filepath",
-                        label="Voice Input"
-                    )
                btn = gr.Button("Send", scale=1)
 
            with gr.Row():
@@ -327,13 +299,13 @@ class XylariaChat:
 
        btn.click(
            fn=streaming_response,
-            inputs=[txt, chatbot, img, math_ocr_img, audio_input],
-            outputs=[txt, chatbot, img, math_ocr_img, audio_input]
+            inputs=[txt, chatbot, img, math_ocr_img],
+            outputs=[txt, chatbot, img, math_ocr_img]
        )
        txt.submit(
            fn=streaming_response,
-            inputs=[txt, chatbot, img, math_ocr_img, audio_input],
-            outputs=[txt, chatbot, img, math_ocr_img, audio_input]
+            inputs=[txt, chatbot, img, math_ocr_img],
+            outputs=[txt, chatbot, img, math_ocr_img]
        )
 
        clear.click(
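Note: for context, a runnable reduced sketch of how the four-slot handler wires into the Blocks layout after this commit (component construction is simplified relative to the real app, and the stub reply is illustrative):

    import gradio as gr

    def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
        # Append a stub [user, assistant] pair and clear the textbox.
        chat_history = (chat_history or []) + [[message, "stub reply"]]
        yield "", chat_history, None, None  # txt, chatbot, img, math_ocr_img

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        txt = gr.Textbox(placeholder="Type your message...", container=False)
        img = gr.Image(type="filepath")
        math_ocr_img = gr.Image(type="filepath", label="Math OCR Image")
        btn = gr.Button("Send")
        btn.click(
            fn=streaming_response,
            inputs=[txt, chatbot, img, math_ocr_img],
            outputs=[txt, chatbot, img, math_ocr_img],
        )
        txt.submit(
            fn=streaming_response,
            inputs=[txt, chatbot, img, math_ocr_img],
            outputs=[txt, chatbot, img, math_ocr_img],
        )

    demo.launch()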