Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,6 @@ import tempfile
|
|
13 |
from typing import Union, Dict, List
|
14 |
from contextlib import contextmanager
|
15 |
import requests
|
16 |
-
from enum import Enum
|
17 |
|
18 |
# Constants
|
19 |
DEFAULT_LANGUAGE = "English"
|
@@ -38,12 +37,8 @@ class OCRProcessor:
|
|
38 |
|
39 |
@staticmethod
|
40 |
def _encode_image(image_path: str) -> str:
|
41 |
-
|
42 |
-
|
43 |
-
return base64.b64encode(image_file.read()).decode('utf-8')
|
44 |
-
except Exception as e:
|
45 |
-
logger.error(f"Error encoding image {image_path}: {str(e)}")
|
46 |
-
raise
|
47 |
|
48 |
@staticmethod
|
49 |
@contextmanager
|
@@ -73,23 +68,18 @@ class OCRProcessor:
|
|
73 |
logger.error(f"Chat complete API call failed: {str(e)}")
|
74 |
raise
|
75 |
|
76 |
-
def _get_file_content(self, file_input: Union[str,
|
77 |
-
|
78 |
-
if
|
79 |
# Handle URLs
|
80 |
-
response = requests.get(file_input
|
81 |
response.raise_for_status()
|
82 |
return response.content
|
83 |
-
|
|
|
84 |
with open(file_input, "rb") as f:
|
85 |
return f.read()
|
86 |
-
|
87 |
-
return file_input.read()
|
88 |
-
else:
|
89 |
-
raise ValueError("Invalid file input: must be a URL, path, or file-like object")
|
90 |
-
except Exception as e:
|
91 |
-
logger.error(f"Error getting file content: {str(e)}")
|
92 |
-
raise
|
93 |
|
94 |
def ocr_pdf_url(self, pdf_url: str) -> str:
|
95 |
logger.info(f"Processing PDF URL: {pdf_url}")
|
@@ -99,7 +89,7 @@ class OCRProcessor:
|
|
99 |
except Exception as e:
|
100 |
return self._handle_error("PDF URL processing", e)
|
101 |
|
102 |
-
def ocr_uploaded_pdf(self, pdf_file: Union[str,
|
103 |
file_name = getattr(pdf_file, 'name', 'unknown')
|
104 |
logger.info(f"Processing uploaded PDF: {file_name}")
|
105 |
try:
|
@@ -123,7 +113,7 @@ class OCRProcessor:
|
|
123 |
except Exception as e:
|
124 |
return self._handle_error("image URL processing", e)
|
125 |
|
126 |
-
def ocr_uploaded_image(self, image_file: Union[str,
|
127 |
file_name = getattr(image_file, 'name', 'unknown')
|
128 |
logger.info(f"Processing uploaded image: {file_name}")
|
129 |
try:
|
@@ -148,7 +138,7 @@ class OCRProcessor:
|
|
148 |
except Exception as e:
|
149 |
return self._handle_error("document understanding", e)
|
150 |
|
151 |
-
def structured_ocr(self, image_file: Union[str,
|
152 |
file_name = getattr(image_file, 'name', 'unknown')
|
153 |
logger.info(f"Processing structured OCR for: {file_name}")
|
154 |
try:
|
@@ -175,24 +165,19 @@ class OCRProcessor:
|
|
175 |
temperature=0
|
176 |
)
|
177 |
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
response_dict = {}
|
186 |
-
return self._format_structured_response(temp_path, response_dict)
|
187 |
except Exception as e:
|
188 |
return self._handle_error("structured OCR", e)
|
189 |
|
190 |
@staticmethod
|
191 |
def _extract_markdown(response: OCRResponse) -> str:
|
192 |
-
|
193 |
-
return response.pages[0].markdown if response.pages else "No text extracted"
|
194 |
-
except AttributeError:
|
195 |
-
return "Invalid OCR response format"
|
196 |
|
197 |
@staticmethod
|
198 |
def _handle_error(context: str, error: Exception) -> str:
|
@@ -202,8 +187,8 @@ class OCRProcessor:
|
|
202 |
@staticmethod
|
203 |
def _format_structured_response(file_path: str, content: Dict) -> str:
|
204 |
languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
|
205 |
-
valid_langs = [l for l in
|
206 |
-
|
207 |
response = {
|
208 |
"file_name": Path(file_path).name,
|
209 |
"topics": content.get("topics", []),
|
@@ -232,7 +217,7 @@ def create_interface():
|
|
232 |
except Exception as e:
|
233 |
return None, f"**Error:** Unexpected error: {str(e)}"
|
234 |
|
235 |
-
processor_state = gr.State(
|
236 |
api_status = gr.Markdown("API key not set. Please enter and set your key.")
|
237 |
|
238 |
set_api_button = gr.Button("Set API Key")
|
|
|
13 |
from typing import Union, Dict, List
|
14 |
from contextlib import contextmanager
|
15 |
import requests
|
|
|
16 |
|
17 |
# Constants
|
18 |
DEFAULT_LANGUAGE = "English"
|
|
|
37 |
|
38 |
@staticmethod
|
39 |
def _encode_image(image_path: str) -> str:
|
40 |
+
with open(image_path, "rb") as image_file:
|
41 |
+
return base64.b64encode(image_file.read()).decode('utf-8')
|
|
|
|
|
|
|
|
|
42 |
|
43 |
@staticmethod
|
44 |
@contextmanager
|
|
|
68 |
logger.error(f"Chat complete API call failed: {str(e)}")
|
69 |
raise
|
70 |
|
71 |
+
def _get_file_content(self, file_input: Union[str, bytes]) -> bytes:
|
72 |
+
if isinstance(file_input, str):
|
73 |
+
if file_input.startswith("http"):
|
74 |
# Handle URLs
|
75 |
+
response = requests.get(file_input)
|
76 |
response.raise_for_status()
|
77 |
return response.content
|
78 |
+
else:
|
79 |
+
# Handle local file paths
|
80 |
with open(file_input, "rb") as f:
|
81 |
return f.read()
|
82 |
+
return file_input.read() if hasattr(file_input, 'read') else file_input
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
def ocr_pdf_url(self, pdf_url: str) -> str:
|
85 |
logger.info(f"Processing PDF URL: {pdf_url}")
|
|
|
89 |
except Exception as e:
|
90 |
return self._handle_error("PDF URL processing", e)
|
91 |
|
92 |
+
def ocr_uploaded_pdf(self, pdf_file: Union[str, bytes]) -> str:
|
93 |
file_name = getattr(pdf_file, 'name', 'unknown')
|
94 |
logger.info(f"Processing uploaded PDF: {file_name}")
|
95 |
try:
|
|
|
113 |
except Exception as e:
|
114 |
return self._handle_error("image URL processing", e)
|
115 |
|
116 |
+
def ocr_uploaded_image(self, image_file: Union[str, bytes]) -> str:
|
117 |
file_name = getattr(image_file, 'name', 'unknown')
|
118 |
logger.info(f"Processing uploaded image: {file_name}")
|
119 |
try:
|
|
|
138 |
except Exception as e:
|
139 |
return self._handle_error("document understanding", e)
|
140 |
|
141 |
+
def structured_ocr(self, image_file: Union[str, bytes]) -> str:
|
142 |
file_name = getattr(image_file, 'name', 'unknown')
|
143 |
logger.info(f"Processing structured OCR for: {file_name}")
|
144 |
try:
|
|
|
165 |
temperature=0
|
166 |
)
|
167 |
|
168 |
+
# Ensure the response is a dictionary
|
169 |
+
response_content = chat_response.choices[0].message.content
|
170 |
+
if isinstance(response_content, list):
|
171 |
+
response_content = response_content[0] if response_content else "{}"
|
172 |
+
|
173 |
+
content = json.loads(response_content)
|
174 |
+
return self._format_structured_response(temp_path, content)
|
|
|
|
|
175 |
except Exception as e:
|
176 |
return self._handle_error("structured OCR", e)
|
177 |
|
178 |
@staticmethod
|
179 |
def _extract_markdown(response: OCRResponse) -> str:
|
180 |
+
return response.pages[0].markdown if response.pages else "No text extracted"
|
|
|
|
|
|
|
181 |
|
182 |
@staticmethod
|
183 |
def _handle_error(context: str, error: Exception) -> str:
|
|
|
187 |
@staticmethod
|
188 |
def _format_structured_response(file_path: str, content: Dict) -> str:
|
189 |
languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
|
190 |
+
valid_langs = [l for l in content.get("languages", [DEFAULT_LANGUAGE]) if l in languages.values()]
|
191 |
+
|
192 |
response = {
|
193 |
"file_name": Path(file_path).name,
|
194 |
"topics": content.get("topics", []),
|
|
|
217 |
except Exception as e:
|
218 |
return None, f"**Error:** Unexpected error: {str(e)}"
|
219 |
|
220 |
+
processor_state = gr.State()
|
221 |
api_status = gr.Markdown("API key not set. Please enter and set your key.")
|
222 |
|
223 |
set_api_button = gr.Button("Set API Key")
|