Svngoku committed on
Commit
005a056
·
verified ·
1 Parent(s): e2c6744

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -38
app.py CHANGED
@@ -13,7 +13,6 @@ import tempfile
13
  from typing import Union, Dict, List
14
  from contextlib import contextmanager
15
  import requests
16
- from enum import Enum
17
 
18
  # Constants
19
  DEFAULT_LANGUAGE = "English"
@@ -38,12 +37,8 @@ class OCRProcessor:
38
 
39
  @staticmethod
40
  def _encode_image(image_path: str) -> str:
41
- try:
42
- with open(image_path, "rb") as image_file:
43
- return base64.b64encode(image_file.read()).decode('utf-8')
44
- except Exception as e:
45
- logger.error(f"Error encoding image {image_path}: {str(e)}")
46
- raise
47
 
48
  @staticmethod
49
  @contextmanager
@@ -73,23 +68,18 @@ class OCRProcessor:
73
  logger.error(f"Chat complete API call failed: {str(e)}")
74
  raise
75
 
76
- def _get_file_content(self, file_input: Union[str, object]) -> bytes:
77
- try:
78
- if isinstance(file_input, str) and file_input.startswith(("http://", "https://")):
79
  # Handle URLs
80
- response = requests.get(file_input, timeout=10)
81
  response.raise_for_status()
82
  return response.content
83
- elif isinstance(file_input, str): # File path
 
84
  with open(file_input, "rb") as f:
85
  return f.read()
86
- elif hasattr(file_input, 'read'): # File-like object
87
- return file_input.read()
88
- else:
89
- raise ValueError("Invalid file input: must be a URL, path, or file-like object")
90
- except Exception as e:
91
- logger.error(f"Error getting file content: {str(e)}")
92
- raise
93
 
94
  def ocr_pdf_url(self, pdf_url: str) -> str:
95
  logger.info(f"Processing PDF URL: {pdf_url}")
@@ -99,7 +89,7 @@ class OCRProcessor:
99
  except Exception as e:
100
  return self._handle_error("PDF URL processing", e)
101
 
102
- def ocr_uploaded_pdf(self, pdf_file: Union[str, object]) -> str:
103
  file_name = getattr(pdf_file, 'name', 'unknown')
104
  logger.info(f"Processing uploaded PDF: {file_name}")
105
  try:
@@ -123,7 +113,7 @@ class OCRProcessor:
123
  except Exception as e:
124
  return self._handle_error("image URL processing", e)
125
 
126
- def ocr_uploaded_image(self, image_file: Union[str, object]) -> str:
127
  file_name = getattr(image_file, 'name', 'unknown')
128
  logger.info(f"Processing uploaded image: {file_name}")
129
  try:
@@ -148,7 +138,7 @@ class OCRProcessor:
148
  except Exception as e:
149
  return self._handle_error("document understanding", e)
150
 
151
- def structured_ocr(self, image_file: Union[str, object]) -> str:
152
  file_name = getattr(image_file, 'name', 'unknown')
153
  logger.info(f"Processing structured OCR for: {file_name}")
154
  try:
@@ -175,24 +165,19 @@ class OCRProcessor:
175
  temperature=0
176
  )
177
 
178
- content = chat_response.choices[0].message.content if chat_response.choices else "{}"
179
- try:
180
- response_dict = json.loads(content)
181
- if isinstance(response_dict, list): # Handle unexpected list response
182
- response_dict = response_dict[0] if response_dict else {}
183
- except json.JSONDecodeError:
184
- logger.error("Invalid JSON response from chat API")
185
- response_dict = {}
186
- return self._format_structured_response(temp_path, response_dict)
187
  except Exception as e:
188
  return self._handle_error("structured OCR", e)
189
 
190
  @staticmethod
191
  def _extract_markdown(response: OCRResponse) -> str:
192
- try:
193
- return response.pages[0].markdown if response.pages else "No text extracted"
194
- except AttributeError:
195
- return "Invalid OCR response format"
196
 
197
  @staticmethod
198
  def _handle_error(context: str, error: Exception) -> str:
@@ -202,8 +187,8 @@ class OCRProcessor:
202
  @staticmethod
203
  def _format_structured_response(file_path: str, content: Dict) -> str:
204
  languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
205
- valid_langs = [l for l in (content.get("languages") or [DEFAULT_LANGUAGE]) if l in languages.values()]
206
-
207
  response = {
208
  "file_name": Path(file_path).name,
209
  "topics": content.get("topics", []),
@@ -232,7 +217,7 @@ def create_interface():
232
  except Exception as e:
233
  return None, f"**Error:** Unexpected error: {str(e)}"
234
 
235
- processor_state = gr.State(value=None)
236
  api_status = gr.Markdown("API key not set. Please enter and set your key.")
237
 
238
  set_api_button = gr.Button("Set API Key")
 
13
  from typing import Union, Dict, List
14
  from contextlib import contextmanager
15
  import requests
 
16
 
17
  # Constants
18
  DEFAULT_LANGUAGE = "English"
 
37
 
38
  @staticmethod
39
  def _encode_image(image_path: str) -> str:
40
+ with open(image_path, "rb") as image_file:
41
+ return base64.b64encode(image_file.read()).decode('utf-8')
 
 
 
 
42
 
43
  @staticmethod
44
  @contextmanager
 
68
  logger.error(f"Chat complete API call failed: {str(e)}")
69
  raise
70
 
71
+ def _get_file_content(self, file_input: Union[str, bytes]) -> bytes:
72
+ if isinstance(file_input, str):
73
+ if file_input.startswith("http"):
74
  # Handle URLs
75
+ response = requests.get(file_input)
76
  response.raise_for_status()
77
  return response.content
78
+ else:
79
+ # Handle local file paths
80
  with open(file_input, "rb") as f:
81
  return f.read()
82
+ return file_input.read() if hasattr(file_input, 'read') else file_input
 
 
 
 
 
 
83
 
84
  def ocr_pdf_url(self, pdf_url: str) -> str:
85
  logger.info(f"Processing PDF URL: {pdf_url}")
 
89
  except Exception as e:
90
  return self._handle_error("PDF URL processing", e)
91
 
92
+ def ocr_uploaded_pdf(self, pdf_file: Union[str, bytes]) -> str:
93
  file_name = getattr(pdf_file, 'name', 'unknown')
94
  logger.info(f"Processing uploaded PDF: {file_name}")
95
  try:
 
113
  except Exception as e:
114
  return self._handle_error("image URL processing", e)
115
 
116
+ def ocr_uploaded_image(self, image_file: Union[str, bytes]) -> str:
117
  file_name = getattr(image_file, 'name', 'unknown')
118
  logger.info(f"Processing uploaded image: {file_name}")
119
  try:
 
138
  except Exception as e:
139
  return self._handle_error("document understanding", e)
140
 
141
+ def structured_ocr(self, image_file: Union[str, bytes]) -> str:
142
  file_name = getattr(image_file, 'name', 'unknown')
143
  logger.info(f"Processing structured OCR for: {file_name}")
144
  try:
 
165
  temperature=0
166
  )
167
 
168
+ # Ensure the response is a dictionary
169
+ response_content = chat_response.choices[0].message.content
170
+ if isinstance(response_content, list):
171
+ response_content = response_content[0] if response_content else "{}"
172
+
173
+ content = json.loads(response_content)
174
+ return self._format_structured_response(temp_path, content)
 
 
175
  except Exception as e:
176
  return self._handle_error("structured OCR", e)
177
 
178
  @staticmethod
179
  def _extract_markdown(response: OCRResponse) -> str:
180
+ return response.pages[0].markdown if response.pages else "No text extracted"
 
 
 
181
 
182
  @staticmethod
183
  def _handle_error(context: str, error: Exception) -> str:
 
187
  @staticmethod
188
  def _format_structured_response(file_path: str, content: Dict) -> str:
189
  languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
190
+ valid_langs = [l for l in content.get("languages", [DEFAULT_LANGUAGE]) if l in languages.values()]
191
+
192
  response = {
193
  "file_name": Path(file_path).name,
194
  "topics": content.get("topics", []),
 
217
  except Exception as e:
218
  return None, f"**Error:** Unexpected error: {str(e)}"
219
 
220
+ processor_state = gr.State()
221
  api_status = gr.Markdown("API key not set. Please enter and set your key.")
222
 
223
  set_api_button = gr.Button("Set API Key")