Svngoku committed on
Commit
982eaf5
·
verified ·
1 Parent(s): 94f6203

Re Restore

Browse files
Files changed (1) hide show
  1. app.py +24 -38
app.py CHANGED
@@ -3,7 +3,7 @@ import base64
3
  import gradio as gr
4
  from mistralai import Mistral, ImageURLChunk
5
  from mistralai.models import OCRResponse
6
- from typing import Union, List, Tuple, Any
7
  import requests
8
  import shutil
9
  import time
@@ -110,10 +110,7 @@ class OCRProcessor:
110
  range(pdf_document.page_count)
111
  ))
112
  pdf_document.close()
113
- valid_data = [data for data in image_data if data and data[0] and os.path.exists(data[0])]
114
- if not valid_data:
115
- logger.warning("No valid images generated from PDF")
116
- return valid_data
117
  except Exception as e:
118
  logger.error(f"Error converting PDF to images: {str(e)}")
119
  return []
@@ -143,16 +140,11 @@ class OCRProcessor:
143
  document=ImageURLChunk(image_url=base64_url),
144
  include_image_base64=True
145
  )
146
- logger.info(f"OCR API call successful. Pages: {len(response.pages)}")
147
- for page in response.pages:
148
- logger.debug(f"Page markdown: {page.markdown}")
149
  return response
150
  except (ConnectionError, Timeout, socket.error) as e:
151
  logger.error(f"Network error during OCR API call: {str(e)}")
152
  raise
153
- except Exception as e:
154
- logger.error(f"OCR API error: {str(e)}")
155
- raise
156
 
157
  def ocr_uploaded_pdf(self, pdf_file: Union[str, bytes]) -> Tuple[str, List[str]]:
158
  file_name = getattr(pdf_file, 'name', f"pdf_{int(time.time())}.pdf")
@@ -166,7 +158,7 @@ class OCRProcessor:
166
 
167
  image_data = self._pdf_to_images(pdf_path)
168
  if not image_data:
169
- return "No valid pages converted from PDF", []
170
 
171
  ocr_results = []
172
  image_paths = [path for path, _ in image_data]
@@ -175,7 +167,7 @@ class OCRProcessor:
175
  markdown_with_images = self._get_combined_markdown_with_images(response, image_paths, i)
176
  ocr_results.append(markdown_with_images)
177
 
178
- return "\n\n".join(ocr_results) or "No text detected in PDF", image_paths
179
  except Exception as e:
180
  return self._handle_error("uploaded PDF processing", e), []
181
 
@@ -190,7 +182,7 @@ class OCRProcessor:
190
 
191
  image_data = self._pdf_to_images(pdf_path)
192
  if not image_data:
193
- return "No valid pages converted from PDF", []
194
 
195
  ocr_results = []
196
  image_paths = [path for path, _ in image_data]
@@ -199,11 +191,11 @@ class OCRProcessor:
199
  markdown_with_images = self._get_combined_markdown_with_images(response, image_paths, i)
200
  ocr_results.append(markdown_with_images)
201
 
202
- return "\n\n".join(ocr_results) or "No text detected in PDF", image_paths
203
  except Exception as e:
204
  return self._handle_error("PDF URL processing", e), []
205
 
206
- def ocr_uploaded_image(self, image_file: Union[str, bytes]) -> Tuple[str, Any]:
207
  file_name = getattr(image_file, 'name', f"image_{int(time.time())}.jpg")
208
  logger.info(f"Processing uploaded image: {file_name}")
209
  try:
@@ -211,20 +203,17 @@ class OCRProcessor:
211
  image_path = self._save_uploaded_file(image_file, file_name)
212
  encoded_image = self._encode_image(image_path)
213
  response = self._call_ocr_api(encoded_image)
214
- markdown_with_images = self._get_combined_markdown_with_images(response)
215
- preview_update = gr.Image.update(value=image_path) if image_path else gr.Image.update()
216
- return markdown_with_images or "No text detected in image", preview_update
217
  except Exception as e:
218
- return self._handle_error("image processing", e), gr.Image.update()
219
 
220
  @staticmethod
221
  def _get_combined_markdown_with_images(response: OCRResponse, image_paths: List[str] = None, page_index: int = None) -> str:
222
  markdown_parts = []
223
- logger.info(f"Processing response with {len(response.pages)} pages")
224
  for i, page in enumerate(response.pages):
225
- if page.markdown and page.markdown.strip():
226
- markdown = page.markdown.strip()
227
- logger.info(f"Page {i} markdown: {markdown[:100]}...") # Log first 100 chars
228
  if hasattr(page, 'images') and page.images:
229
  logger.info(f"Found {len(page.images)} images in page {i}")
230
  for img in page.images:
@@ -244,8 +233,10 @@ class OCRProcessor:
244
  )
245
  else:
246
  logger.warning(f"No images found in page {i}")
 
247
  if image_paths and page_index is not None and page_index < len(image_paths):
248
  local_encoded = OCRProcessor._encode_image(image_paths[page_index])
 
249
  placeholder = f"img-{i}.jpeg"
250
  if placeholder in markdown:
251
  markdown = markdown.replace(
@@ -253,15 +244,14 @@ class OCRProcessor:
253
  f"![Page {i} Image](data:image/png;base64,{local_encoded})"
254
  )
255
  else:
 
256
  markdown += f"\n\n![Page {i} Image](data:image/png;base64,{local_encoded})"
257
  markdown_parts.append(markdown)
258
- else:
259
- logger.warning(f"No markdown content in page {i}")
260
  return "\n\n".join(markdown_parts) or "No text or images detected"
261
 
262
  @staticmethod
263
  def _handle_error(context: str, error: Exception) -> str:
264
- logger.error(f"Error in {context}: {str(e)}")
265
  return f"**Error in {context}:** {str(error)}"
266
 
267
  def create_interface():
@@ -305,9 +295,8 @@ def create_interface():
305
 
306
  def process_image(processor, image):
307
  if not processor or not image:
308
- return "Please set API key and upload an image", gr.Image.update()
309
- result, preview_update = processor.ocr_uploaded_image(image)
310
- return result, preview_update
311
 
312
  process_image_btn.click(
313
  fn=process_image,
@@ -332,19 +321,15 @@ def create_interface():
332
 
333
  def process_pdf(processor, pdf_file, pdf_url):
334
  if not processor:
335
- return "Please set API key first", gr.Gallery.update()
336
  logger.info(f"Received inputs - PDF file: {pdf_file}, PDF URL: {pdf_url}")
337
  if pdf_file is not None and hasattr(pdf_file, 'name'):
338
  logger.info(f"Processing as uploaded PDF: {pdf_file.name}")
339
- result, image_paths = processor.ocr_uploaded_pdf(pdf_file)
340
- gallery = gr.Gallery.update(value=[(p, os.path.basename(p)) for p in image_paths]) if image_paths else gr.Gallery.update()
341
- return result, gallery
342
  elif pdf_url and pdf_url.strip():
343
  logger.info(f"Processing as PDF URL: {pdf_url}")
344
- result, image_paths = processor.ocr_pdf_url(pdf_url)
345
- gallery = gr.Gallery.update(value=[(p, os.path.basename(p)) for p in image_paths]) if image_paths else gr.Gallery.update()
346
- return result, gallery
347
- return "Please upload a PDF or provide a valid URL", gr.Gallery.update()
348
 
349
  process_pdf_btn.click(
350
  fn=process_pdf,
@@ -359,4 +344,5 @@ if __name__ == "__main__":
359
  print(f"===== Application Startup at {os.environ['START_TIME']} =====")
360
  create_interface().launch(
361
  share=True,
 
362
  )
 
3
  import gradio as gr
4
  from mistralai import Mistral, ImageURLChunk
5
  from mistralai.models import OCRResponse
6
+ from typing import Union, List, Tuple
7
  import requests
8
  import shutil
9
  import time
 
110
  range(pdf_document.page_count)
111
  ))
112
  pdf_document.close()
113
+ return [data for data in image_data if data]
 
 
 
114
  except Exception as e:
115
  logger.error(f"Error converting PDF to images: {str(e)}")
116
  return []
 
140
  document=ImageURLChunk(image_url=base64_url),
141
  include_image_base64=True
142
  )
143
+ logger.info("OCR API call successful")
 
 
144
  return response
145
  except (ConnectionError, Timeout, socket.error) as e:
146
  logger.error(f"Network error during OCR API call: {str(e)}")
147
  raise
 
 
 
148
 
149
  def ocr_uploaded_pdf(self, pdf_file: Union[str, bytes]) -> Tuple[str, List[str]]:
150
  file_name = getattr(pdf_file, 'name', f"pdf_{int(time.time())}.pdf")
 
158
 
159
  image_data = self._pdf_to_images(pdf_path)
160
  if not image_data:
161
+ raise ValueError("No pages converted from PDF")
162
 
163
  ocr_results = []
164
  image_paths = [path for path, _ in image_data]
 
167
  markdown_with_images = self._get_combined_markdown_with_images(response, image_paths, i)
168
  ocr_results.append(markdown_with_images)
169
 
170
+ return "\n\n".join(ocr_results), image_paths
171
  except Exception as e:
172
  return self._handle_error("uploaded PDF processing", e), []
173
 
 
182
 
183
  image_data = self._pdf_to_images(pdf_path)
184
  if not image_data:
185
+ raise ValueError("No pages converted from PDF")
186
 
187
  ocr_results = []
188
  image_paths = [path for path, _ in image_data]
 
191
  markdown_with_images = self._get_combined_markdown_with_images(response, image_paths, i)
192
  ocr_results.append(markdown_with_images)
193
 
194
+ return "\n\n".join(ocr_results), image_paths
195
  except Exception as e:
196
  return self._handle_error("PDF URL processing", e), []
197
 
198
+ def ocr_uploaded_image(self, image_file: Union[str, bytes]) -> Tuple[str, str]:
199
  file_name = getattr(image_file, 'name', f"image_{int(time.time())}.jpg")
200
  logger.info(f"Processing uploaded image: {file_name}")
201
  try:
 
203
  image_path = self._save_uploaded_file(image_file, file_name)
204
  encoded_image = self._encode_image(image_path)
205
  response = self._call_ocr_api(encoded_image)
206
+ return self._get_combined_markdown_with_images(response), image_path
 
 
207
  except Exception as e:
208
+ return self._handle_error("image processing", e), None
209
 
210
  @staticmethod
211
  def _get_combined_markdown_with_images(response: OCRResponse, image_paths: List[str] = None, page_index: int = None) -> str:
212
  markdown_parts = []
 
213
  for i, page in enumerate(response.pages):
214
+ if page.markdown.strip():
215
+ markdown = page.markdown
216
+ logger.info(f"Page {i} markdown: {markdown}")
217
  if hasattr(page, 'images') and page.images:
218
  logger.info(f"Found {len(page.images)} images in page {i}")
219
  for img in page.images:
 
233
  )
234
  else:
235
  logger.warning(f"No images found in page {i}")
236
+ # Replace known placeholders or append the local image
237
  if image_paths and page_index is not None and page_index < len(image_paths):
238
  local_encoded = OCRProcessor._encode_image(image_paths[page_index])
239
+ # Replace placeholders like img-0.jpeg
240
  placeholder = f"img-{i}.jpeg"
241
  if placeholder in markdown:
242
  markdown = markdown.replace(
 
244
  f"![Page {i} Image](data:image/png;base64,{local_encoded})"
245
  )
246
  else:
247
+ # Append the image if no placeholder is found
248
  markdown += f"\n\n![Page {i} Image](data:image/png;base64,{local_encoded})"
249
  markdown_parts.append(markdown)
 
 
250
  return "\n\n".join(markdown_parts) or "No text or images detected"
251
 
252
  @staticmethod
253
  def _handle_error(context: str, error: Exception) -> str:
254
+ logger.error(f"Error in {context}: {str(error)}")
255
  return f"**Error in {context}:** {str(error)}"
256
 
257
  def create_interface():
 
295
 
296
  def process_image(processor, image):
297
  if not processor or not image:
298
+ return "Please set API key and upload an image", None
299
+ return processor.ocr_uploaded_image(image)
 
300
 
301
  process_image_btn.click(
302
  fn=process_image,
 
321
 
322
  def process_pdf(processor, pdf_file, pdf_url):
323
  if not processor:
324
+ return "Please set API key first", []
325
  logger.info(f"Received inputs - PDF file: {pdf_file}, PDF URL: {pdf_url}")
326
  if pdf_file is not None and hasattr(pdf_file, 'name'):
327
  logger.info(f"Processing as uploaded PDF: {pdf_file.name}")
328
+ return processor.ocr_uploaded_pdf(pdf_file)
 
 
329
  elif pdf_url and pdf_url.strip():
330
  logger.info(f"Processing as PDF URL: {pdf_url}")
331
+ return processor.ocr_pdf_url(pdf_url)
332
+ return "Please upload a PDF or provide a valid URL", []
 
 
333
 
334
  process_pdf_btn.click(
335
  fn=process_pdf,
 
344
  print(f"===== Application Startup at {os.environ['START_TIME']} =====")
345
  create_interface().launch(
346
  share=True,
347
+ debug=True,
348
  )