prithivMLmods committed on
Commit
b789dc3
·
verified ·
1 Parent(s): 62a17ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -36
app.py CHANGED
@@ -129,6 +129,97 @@ def fetch_image(image_input, min_pixels: int = None, max_pixels: int = None):
129
  image = image.resize((width, height), Image.LANCZOS)
130
  return image
131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  @spaces.GPU
133
  def inference(model_name: str, image: Image.Image, text: str, max_new_tokens: int = 1024) -> str:
134
  try:
@@ -182,47 +273,33 @@ def inference(model_name: str, image: Image.Image, text: str, max_new_tokens: in
182
  traceback.print_exc()
183
  yield f"Error during inference: {str(e)}", f"Error during inference: {str(e)}"
184
 
 
185
  def process_image(
186
  model_name: str,
187
  image: Image.Image,
188
  min_pixels: Optional[int] = None,
189
  max_pixels: Optional[int] = None,
190
  max_new_tokens: int = 1024
191
- ) -> Dict[str, Any]:
192
  try:
193
  if min_pixels or max_pixels:
194
  image = fetch_image(image, min_pixels=min_pixels, max_pixels=max_pixels)
195
- result = {
196
- 'original_image': image,
197
- 'raw_output': "",
198
- 'layout_result': None,
199
- }
200
  buffer = ""
201
  for raw_output, _ in inference(model_name, image, prompt, max_new_tokens):
202
  buffer = raw_output
203
- result['raw_output'] = buffer
204
- yield result
205
  try:
206
  json_match = re.search(r'```json\s*([\s\S]+?)\s*```', buffer)
207
  json_str = json_match.group(1) if json_match else buffer
208
  layout_data = json.loads(json_str)
209
- result['layout_result'] = layout_data
210
  except json.JSONDecodeError:
211
- print("Failed to parse JSON output")
212
- result['layout_result'] = {"error": "Failed to parse JSON"}
213
- except Exception as e:
214
- print(f"Error processing layout: {e}")
215
- result['layout_result'] = {"error": str(e)}
216
- yield result
217
  except Exception as e:
218
  print(f"Error processing image: {e}")
219
  traceback.print_exc()
220
- result = {
221
- 'original_image': image,
222
- 'raw_output': f"Error processing image: {str(e)}",
223
- 'layout_result': {"error": str(e)}
224
- }
225
- yield result
226
 
227
  def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
228
  if not file_path or not os.path.exists(file_path):
@@ -287,9 +364,9 @@ def create_gradio_interface():
287
  with gr.Column(scale=2):
288
  with gr.Tabs():
289
  with gr.Tab("📝 Extracted Content"):
290
- output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=2, show_copy_button=True)
291
- with gr.Tab("📋 Layout JSON"):
292
- json_output = gr.JSON(label="Layout Analysis Results", value=None)
293
  def process_document(model_name, file_path, max_tokens, min_pix, max_pix):
294
  try:
295
  if not file_path:
@@ -297,31 +374,29 @@ def create_gradio_interface():
297
  image, status = load_file_for_preview(file_path)
298
  if image is None:
299
  return status, None
300
- for result in process_image(model_name, image, min_pixels=int(min_pix) if min_pix else None, max_pixels=int(max_pix) if max_pix else None, max_new_tokens=max_tokens):
301
- raw_output = result['raw_output']
302
- layout_result = result['layout_result']
303
- yield raw_output, layout_result
304
  except Exception as e:
305
  error_msg = f"Error processing document: {str(e)}"
306
  print(error_msg)
307
  traceback.print_exc()
308
- yield error_msg, {"error": str(e)}
309
  def handle_file_upload(file_path):
310
  if not file_path:
311
- return None, "No file loaded"
312
- image, page_info = load_file_for_preview(file_path)
313
- return image, page_info
314
  def clear_all():
315
- return None, None, "No file loaded", None
316
- file_input.change(handle_file_upload, inputs=[file_input], outputs=[image_preview, output])
317
  process_btn.click(
318
  process_document,
319
  inputs=[model_choice, file_input, max_new_tokens, min_pixels, max_pixels],
320
- outputs=[output, json_output]
321
  )
322
  clear_btn.click(
323
  clear_all,
324
- outputs=[file_input, image_preview, output, json_output]
325
  )
326
  return demo
327
 
 
129
  image = image.resize((width, height), Image.LANCZOS)
130
  return image
131
 
132
def is_arabic_text(text: str) -> bool:
    """Return True when most letters in the Markdown prose of *text* are Arabic.

    Only header lines (``#`` to ``######``) and plain paragraph lines are
    inspected; image links, code fences, table rows and list items are
    skipped. The result is True when strictly more than half of the
    alphabetic characters in the inspected lines fall inside an Arabic
    Unicode block.

    Args:
        text: Markdown text to classify. Empty or ``None`` yields False.

    Returns:
        True if the inspected content is predominantly Arabic, else False.
    """
    if not text:
        return False

    # Compile once, outside the per-line loop.
    header_re = re.compile(r'^#{1,6}\s+(.+)$')
    paragraph_re = re.compile(r'^(?!#{1,6}\s|!\[|```|\||\s*[-*+]\s|\s*\d+\.\s)(.+)$')

    # Arabic, Arabic Supplement, Arabic Extended-A, plus Presentation Forms
    # A and B: text may contain pre-shaped glyphs from the presentation
    # blocks, which would otherwise be counted as non-Arabic letters.
    arabic_ranges = (
        ('\u0600', '\u06FF'),
        ('\u0750', '\u077F'),
        ('\u08A0', '\u08FF'),
        ('\uFB50', '\uFDFF'),
        ('\uFE70', '\uFEFF'),
    )

    content_text = []
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        header_match = header_re.match(line)
        if header_match:
            # Keep only the header title, not the leading '#' markers.
            content_text.append(header_match.group(1))
            continue
        if paragraph_re.match(line):
            content_text.append(line)

    letters = [char for char in ' '.join(content_text) if char.isalpha()]
    if not letters:
        return False

    arabic_chars = sum(
        1
        for char in letters
        if any(low <= char <= high for low, high in arabic_ranges)
    )
    return (arabic_chars / len(letters)) > 0.5
159
+
160
def _reading_order_key(item) -> tuple:
    """Sort key ordering layout items top-to-bottom, then left-to-right."""
    bbox = item.get('bbox') if isinstance(item, dict) else None
    try:
        return (float(bbox[1]), float(bbox[0]))
    except (TypeError, ValueError, IndexError, KeyError):
        # Missing or malformed bbox: sort as if at the origin instead of
        # aborting the whole conversion.
        return (0.0, 0.0)


def _picture_markdown(image, bbox) -> str:
    """Return a Markdown image tag for a Picture item, embedding the crop as base64 PNG."""
    import base64
    from io import BytesIO

    if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
        return "![Image](Image detected)\n"
    try:
        x1, y1, x2, y2 = bbox
        # Clamp the box to the image bounds before cropping.
        x1, y1 = max(0, int(x1)), max(0, int(y1))
        x2, y2 = min(image.width, int(x2)), min(image.height, int(y2))
        if x2 <= x1 or y2 <= y1:
            return "![Image](Image region detected)\n"
        buffer = BytesIO()
        image.crop((x1, y1, x2, y2)).save(buffer, format='PNG')
        img_data = base64.b64encode(buffer.getvalue()).decode()
        return f"![Image](data:image/png;base64,{img_data})\n"
    except Exception as e:
        print(f"Error processing image region: {e}")
        return "![Image](Image detected)\n"


def _formula_markdown(text: str) -> str:
    """Return Markdown for a Formula item without double-wrapping ``$`` delimiters."""
    stripped = text.strip()
    if len(stripped) >= 4 and stripped.startswith('$$') and stripped.endswith('$$'):
        inner = stripped[2:-2].strip()
        if inner and '$' not in inner:
            # Already a display formula: normalise it rather than nesting
            # another pair of $$ around it.
            return f"$$\n{inner}\n$$\n"
        return f"{text}\n"
    if stripped.startswith('$'):
        # Already carries its own math delimiters; emit unchanged.
        return f"{text}\n"
    if '\\' in text:
        return f"$$\n{text}\n$$\n"
    return f"**Formula:** {text}\n"


def layoutjson2md(image: "Image.Image", layout_data: List[Dict], text_key: str = 'text') -> str:
    """Convert layout-analysis items into a Markdown document.

    Items are emitted in reading order (sorted by bbox top, then bbox left)
    and rendered according to their ``category``. Picture items are cropped
    from *image* and embedded as base64 PNG data URIs; ``Page-header`` and
    ``Page-footer`` items, and non-picture items with empty text, are
    dropped.

    Args:
        image: Source page image, used only to crop Picture regions.
        layout_data: Layout items; each is expected to be a dict with
            ``category``, ``bbox`` and a text field.
        text_key: Key under which each item stores its text.

    Returns:
        The Markdown text. If conversion fails unexpectedly, the error is
        printed and ``str(layout_data)`` is returned instead.
    """
    markdown_lines = []
    try:
        for item in sorted(layout_data, key=_reading_order_key):
            category = item.get('category', '')
            text = item.get(text_key, '')
            if category == 'Picture':
                markdown_lines.append(_picture_markdown(image, item.get('bbox', [])))
            elif not text:
                continue
            elif category == 'Title':
                markdown_lines.append(f"# {text}\n")
            elif category == 'Section-header':
                markdown_lines.append(f"## {text}\n")
            elif category == 'Text':
                markdown_lines.append(f"{text}\n")
            elif category == 'List-item':
                markdown_lines.append(f"- {text}\n")
            elif category == 'Table':
                # Text starting with '<' is passed through unchanged.
                if text.strip().startswith('<'):
                    markdown_lines.append(f"{text}\n")
                else:
                    markdown_lines.append(f"**Table:** {text}\n")
            elif category == 'Formula':
                markdown_lines.append(_formula_markdown(text))
            elif category == 'Caption':
                markdown_lines.append(f"*{text}*\n")
            elif category == 'Footnote':
                markdown_lines.append(f"^{text}^\n")
            elif category in ('Page-header', 'Page-footer'):
                continue
            else:
                markdown_lines.append(f"{text}\n")
            # Blank separator after every emitted item.
            markdown_lines.append("")
    except Exception as e:
        print(f"Error converting to markdown: {e}")
        return str(layout_data)
    return "\n".join(markdown_lines)
222
+
223
  @spaces.GPU
224
  def inference(model_name: str, image: Image.Image, text: str, max_new_tokens: int = 1024) -> str:
225
  try:
 
273
  traceback.print_exc()
274
  yield f"Error during inference: {str(e)}", f"Error during inference: {str(e)}"
275
 
276
@spaces.GPU
def process_image(
    model_name: str,
    image: Image.Image,
    min_pixels: Optional[int] = None,
    max_pixels: Optional[int] = None,
    max_new_tokens: int = 1024
):
    """Stream model output for one image, then try to parse it as layout JSON.

    This is a generator yielding ``(text, layout)`` pairs. While the model is
    generating, ``layout`` is ``None`` and ``text`` is the latest output from
    ``inference``. After the stream ends, one more pair is yielded: the final
    text with the parsed JSON, or with ``None`` when the text is not valid
    JSON. Any other failure is printed with a traceback and reported as a
    single ``("Error processing image: ...", None)`` pair.

    Args:
        model_name: Forwarded to ``inference``.
        image: Input image; resized through ``fetch_image`` when a pixel
            bound is supplied.
        min_pixels: Optional lower pixel bound forwarded to ``fetch_image``.
        max_pixels: Optional upper pixel bound forwarded to ``fetch_image``.
        max_new_tokens: Forwarded to ``inference``.
    """
    try:
        wants_resize = bool(min_pixels or max_pixels)
        if wants_resize:
            image = fetch_image(image, min_pixels=min_pixels, max_pixels=max_pixels)

        # `prompt` is resolved from the enclosing module scope.
        latest_text = ""
        for streamed_text, _ in inference(model_name, image, prompt, max_new_tokens):
            latest_text = streamed_text
            yield latest_text, None

        try:
            # Prefer the contents of a ```json fence; otherwise parse the
            # whole output.
            fenced = re.search(r'```json\s*([\s\S]+?)\s*```', latest_text)
            candidate = latest_text if fenced is None else fenced.group(1)
            parsed_layout = json.loads(candidate)
            yield latest_text, parsed_layout
        except json.JSONDecodeError:
            print("Failed to parse JSON output, using raw output")
            yield latest_text, None
    except Exception as e:
        print(f"Error processing image: {e}")
        traceback.print_exc()
        yield f"Error processing image: {str(e)}", None
 
 
 
 
 
303
 
304
  def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
305
  if not file_path or not os.path.exists(file_path):
 
364
  with gr.Column(scale=2):
365
  with gr.Tabs():
366
  with gr.Tab("📝 Extracted Content"):
367
+ raw_output = gr.Textbox(label="Raw OCR Output", interactive=False, lines=10, show_copy_button=True)
368
+ with gr.Tab("📋 Layout Analysis Results"):
369
+ json_output = gr.JSON(label="Layout Analysis Results")
370
  def process_document(model_name, file_path, max_tokens, min_pix, max_pix):
371
  try:
372
  if not file_path:
 
374
  image, status = load_file_for_preview(file_path)
375
  if image is None:
376
  return status, None
377
+ for raw_buffer, layout_result in process_image(model_name, image, min_pixels=int(min_pix) if min_pix else None, max_pixels=int(max_pix) if max_pix else None, max_new_tokens=max_tokens):
378
+ yield raw_buffer, layout_result
 
 
379
  except Exception as e:
380
  error_msg = f"Error processing document: {str(e)}"
381
  print(error_msg)
382
  traceback.print_exc()
383
+ yield error_msg, None
384
  def handle_file_upload(file_path):
385
  if not file_path:
386
+ return None
387
+ image, status = load_file_for_preview(file_path)
388
+ return image
389
  def clear_all():
390
+ return None, None, "", None
391
+ file_input.change(handle_file_upload, inputs=[file_input], outputs=[image_preview])
392
  process_btn.click(
393
  process_document,
394
  inputs=[model_choice, file_input, max_new_tokens, min_pixels, max_pixels],
395
+ outputs=[raw_output, json_output]
396
  )
397
  clear_btn.click(
398
  clear_all,
399
+ outputs=[file_input, image_preview, raw_output, json_output]
400
  )
401
  return demo
402