MohamedRashad committed on
Commit
5f76d0a
·
1 Parent(s): 118bfa5

Refactor layoutjson2md function: remove unused parameter and clean up markdown output handling

Browse files
Files changed (1) hide show
  1. app.py +3 -25
app.py CHANGED
@@ -187,13 +187,10 @@ def draw_layout_on_image(image: Image.Image, layout_data: List[Dict]) -> Image.I
187
  return img_copy
188
 
189
 
190
- def layoutjson2md(image: Image.Image, layout_data: List[Dict], text_key: str = 'text', no_page_hf: bool = False) -> str:
191
  """Convert layout JSON to markdown format"""
192
  markdown_lines = []
193
 
194
- if not no_page_hf:
195
- markdown_lines.append("# Document Content\n")
196
-
197
  try:
198
  # Sort items by reading order (top to bottom, left to right)
199
  sorted_items = sorted(layout_data, key=lambda x: (x.get('bbox', [0, 0, 0, 0])[1], x.get('bbox', [0, 0, 0, 0])[0]))
@@ -584,12 +581,6 @@ def create_gradio_interface():
584
  </div>
585
  """)
586
 
587
- # Model status
588
- model_status = gr.HTML(
589
- '<div class="model-status status-loading">🔄 Initializing model...</div>',
590
- elem_id="model_status"
591
- )
592
-
593
  # Main interface
594
  with gr.Row():
595
  # Left column - Input and controls
@@ -667,23 +658,12 @@ def create_gradio_interface():
667
  interactive=False,
668
  height=500
669
  )
670
-
671
  # Markdown output tab
672
  with gr.Tab("📝 Extracted Content"):
673
  markdown_output = gr.Markdown(
674
  value="Click 'Process Document' to see extracted content...",
675
  height=500
676
  )
677
-
678
- # Raw output tab
679
- with gr.Tab("🔧 Raw Output"):
680
- raw_output = gr.Textbox(
681
- label="Raw Model Output",
682
- lines=20,
683
- max_lines=30,
684
- interactive=False
685
- )
686
-
687
  # JSON layout tab
688
  with gr.Tab("📋 Layout JSON"):
689
  json_output = gr.JSON(
@@ -840,8 +820,6 @@ def create_gradio_interface():
840
  )
841
 
842
  # Wire up event handlers
843
- demo.load(load_model_on_startup, outputs=[model_status])
844
-
845
  file_input.change(
846
  handle_file_upload,
847
  inputs=[file_input],
@@ -861,14 +839,14 @@ def create_gradio_interface():
861
  process_btn.click(
862
  process_document,
863
  inputs=[file_input, max_new_tokens, min_pixels, max_pixels],
864
- outputs=[processed_image, markdown_output, raw_output, json_output, model_status]
865
  )
866
 
867
  clear_btn.click(
868
  clear_all,
869
  outputs=[
870
  file_input, image_preview, page_info, processed_image,
871
- markdown_output, raw_output, json_output, model_status
872
  ]
873
  )
874
 
 
187
  return img_copy
188
 
189
 
190
+ def layoutjson2md(image: Image.Image, layout_data: List[Dict], text_key: str = 'text') -> str:
191
  """Convert layout JSON to markdown format"""
192
  markdown_lines = []
193
 
 
 
 
194
  try:
195
  # Sort items by reading order (top to bottom, left to right)
196
  sorted_items = sorted(layout_data, key=lambda x: (x.get('bbox', [0, 0, 0, 0])[1], x.get('bbox', [0, 0, 0, 0])[0]))
 
581
  </div>
582
  """)
583
 
 
 
 
 
 
 
584
  # Main interface
585
  with gr.Row():
586
  # Left column - Input and controls
 
658
  interactive=False,
659
  height=500
660
  )
 
661
  # Markdown output tab
662
  with gr.Tab("📝 Extracted Content"):
663
  markdown_output = gr.Markdown(
664
  value="Click 'Process Document' to see extracted content...",
665
  height=500
666
  )
 
 
 
 
 
 
 
 
 
 
667
  # JSON layout tab
668
  with gr.Tab("📋 Layout JSON"):
669
  json_output = gr.JSON(
 
820
  )
821
 
822
  # Wire up event handlers
 
 
823
  file_input.change(
824
  handle_file_upload,
825
  inputs=[file_input],
 
839
  process_btn.click(
840
  process_document,
841
  inputs=[file_input, max_new_tokens, min_pixels, max_pixels],
842
+ outputs=[processed_image, markdown_output, json_output]
843
  )
844
 
845
  clear_btn.click(
846
  clear_all,
847
  outputs=[
848
  file_input, image_preview, page_info, processed_image,
849
+ markdown_output, json_output
850
  ]
851
  )
852