Spaces:

prithivMLmods
/

Multimodal-VLM-v1.0

Running on Zero

App Files Files Community

prithivMLmods committed 11 days ago

Commit

4bd0515

verified ·

1 Parent(s): 0d8b416

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -3

app.py CHANGED Viewed

@@ -25,7 +25,7 @@ prompt = """Please output the layout information from the PDF image, including e
 1. Bbox format: [x1, y1, x2, y2]
 2. Layout Categories: ['Caption', 'Footnote', 'Formula', 'List-item', 'Page-footer', 'Page-header', 'Picture', 'Section-header', 'Table', 'Text', 'Title']
 3. Text Extraction & Formatting Rules:
-    - Picture: Omit the text field
     - Formula: format as LaTeX
     - Table: format as HTML
     - Others: format as Markdown
@@ -335,6 +335,7 @@ def process_document(file_path, model_choice, max_tokens, min_pix, max_pix):
 def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional[Image.Image], Optional[Dict]]:
     global pdf_cache
     if not pdf_cache["images"]:
         return None, '<div class="page-info">No file loaded</div>', "No results yet", None, None
     if direction == "prev":
@@ -347,7 +348,7 @@ def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional
     if pdf_cache["is_parsed"] and index < len(pdf_cache["results"]):
         result = pdf_cache["results"][index]
         if isinstance(result, dict):  # dots.ocr
-            markdown_content = result.get('markdown_content', 'No content available')
             processed_img = result.get('processed_image', None)
             layout_json = result.get('layout_result', None)
         else:  # Dolphin
@@ -404,6 +405,12 @@ def create_gradio_interface():
                     max_pixels = gr.Number(value=MAX_PIXELS, label="Max Pixels")
                 process_btn = gr.Button("🔥 Process Document", variant="primary", elem_classes=["process-button"], size="lg")
                 clear_btn = gr.Button("Clear Document", variant="secondary")
             with gr.Column(scale=2):
                 with gr.Tabs():
                     with gr.Tab("✦︎ Processed Image"):
@@ -412,7 +419,14 @@ def create_gradio_interface():
                         markdown_output = gr.Markdown(value="Click 'Process Document' to see extracted content...", height=500)
                     with gr.Tab("⏲ Layout JSON"):
                         json_output = gr.JSON(label="Layout Analysis Results", value=None)
         def handle_file_upload(file_path):
             image, page_info = load_file_for_preview(file_path)
             return image, page_info

 1. Bbox format: [x1, y1, x2, y2]
 2. Layout Categories: ['Caption', 'Footnote', 'Formula', 'List-item', 'Page-footer', 'Page-header', 'Picture', 'Section-header', 'Table', 'Text', 'Title']
 3. Text Extraction & Formatting Rules:
+    - Picture: omit the text field
     - Formula: format as LaTeX
     - Table: format as HTML
     - Others: format as Markdown
 def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional[Image.Image], Optional[Dict]]:
     global pdf_cache
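+    # [garbled in source: a truncated "if" followed by a stray non-ASCII character; the intended statement is not recoverable from this page]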
     if not pdf_cache["images"]:
         return None, '<div class="page-info">No file loaded</div>', "No results yet", None, None
     if direction == "prev":
     if pdf_cache["is_parsed"] and index < len(pdf_cache["results"]):
         result = pdf_cache["results"][index]
         if isinstance(result, dict):  # dots.ocr
+            markdown_content = result.get('markdown_content', 'No content available')
             processed_img = result.get('processed_image', None)
             layout_json = result.get('layout_result', None)
         else:  # Dolphin
                     max_pixels = gr.Number(value=MAX_PIXELS, label="Max Pixels")
                 process_btn = gr.Button("🔥 Process Document", variant="primary", elem_classes=["process-button"], size="lg")
                 clear_btn = gr.Button("Clear Document", variant="secondary")
+                # Add Examples component
+                examples = gr.Examples(
+                    examples=["examples/sample_image1.png", "examples/sample_image2.png", "examples/sample_pdf.pdf"],
+                    inputs=file_input,
+                    label="Example Documents"
+                )
             with gr.Column(scale=2):
                 with gr.Tabs():
                     with gr.Tab("✦︎ Processed Image"):
                         markdown_output = gr.Markdown(value="Click 'Process Document' to see extracted content...", height=500)
                     with gr.Tab("⏲ Layout JSON"):
                         json_output = gr.JSON(label="Layout Analysis Results", value=None)
+                with gr.Row():
+                    examples = gr.Examples(
+                        examples=["examples/sample_image1.png", "examples/sample_image2.png", "examples/sample_pdf.pdf"],
+                        inputs=file_input,
+                        label="Example Documents"
+                )
         def handle_file_upload(file_path):
             image, page_info = load_file_for_preview(file_path)
             return image, page_info