Spaces:

PaddlePaddle
/

PP-StructureV3_Online_Demo

Running

App Files Files Community

Bobholamovic committed 29 days ago

Commit

b33797d

1 Parent(s): 455679a

[Feat] Another big update

Browse files

Files changed (1) hide show

app.py +153 -43

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import base64
 import io
 import json
 import os
 import tempfile
 import uuid
 import zipfile
@@ -14,7 +15,7 @@ from PIL import Image
 # API Configuration
 API_URL = "https://cf38vaydqdl2l4p2.aistudio-hub.baidu.com/layout-parsing"
-TOKEN = os.getenv("API_TOKEN")
 LOGO_PATH = Path(__file__).parent / "pp-structurev3.png"
 with open(LOGO_PATH, "rb") as image_file:
@@ -180,7 +181,56 @@ def embed_images_into_markdown_text(markdown_text, markdown_images):
     return markdown_text
-def process_file(file_path):
     """Process uploaded file with API"""
     try:
         if not file_path:
@@ -204,7 +254,15 @@ def process_file(file_path):
         response = requests.post(
             API_URL,
-            json={"file": file_data, "fileType": 0 if file_type == "pdf" else 1},
             headers=headers,
             timeout=1000,
         )
@@ -237,6 +295,10 @@ def process_file(file_path):
             )
             markdown_content_list.append(markdown_content)
         return {
             "original_file": file_path,
             "file_type": file_type,
@@ -244,6 +306,7 @@ def process_file(file_path):
             "markdown_texts": markdown_texts,
             "markdown_images": markdown_images,
             "markdown_content_list": markdown_content_list,
             "input_images": input_images,
             "api_response": result,
         }
@@ -318,9 +381,31 @@ with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
             file_types=[".pdf", ".jpg", ".jpeg", ".png"],
             type="filepath",
         )
         process_btn = gr.Button("Analyze Document", variant="primary")
         gr.Markdown(
-            f"*Please note that only the first {MAX_NUM_PAGES} pages will be processed.*"
         )
         loading_spinner = gr.Column(visible=False, elem_classes=["loader-container"])
@@ -335,28 +420,31 @@ with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
     # Results display section
     with gr.Column():
         gr.Markdown("### Results")
-        layout_ordering_images = []
-        markdown_display_list = []
-        for i in range(MAX_NUM_PAGES):
-            with gr.Row():
-                layout_ordering_images.append(
-                    gr.Image(
-                        label=f"Layout Ordering Image {i}",
-                        show_label=True,
-                        visible=False,
                     )
-                )
-                markdown_display_list.append(
-                    gr.Markdown(
-                        visible=False,
-                        container=True,
-                        show_copy_button=True,
-                        latex_delimiters=[
-                            {"left": "$$", "right": "$$", "display": True},
-                            {"left": "$", "right": "$", "display": False},
-                        ],
                     )
-                )
     # Download section
     with gr.Column(elem_classes=["download-section"]):
@@ -366,43 +454,65 @@ with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
     # Interaction logic
     def toggle_spinner():
-        return gr.update(visible=True)
     def hide_spinner():
-        return gr.update(visible=False)
-    def update_display(results):
         ret_img = []
-        ret_cont = []
-        cnt = 0
-        for img, cont in zip(
-            results["layout_ordering_images"], results["markdown_content_list"]
-        ):
-            ret_img.append(gr.update(value=bytes_to_image(img), visible=True))
-            ret_cont.append(gr.update(value=cont, visible=True))
-            cnt += 1
-        for _ in range(cnt, MAX_NUM_PAGES):
-            ret_img.append(gr.update(visible=False))
-            ret_cont.append(gr.update(visible=False))
         return ret_img + ret_cont
     process_btn.click(toggle_spinner, outputs=[loading_spinner]).then(
-        process_file, inputs=[file_input], outputs=[results_state]
     ).then(hide_spinner, outputs=[loading_spinner]).then(
         update_display,
-        inputs=[results_state],
         outputs=layout_ordering_images + markdown_display_list,
     )
     download_all_btn.click(
         export_full_results, inputs=[results_state], outputs=[download_file]
-    ).success(lambda: gr.update(visible=True), outputs=[download_file])
 if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",
-        server_port=7860,
-        share=True,
         favicon_path=LOGO_PATH,
     )

 import io
 import json
 import os
+import re
 import tempfile
 import uuid
 import zipfile
 # API Configuration
 API_URL = "https://cf38vaydqdl2l4p2.aistudio-hub.baidu.com/layout-parsing"
+TOKEN = os.getenv("API_TOKEN", "")
 LOGO_PATH = Path(__file__).parent / "pp-structurev3.png"
 with open(LOGO_PATH, "rb") as image_file:
     return markdown_text
# HACK: Adapted from PaddleX 3.0.0 code
def concatenate_markdown_pages(markdown_list):
    """Join per-page Markdown results into one Markdown string.

    Args:
        markdown_list: Iterable of mappings, one per page, read through the
            keys ``"text"`` (the page's Markdown), ``"isStart"`` and
            ``"isEnd"``. The flags are presumably "the page's first element
            starts a paragraph" and "the page's last element ends a
            paragraph" -- confirm against the layout-parsing API.

    Returns:
        The concatenated Markdown. A page is glued onto the previous one
        when the previous page's ``isEnd`` and its own ``isStart`` are both
        falsy: with no separator if either character at the seam is in the
        range U+4E00-U+9FFF, otherwise with a single space. Every other
        page boundary gets a blank line (``"\\n\\n"``). Nothing is emitted
        before the first non-empty output, so the result never starts with
        a separator; an empty input yields ``""``.
    """
    cjk_char = re.compile(r"[\u4e00-\u9fff]")
    chunks = []
    # Last character written so far; "" while the output is still empty.
    tail = ""
    previous_page_ended_paragraph = True
    for page in markdown_list:
        text = page["text"]
        if not tail:
            # Nothing to separate from yet (fixes the leading "\n\n").
            separator = ""
        elif page["isStart"] or previous_page_ended_paragraph:
            separator = "\n\n"
        elif cjk_char.match(tail) or cjk_char.match(text):
            # match() anchors at the start, so only text's first character
            # is inspected; no space is inserted next to a CJK character.
            separator = ""
        else:
            separator = " "
        chunk = separator + text
        chunks.append(chunk)
        if chunk:
            tail = chunk[-1]
        previous_page_ended_paragraph = page["isEnd"]
    return "".join(chunks)
+def process_file(
+    file_path,
+    use_formula_recognition,
+    use_chart_recognition,
+    use_doc_orientation_classify,
+    use_doc_unwarping,
+    use_textline_orientation,
+):
     """Process uploaded file with API"""
     try:
         if not file_path:
         response = requests.post(
             API_URL,
+            json={
+                "file": file_data,
+                "fileType": 0 if file_type == "pdf" else 1,
+                "useFormulaRecognition": use_formula_recognition,
+                "useChartRecognition": use_chart_recognition,
+                "useDocOrientationClassify": use_doc_orientation_classify,
+                "useDocUnwarping": use_doc_unwarping,
+                "useTextlineOrientation": use_textline_orientation,
+            },
             headers=headers,
             timeout=1000,
         )
             )
             markdown_content_list.append(markdown_content)
+        concatenated_markdown_content = concatenate_markdown_pages(
+            [res["markdown"] for res in layout_results]
+        )
         return {
             "original_file": file_path,
             "file_type": file_type,
             "markdown_texts": markdown_texts,
             "markdown_images": markdown_images,
             "markdown_content_list": markdown_content_list,
+            "concatenated_markdown_content": concatenated_markdown_content,
             "input_images": input_images,
             "api_response": result,
         }
             file_types=[".pdf", ".jpg", ".jpeg", ".png"],
             type="filepath",
         )
+        with gr.Row():
+            use_formula_recognition_cb = gr.Checkbox(
+                value=True, label="Use formula recognition"
+            )
+            use_chart_recognition_cb = gr.Checkbox(
+                value=False, label="Use chart recognition"
+            )
+        with gr.Row():
+            use_doc_orientation_classify_cb = gr.Checkbox(
+                value=False, label="Use document image orientation classification"
+            )
+            use_doc_unwarping_cb = gr.Checkbox(
+                value=False, label="Use text image unwarping"
+            )
+        with gr.Row():
+            use_textline_orientation_cb = gr.Checkbox(
+                value=False, label="Use text line orientation classification"
+            )
+            concatenate_pages_cb = gr.Checkbox(value=True, label="Concatenate pages")
         process_btn = gr.Button("Analyze Document", variant="primary")
         gr.Markdown(
+            f"""
+            1. Only the first {MAX_NUM_PAGES} pages will be processed.
+            2. Some formulas might not display correctly because of renderer limitations.
+            """
         )
         loading_spinner = gr.Column(visible=False, elem_classes=["loader-container"])
     # Results display section
     with gr.Column():
         gr.Markdown("### Results")
+        with gr.Row():
+            with gr.Column():
+                layout_ordering_images = []
+                for i in range(MAX_NUM_PAGES):
+                    layout_ordering_images.append(
+                        gr.Image(
+                            label=f"Layout Ordering Image {i}",
+                            show_label=True,
+                            visible=False,
+                        )
                     )
+            with gr.Column():
+                markdown_display_list = []
+                for i in range(MAX_NUM_PAGES):
+                    markdown_display_list.append(
+                        gr.Markdown(
+                            visible=False,
+                            container=True,
+                            show_copy_button=True,
+                            latex_delimiters=[
+                                {"left": "$$", "right": "$$", "display": True},
+                                {"left": "$", "right": "$", "display": False},
+                            ],
+                        )
                     )
     # Download section
     with gr.Column(elem_classes=["download-section"]):
     # Interaction logic
     def toggle_spinner():
         """Return a visible ``gr.Column``; the click chain writes it to ``loading_spinner``."""
         shown_column = gr.Column(visible=True)
         return shown_column
     def hide_spinner():
         """Return a hidden ``gr.Column``; the click chain writes it to ``loading_spinner``."""
         hidden_column = gr.Column(visible=False)
         return hidden_column
+    def update_display(results, concatenate_pages):
+        if not results:
+            return gr.skip()
+        assert len(results["layout_ordering_images"]) <= MAX_NUM_PAGES, len(
+            results["layout_ordering_images"]
+        )
         ret_img = []
+        for img in results["layout_ordering_images"]:
+            ret_img.append(gr.Image(value=bytes_to_image(img), visible=True))
+        for _ in range(len(results["layout_ordering_images"]), MAX_NUM_PAGES):
+            ret_img.append(gr.Image(visible=False))
+        if concatenate_pages:
+            markdown_content = results["concatenated_markdown_content"]
+            ret_cont = [gr.Markdown(value=markdown_content, visible=True)]
+            for _ in range(1, MAX_NUM_PAGES):
+                ret_cont.append(gr.Markdown(visible=False))
+        else:
+            assert len(results["markdown_content_list"]) <= MAX_NUM_PAGES, len(
+                results["markdown_content_list"]
+            )
+            ret_cont = []
+            for cont in results["markdown_content_list"]:
+                ret_cont.append(gr.Markdown(value=cont, visible=True))
+            for _ in range(len(results["markdown_content_list"]), MAX_NUM_PAGES):
+                ret_cont.append(gr.Markdown(visible=False))
         return ret_img + ret_cont
     process_btn.click(toggle_spinner, outputs=[loading_spinner]).then(
+        process_file,
+        inputs=[
+            file_input,
+            use_formula_recognition_cb,
+            use_chart_recognition_cb,
+            use_doc_orientation_classify_cb,
+            use_doc_unwarping_cb,
+            use_textline_orientation_cb,
+        ],
+        outputs=[results_state],
     ).then(hide_spinner, outputs=[loading_spinner]).then(
         update_display,
+        inputs=[results_state, concatenate_pages_cb],
         outputs=layout_ordering_images + markdown_display_list,
     )
     download_all_btn.click(
         export_full_results, inputs=[results_state], outputs=[download_file]
+    ).success(lambda: gr.File(visible=True), outputs=[download_file])
 if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",
+        server_port=8860,
         favicon_path=LOGO_PATH,
     )