Spaces:

xiaoyao9184
/

marker

Running

App Files Files Community

xiaoyao9184 committed on 10 days ago

Commit

178bdfd

verified ·

1 Parent(s): 5d2d018

Synced repo using 'sync_with_huggingface' GitHub Action

Browse files

Files changed (2) hide show

gradio_app.py +34 -6
requirements.txt +7 -2

gradio_app.py CHANGED Viewed

@@ -86,7 +86,7 @@ if 'model_dict' not in globals():
 with gr.Blocks(title="Marker") as demo:
     gr.Markdown("""
     # Marker Demo
     This app will let you try marker, a PDF -> Markdown converter. It works with any languages, and extracts images, tables, equations, etc.
     Find the project [here](https://github.com/VikParuchuri/marker).
@@ -129,17 +129,20 @@ with gr.Blocks(title="Marker") as demo:
         in_file.clear(
             fn=show_image,
             inputs=[in_file],
-            outputs=[in_num, in_img, page_range_txt]
         )
         in_file.upload(
             fn=show_image,
             inputs=[in_file],
-            outputs=[in_num, in_img, page_range_txt]
         )
         in_num.change(
             fn=show_image,
             inputs=[in_file, in_num],
-            outputs=[in_num, in_img, page_range_txt]
         )
         def check_page_range(page_range, file):
@@ -156,11 +159,36 @@ with gr.Blocks(title="Marker") as demo:
         page_range_txt.change(
             fn=check_page_range,
             inputs=[page_range_txt, in_file],
-            outputs=[page_range_txt, run_marker_btn]
         )
         # Run Marker
         def run_marker_img(filename, page_range, force_ocr, output_format, debug, use_llm, strip_existing_ocr):
             cli_options = {
                 "output_format": output_format,
                 "page_range": page_range,
@@ -224,4 +252,4 @@ with gr.Blocks(title="Marker") as demo:
         )
 if __name__ == "__main__":
-    demo.launch()

 with gr.Blocks(title="Marker") as demo:
     gr.Markdown("""
     # Marker Demo
+    ![](https://badge.mcpx.dev?type=server 'MCP Server')
     This app will let you try marker, a PDF -> Markdown converter. It works with any languages, and extracts images, tables, equations, etc.
     Find the project [here](https://github.com/VikParuchuri/marker).
         in_file.clear(
             fn=show_image,
             inputs=[in_file],
+            outputs=[in_num, in_img, page_range_txt],
+            api_name=False
         )
         in_file.upload(
             fn=show_image,
             inputs=[in_file],
+            outputs=[in_num, in_img, page_range_txt],
+            api_name=False
         )
         in_num.change(
             fn=show_image,
             inputs=[in_file, in_num],
+            outputs=[in_num, in_img, page_range_txt],
+            api_name=False
         )
         def check_page_range(page_range, file):
         page_range_txt.change(
             fn=check_page_range,
             inputs=[page_range_txt, in_file],
+            outputs=[page_range_txt, run_marker_btn],
+            api_name=False
         )
         # Run Marker
         def run_marker_img(filename, page_range, force_ocr, output_format, debug, use_llm, strip_existing_ocr):
+            """
+            Run marker on the given PDF file and return processed results in multiple formats.
+            Args:
+                filename (str): Path to the input PDF file.
+                page_range (str): Page range to process (e.g., "0-5").
+                force_ocr (bool, optional): If True (default), force OCR even on text-based PDFs.
+                output_format (str, optional): Output format. One of: "markdown", "html", "json".
+                    Defaults to "markdown".
+                debug (bool, optional): If True, return additional debug images (rendered page and layout).
+                    Defaults to False.
+                use_llm (bool, optional): If True, use LLM-assisted parsing for better semantic output.
+                    Defaults to False.
+                strip_existing_ocr (bool, optional): If True, strip embedded OCR text and re-run OCR.
+                    Defaults to False.
+            Returns:
+                tuple:
+                    - markdown_result (str): Markdown output string.
+                    - json_result (str): JSON output string.
+                    - html_result (str): HTML output string.
+                    - page_image (dict or None): Rendered image of PDF page (if debug is True, else None).
+                    - layout_image (dict or None): Visualized layout image (if debug is True, else None).
+            """
             cli_options = {
                 "output_format": output_format,
                 "page_range": page_range,
         )
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True, ssr_mode=False)

requirements.txt CHANGED Viewed

@@ -1,4 +1,9 @@
 torch==2.5.1
 marker-pdf==1.2.3
-gradio==5.8.0
-huggingface-hub==0.26.3

 torch==2.5.1
 marker-pdf==1.2.3
+gradio[mcp]==5.28.0
+huggingface-hub==0.26.3
+# gradio[mcp] 5.28.0 depends on pydantic>=2.11
+pydantic==2.11.4
+# fix mcp bug https://github.com/modelcontextprotocol/python-sdk/issues/101
+mcp==1.7.1