taprosoft committed on
Commit
b7d4a95
·
1 Parent(s): 465d368

fix: add examples

Browse files
app.py CHANGED
@@ -70,9 +70,9 @@ def convert_document(path, method, start_page=0, enabled=True):
70
  text, debug_image_paths = convert_sycamore(path, file_name)
71
  # elif method == "Zerox":
72
  # text, debug_image_paths = convert_zerox(path, file_name)
73
- elif method == "Img2Table":
74
  text, debug_image_paths = convert_img2table(path, file_name)
75
- elif method == "GMFT":
76
  text, debug_image_paths = convert_gmft(path, file_name)
77
  else:
78
  raise ValueError(f"Unsupported method: {method}")
@@ -148,7 +148,7 @@ latex_delimiters = [
148
 
149
  # startup test (also for loading models the first time)
150
  start_startup = time.time()
151
- WARMUP_PDF_PATH = "table.pdf"
152
  SUPPORTED_METHODS = [
153
  "PyMuPDF",
154
  "Docling",
@@ -156,8 +156,8 @@ SUPPORTED_METHODS = [
156
  "MinerU",
157
  "Unstructured",
158
  "Gemini (API)",
159
- "Img2Table",
160
- "GMFT",
161
  "Sycamore",
162
  # "Zerox"
163
  ]
@@ -188,21 +188,15 @@ with gr.Blocks(
188
  ".pdf",
189
  ],
190
  )
191
- with gr.Accordion(
192
- "Advanced settings",
193
- open=False,
194
- ):
195
- start_page = gr.Number(
196
- label="Starting page (only max 5 consecutive pages are processed)",
197
- minimum=1,
198
- maximum=100,
199
- step=1,
200
- value=1,
201
- )
202
- visual_checkbox = gr.Checkbox(
203
- label="Enable debug visualization",
204
- visible=ENABLE_DEBUG_MODE,
205
- value=True,
206
  )
207
  progress_status = gr.Markdown("", show_label=False, container=False)
208
  output_file = gr.File(
@@ -219,6 +213,26 @@ with gr.Blocks(
219
  value=SUPPORTED_METHODS[:2],
220
  multiselect=True,
221
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  with gr.Row():
223
  convert_btn = gr.Button("Convert", variant="primary", scale=2)
224
  clear_btn = gr.ClearButton(value="Clear", scale=1)
@@ -358,7 +372,7 @@ with gr.Blocks(
358
  outputs=visualization_sub_tabs,
359
  )
360
 
361
- demo.launch(
362
  show_error=True,
363
  max_file_size="50mb",
364
  )
 
70
  text, debug_image_paths = convert_sycamore(path, file_name)
71
  # elif method == "Zerox":
72
  # text, debug_image_paths = convert_zerox(path, file_name)
73
+ elif method == "Img2Table (table-only)":
74
  text, debug_image_paths = convert_img2table(path, file_name)
75
+ elif method == "GMFT (table-only)":
76
  text, debug_image_paths = convert_gmft(path, file_name)
77
  else:
78
  raise ValueError(f"Unsupported method: {method}")
 
148
 
149
  # startup test (also for loading models the first time)
150
  start_startup = time.time()
151
+ WARMUP_PDF_PATH = "examples/table.pdf"
152
  SUPPORTED_METHODS = [
153
  "PyMuPDF",
154
  "Docling",
 
156
  "MinerU",
157
  "Unstructured",
158
  "Gemini (API)",
159
+ "Img2Table (table-only)",
160
+ "GMFT (table-only)",
161
  "Sycamore",
162
  # "Zerox"
163
  ]
 
188
  ".pdf",
189
  ],
190
  )
191
+ with gr.Accordion("Examples:"):
192
+ example_root = os.path.join(os.path.dirname(__file__), "examples")
193
+ gr.Examples(
194
+ examples=[
195
+ os.path.join(example_root, _)
196
+ for _ in os.listdir(example_root)
197
+ if _.endswith("pdf")
198
+ ],
199
+ inputs=input_file,
 
 
 
 
 
 
200
  )
201
  progress_status = gr.Markdown("", show_label=False, container=False)
202
  output_file = gr.File(
 
213
  value=SUPPORTED_METHODS[:2],
214
  multiselect=True,
215
  )
216
+ with gr.Row():
217
+ with gr.Accordion(
218
+ "Advanced settings",
219
+ open=False,
220
+ ):
221
+ start_page = gr.Number(
222
+ label=(
223
+ "Starting page (only max 5 "
224
+ "consecutive pages are processed)"
225
+ ),
226
+ minimum=1,
227
+ maximum=100,
228
+ step=1,
229
+ value=1,
230
+ )
231
+ visual_checkbox = gr.Checkbox(
232
+ label="Enable debug visualization",
233
+ visible=ENABLE_DEBUG_MODE,
234
+ value=True,
235
+ )
236
  with gr.Row():
237
  convert_btn = gr.Button("Convert", variant="primary", scale=2)
238
  clear_btn = gr.ClearButton(value="Clear", scale=1)
 
372
  outputs=visualization_sub_tabs,
373
  )
374
 
375
+ demo.queue(default_concurrency_limit=2,).launch(
376
  show_error=True,
377
  max_file_size="50mb",
378
  )
examples/academic_paper_figure.pdf ADDED
Binary file (63.2 kB). View file
 
examples/academic_paper_formula.pdf ADDED
Binary file (42.1 kB). View file
 
examples/complex_layout.pdf ADDED
Binary file (43.1 kB). View file
 
examples/handwriting_form.pdf ADDED
The diff for this file is too large to render. See raw diff
 
examples/invoice.pdf ADDED
Binary file (561 kB). View file
 
examples/magazine_complex_layout.pdf ADDED
Binary file (391 kB). View file
 
table.pdf → examples/table.pdf RENAMED
File without changes