taprosoft committed on
Commit
669f293
·
1 Parent(s): 0ef8ba3

feat: update examples

Browse files
Files changed (2) hide show
  1. app.py +28 -3
  2. header.html +4 -0
app.py CHANGED
@@ -33,6 +33,7 @@ from utils import remove_images_from_markdown, trim_pages
33
  TRIMMED_PDF_PATH = Path("/tmp/trimmed_input")
34
  TRIMMED_PDF_PATH.mkdir(exist_ok=True)
35
  DO_WARMUP = os.getenv("DO_WARMUP", "True").lower() == "true"
 
36
 
37
 
38
  def convert_document(path, method, start_page=0, enabled=True):
@@ -198,7 +199,9 @@ with gr.Blocks(
198
  with gr.Row():
199
  methods = gr.Dropdown(
200
  SUPPORTED_METHODS,
201
- label="Conversion methods",
 
 
202
  value=SUPPORTED_METHODS[:2],
203
  multiselect=True,
204
  )
@@ -234,6 +237,18 @@ with gr.Blocks(
234
  visible=True,
235
  height=800,
236
  )
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
  with gr.Column(variant="panel", scale=5):
239
  with gr.Tabs():
@@ -302,7 +317,19 @@ with gr.Blocks(
302
  visualization_sub_tabs.append(visual_sub_tab)
303
 
304
  input_file.change(fn=lambda x: x, inputs=input_file, outputs=pdf_preview)
 
 
 
 
 
 
 
 
 
305
  click_event = convert_btn.click(
 
 
 
306
  fn=show_tabs,
307
  inputs=[methods],
308
  outputs=output_tabs,
@@ -329,8 +356,6 @@ with gr.Blocks(
329
  return msg
330
 
331
  def process_method(input_file, start_page, selected_methods, method=method):
332
- if input_file is None:
333
- raise ValueError("Please upload a PDF file first!")
334
  return convert_document(
335
  input_file,
336
  method=method,
 
33
  TRIMMED_PDF_PATH = Path("/tmp/trimmed_input")
34
  TRIMMED_PDF_PATH.mkdir(exist_ok=True)
35
  DO_WARMUP = os.getenv("DO_WARMUP", "True").lower() == "true"
36
+ MAX_SELECTED_METHODS = 5
37
 
38
 
39
  def convert_document(path, method, start_page=0, enabled=True):
 
199
  with gr.Row():
200
  methods = gr.Dropdown(
201
  SUPPORTED_METHODS,
202
+ label=(
203
+ "Conversion methods " f"(select up-to {MAX_SELECTED_METHODS})"
204
+ ),
205
  value=SUPPORTED_METHODS[:2],
206
  multiselect=True,
207
  )
 
237
  visible=True,
238
  height=800,
239
  )
240
+ with gr.Accordion("Notes", open=False):
241
+ gr.Markdown(
242
+ container=False,
243
+ show_label=False,
244
+ value=(
245
+ "- Use the playground for non-sensitive data only.\n"
246
+ "- Figure and formular extraction might not work properly with some methods (we are working on that!).\n" # noqa
247
+ "- Some methods (e.g: Gemini) may use external API to process the document.\n" # noqa
248
+ "- Some methods (e.g: Unstructured) may has longer processing time due to not utilizing GPU resource, be patient!\n" # noqa
249
+ "- If you have suggestion for better default configuration of current methods, please let us know!\n" # noqa
250
+ ),
251
+ )
252
 
253
  with gr.Column(variant="panel", scale=5):
254
  with gr.Tabs():
 
317
  visualization_sub_tabs.append(visual_sub_tab)
318
 
319
  input_file.change(fn=lambda x: x, inputs=input_file, outputs=pdf_preview)
320
+
321
+ def check_preconditions(input_file, selected_methods):
322
+ if len(selected_methods) > MAX_SELECTED_METHODS:
323
+ raise ValueError(
324
+ "Please select up-to " f"{MAX_SELECTED_METHODS} methods only!"
325
+ )
326
+ if input_file is None:
327
+ raise ValueError("Please upload a PDF file first!")
328
+
329
  click_event = convert_btn.click(
330
+ fn=check_preconditions,
331
+ inputs=[input_file, methods],
332
+ ).success(
333
  fn=show_tabs,
334
  inputs=[methods],
335
  outputs=output_tabs,
 
356
  return msg
357
 
358
  def process_method(input_file, start_page, selected_methods, method=method):
 
 
359
  return convert_document(
360
  input_file,
361
  method=method,
header.html CHANGED
@@ -29,6 +29,10 @@
29
  font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
30
  'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
31
  ">
 
 
 
 
32
  PDF Parsers Playground
33
  </h1>
34
  </div>
 
29
  font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
30
  'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
31
  ">
32
+ <img style="width: 35px; height: 35px; display: inline; margin: -10px 0px;"
33
+ src=""
34
+ alt="chunking-ai"
35
+ />
36
  PDF Parsers Playground
37
  </h1>
38
  </div>