taprosoft
committed on
Commit
·
669f293
1
Parent(s):
0ef8ba3
feat: update examples
Browse files
- app.py +28 -3
- header.html +4 -0
app.py
CHANGED
@@ -33,6 +33,7 @@ from utils import remove_images_from_markdown, trim_pages
|
|
33 |
TRIMMED_PDF_PATH = Path("/tmp/trimmed_input")
|
34 |
TRIMMED_PDF_PATH.mkdir(exist_ok=True)
|
35 |
DO_WARMUP = os.getenv("DO_WARMUP", "True").lower() == "true"
|
|
|
36 |
|
37 |
|
38 |
def convert_document(path, method, start_page=0, enabled=True):
|
@@ -198,7 +199,9 @@ with gr.Blocks(
|
|
198 |
with gr.Row():
|
199 |
methods = gr.Dropdown(
|
200 |
SUPPORTED_METHODS,
|
201 |
-
label=
|
|
|
|
|
202 |
value=SUPPORTED_METHODS[:2],
|
203 |
multiselect=True,
|
204 |
)
|
@@ -234,6 +237,18 @@ with gr.Blocks(
|
|
234 |
visible=True,
|
235 |
height=800,
|
236 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
237 |
|
238 |
with gr.Column(variant="panel", scale=5):
|
239 |
with gr.Tabs():
|
@@ -302,7 +317,19 @@ with gr.Blocks(
|
|
302 |
visualization_sub_tabs.append(visual_sub_tab)
|
303 |
|
304 |
input_file.change(fn=lambda x: x, inputs=input_file, outputs=pdf_preview)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
click_event = convert_btn.click(
|
|
|
|
|
|
|
306 |
fn=show_tabs,
|
307 |
inputs=[methods],
|
308 |
outputs=output_tabs,
|
@@ -329,8 +356,6 @@ with gr.Blocks(
|
|
329 |
return msg
|
330 |
|
331 |
def process_method(input_file, start_page, selected_methods, method=method):
|
332 |
-
if input_file is None:
|
333 |
-
raise ValueError("Please upload a PDF file first!")
|
334 |
return convert_document(
|
335 |
input_file,
|
336 |
method=method,
|
|
|
33 |
TRIMMED_PDF_PATH = Path("/tmp/trimmed_input")
|
34 |
TRIMMED_PDF_PATH.mkdir(exist_ok=True)
|
35 |
DO_WARMUP = os.getenv("DO_WARMUP", "True").lower() == "true"
|
36 |
+
MAX_SELECTED_METHODS = 5
|
37 |
|
38 |
|
39 |
def convert_document(path, method, start_page=0, enabled=True):
|
|
|
199 |
with gr.Row():
|
200 |
methods = gr.Dropdown(
|
201 |
SUPPORTED_METHODS,
|
202 |
+
label=(
|
203 |
+
"Conversion methods " f"(select up-to {MAX_SELECTED_METHODS})"
|
204 |
+
),
|
205 |
value=SUPPORTED_METHODS[:2],
|
206 |
multiselect=True,
|
207 |
)
|
|
|
237 |
visible=True,
|
238 |
height=800,
|
239 |
)
|
240 |
+
with gr.Accordion("Notes", open=False):
|
241 |
+
gr.Markdown(
|
242 |
+
container=False,
|
243 |
+
show_label=False,
|
244 |
+
value=(
|
245 |
+
"- Use the playground for non-sensitive data only.\n"
|
246 |
+
"- Figure and formular extraction might not work properly with some methods (we are working on that!).\n" # noqa
|
247 |
+
"- Some methods (e.g: Gemini) may use external API to process the document.\n" # noqa
|
248 |
+
"- Some methods (e.g: Unstructured) may has longer processing time due to not utilizing GPU resource, be patient!\n" # noqa
|
249 |
+
"- If you have suggestion for better default configuration of current methods, please let us know!\n" # noqa
|
250 |
+
),
|
251 |
+
)
|
252 |
|
253 |
with gr.Column(variant="panel", scale=5):
|
254 |
with gr.Tabs():
|
|
|
317 |
visualization_sub_tabs.append(visual_sub_tab)
|
318 |
|
319 |
input_file.change(fn=lambda x: x, inputs=input_file, outputs=pdf_preview)
|
320 |
+
|
321 |
+
def check_preconditions(input_file, selected_methods):
|
322 |
+
if len(selected_methods) > MAX_SELECTED_METHODS:
|
323 |
+
raise ValueError(
|
324 |
+
"Please select up-to " f"{MAX_SELECTED_METHODS} methods only!"
|
325 |
+
)
|
326 |
+
if input_file is None:
|
327 |
+
raise ValueError("Please upload a PDF file first!")
|
328 |
+
|
329 |
click_event = convert_btn.click(
|
330 |
+
fn=check_preconditions,
|
331 |
+
inputs=[input_file, methods],
|
332 |
+
).success(
|
333 |
fn=show_tabs,
|
334 |
inputs=[methods],
|
335 |
outputs=output_tabs,
|
|
|
356 |
return msg
|
357 |
|
358 |
def process_method(input_file, start_page, selected_methods, method=method):
|
|
|
|
|
359 |
return convert_document(
|
360 |
input_file,
|
361 |
method=method,
|
header.html
CHANGED
@@ -29,6 +29,10 @@
|
|
29 |
font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
|
30 |
'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
|
31 |
">
|
|
|
|
|
|
|
|
|
32 |
PDF Parsers Playground
|
33 |
</h1>
|
34 |
</div>
|
|
|
29 |
font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
|
30 |
'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
|
31 |
">
|
32 |
+
<img style="width: 35px; height: 35px; display: inline; margin: -10px 0px;"
|
33 |
+
src=""
|
34 |
+
alt="chunking-ai"
|
35 |
+
/>
|
36 |
PDF Parsers Playground
|
37 |
</h1>
|
38 |
</div>
|