Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,69 +1,21 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
# Extract image data (e.g., save as image, convert to base64)
|
23 |
-
images.append(element)
|
24 |
-
|
25 |
-
# Implement table extraction logic (e.g., using heuristics or advanced techniques)
|
26 |
-
# ...
|
27 |
-
|
28 |
-
# Convert extracted data to desired format and populate download_data
|
29 |
-
if output_format == "JSON":
|
30 |
-
json_data = {
|
31 |
-
"text": text,
|
32 |
-
"tables": tables, # Implement table conversion to JSON
|
33 |
-
"images": images # Implement image conversion to JSON (e.g., base64)
|
34 |
-
}
|
35 |
-
download_data = json.dumps(json_data).encode("utf-8") # Encode JSON for download
|
36 |
-
|
37 |
-
elif output_format == "Markdown":
|
38 |
-
markdown_text = f"# Extracted Text\n\n{text}\n\n# Tables\n"
|
39 |
-
# Implement table conversion to Markdown
|
40 |
-
# ...
|
41 |
-
markdown_text += "\n# Images\n"
|
42 |
-
# Implement image conversion to Markdown (e.g., embedding images)
|
43 |
-
# ...
|
44 |
-
download_data = markdown_text.encode("utf-8") # Encode Markdown for download
|
45 |
-
|
46 |
-
elif output_format == "HTML":
|
47 |
-
html_text = f"<p>{text}</p>\n\n<h2>Tables</h2>\n"
|
48 |
-
# Implement table conversion to HTML
|
49 |
-
# ...
|
50 |
-
html_text += "<h2>Images</h2>\n"
|
51 |
-
# Implement image conversion to HTML (e.g., embedding images)
|
52 |
-
# ...
|
53 |
-
download_data = html_text.encode("utf-8") # Encode HTML for download
|
54 |
-
|
55 |
-
return text, download_data
|
56 |
-
|
57 |
-
iface = gr.Interface(
|
58 |
-
fn=parse_pdf,
|
59 |
-
inputs=["file", gr.Dropdown(["JSON", "Markdown", "HTML"])],
|
60 |
-
outputs=[
|
61 |
-
gr.Text(label="Output Text"),
|
62 |
-
gr.File(label="Download Output")
|
63 |
-
],
|
64 |
-
title="PDF Parser",
|
65 |
-
description="Parse a PDF and choose the output format."
|
66 |
-
)
|
67 |
-
|
68 |
-
if __name__ == "__main__":
|
69 |
-
iface.launch(share=True) # Set share=True to create a public link
|
|
|
1 |
+
To create a public link, set `share=True` in `launch()`.
|
2 |
+
Traceback (most recent call last):
|
3 |
+
File "/usr/local/lib/python3.10/site-packages/gradio/queueing.py", line 624, in process_events
|
4 |
+
response = await route_utils.call_process_api(
|
5 |
+
File "/usr/local/lib/python3.10/site-packages/gradio/route_utils.py", line 323, in call_process_api
|
6 |
+
output = await app.get_blocks().process_api(
|
7 |
+
File "/usr/local/lib/python3.10/site-packages/gradio/blocks.py", line 2043, in process_api
|
8 |
+
result = await self.call_function(
|
9 |
+
File "/usr/local/lib/python3.10/site-packages/gradio/blocks.py", line 1590, in call_function
|
10 |
+
prediction = await anyio.to_thread.run_sync( # type: ignore
|
11 |
+
File "/usr/local/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
|
12 |
+
return await get_async_backend().run_sync_in_worker_thread(
|
13 |
+
File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2505, in run_sync_in_worker_thread
|
14 |
+
return await future
|
15 |
+
File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 1005, in run
|
16 |
+
result = context.run(func, *args)
|
17 |
+
File "/usr/local/lib/python3.10/site-packages/gradio/utils.py", line 865, in wrapper
|
18 |
+
response = f(*args, **kwargs)
|
19 |
+
File "/home/user/app/app.py", line 35, in parse_pdf
|
20 |
+
download_data = json.dumps(json_data).encode("utf-8") # Encode JSON for download
|
21 |
+
NameError: name 'json' is not defined
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|