Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
import gradio as gr
|
2 |
-
from
|
3 |
import os
|
4 |
|
5 |
def convert_pdf(input_file, output_format):
|
6 |
"""
|
7 |
Convert a PDF file to the specified format.
|
8 |
-
|
9 |
Args:
|
10 |
input_file: Uploaded PDF file.
|
11 |
output_format: Desired output format (Markdown, HTML, JSON).
|
@@ -13,21 +13,21 @@ def convert_pdf(input_file, output_format):
|
|
13 |
Returns:
|
14 |
Path to the converted file.
|
15 |
"""
|
|
|
|
|
|
|
16 |
# Check the output format and define the output file path
|
17 |
-
output_file_path = f"output.{output_format.lower()}"
|
18 |
|
19 |
if output_format == "Markdown (.md)":
|
20 |
-
# Placeholder: Replace with actual PDF to Markdown conversion logic
|
21 |
with open(output_file_path, "w") as f:
|
22 |
-
f.write(
|
23 |
elif output_format == "HTML (.html)":
|
24 |
-
# Placeholder: Replace with actual PDF to HTML conversion logic
|
25 |
with open(output_file_path, "w") as f:
|
26 |
-
f.write(
|
27 |
elif output_format == "JSON (.json)":
|
28 |
-
# Placeholder: Replace with actual PDF to JSON conversion logic
|
29 |
with open(output_file_path, "w") as f:
|
30 |
-
f.write(
|
31 |
else:
|
32 |
return "Unsupported output format!"
|
33 |
|
|
|
1 |
import gradio as gr
|
2 |
+
from marker import PDF
|
3 |
import os
|
4 |
|
5 |
def convert_pdf(input_file, output_format):
|
6 |
"""
|
7 |
Convert a PDF file to the specified format.
|
8 |
+
|
9 |
Args:
|
10 |
input_file: Uploaded PDF file.
|
11 |
output_format: Desired output format (Markdown, HTML, JSON).
|
|
|
13 |
Returns:
|
14 |
Path to the converted file.
|
15 |
"""
|
16 |
+
# Ensure input file is processed correctly
|
17 |
+
pdf = PDF(input_file.name)
|
18 |
+
|
19 |
# Check the output format and define the output file path
|
20 |
+
output_file_path = f"output.{output_format.split(' ')[0].lower()}"
|
21 |
|
22 |
if output_format == "Markdown (.md)":
|
|
|
23 |
with open(output_file_path, "w") as f:
|
24 |
+
f.write(pdf.to_markdown())
|
25 |
elif output_format == "HTML (.html)":
|
|
|
26 |
with open(output_file_path, "w") as f:
|
27 |
+
f.write(pdf.to_html())
|
28 |
elif output_format == "JSON (.json)":
|
|
|
29 |
with open(output_file_path, "w") as f:
|
30 |
+
f.write(pdf.to_json())
|
31 |
else:
|
32 |
return "Unsupported output format!"
|
33 |
|