pdf-convert / app.py
sblumenf's picture
Update app.py
919f74f verified
raw
history blame
1.89 kB
import subprocess
import sys
# Install the 'marker' package from GitHub if it is not already importable.
try:
    import marker
except ImportError:
    import importlib

    subprocess.check_call(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "git+https://github.com/VikParuchuri/marker.git",
        ]
    )
    # pip ran in a child process, so this interpreter's import finders may
    # still hold directory listings cached before the install. Reset them
    # so the retry below can see the newly installed package.
    importlib.invalidate_caches()
    import marker

# Startup diagnostic: list the names exposed by the top-level package.
print("Available modules in marker:", dir(marker))

# NOTE(review): confirm that the installed marker release really provides
# `marker.pdf.PDF`; if it does not, this import fails at startup.
from marker.pdf import PDF  # Updated import path
import os
import gradio as gr
# Maps each dropdown label to (output file extension, name of the PDF method
# that renders that format).
_OUTPUT_FORMATS = {
    "Markdown (.md)": ("md", "to_markdown"),
    "HTML (.html)": ("html", "to_html"),
    "JSON (.json)": ("json", "to_json"),
}


def convert_pdf(input_file, output_format):
    """
    Convert a PDF file to the specified format.

    Args:
        input_file: Uploaded PDF file. Either an object whose ``name``
            attribute holds the path of the file on disk, or the path itself.
        output_format: Desired output format label: "Markdown (.md)",
            "HTML (.html)" or "JSON (.json)".

    Returns:
        Path to the converted file ("output.<extension>" in the current
        working directory), or the string "Unsupported output format!" when
        ``output_format`` is not one of the labels above.
    """
    # Validate first so an unknown format never triggers PDF parsing.
    spec = _OUTPUT_FORMATS.get(output_format)
    if spec is None:
        return "Unsupported output format!"
    extension, method_name = spec

    # Accept both a file-like wrapper (path in `.name`) and a plain path.
    pdf_path = getattr(input_file, "name", input_file)
    pdf = PDF(pdf_path)  # Initialize the PDF object

    # Render before opening the output so a failed conversion does not leave
    # a truncated, empty file behind.
    rendered = getattr(pdf, method_name)()

    # The extension comes from the table so it matches what the label
    # promises (".md", not ".markdown").
    # NOTE(review): the file name is fixed per format, so simultaneous
    # requests for the same format write to the same path.
    output_file_path = f"output.{extension}"
    # Explicit UTF-8 keeps the output independent of the host locale.
    with open(output_file_path, "w", encoding="utf-8") as f:
        f.write(rendered)
    return output_file_path
# Build the UI from the top-level component classes. The `gr.inputs.*` /
# `gr.outputs.*` namespaces used previously are deprecated aliases and are
# not available in newer Gradio releases.
output_format_dropdown = gr.Dropdown(
    choices=["Markdown (.md)", "HTML (.html)", "JSON (.json)"],
    label="Select Output File Format",
)

# `type` is left at the component default rather than hard-coding "file".
# NOTE(review): the default differs between Gradio major versions (file
# object vs. path string) - confirm the handler copes with the pinned version.
file_input = gr.File(label="Upload PDF File")
output_file = gr.File(label="Download Converted File")

# Wire the converter to the two inputs and the single downloadable output.
gr_interface = gr.Interface(
    fn=convert_pdf,
    inputs=[file_input, output_format_dropdown],
    outputs=output_file,
    title="PDF Converter",
    description="Upload a PDF file and select the desired output format (Markdown, HTML, or JSON).",
)

# Start the web server; this call runs when the script is executed.
gr_interface.launch()