"""Gradio app that converts an uploaded PDF to Markdown, HTML, or JSON via marker."""

import importlib
import os
import subprocess
import sys
import tempfile

# Install the 'marker' package from GitHub if not already installed.
try:
    import marker
except ImportError:
    subprocess.check_call(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "git+https://github.com/VikParuchuri/marker.git",
        ]
    )
    # The package appeared after interpreter start-up; refresh the import
    # system's finder caches so the import below can see it.
    importlib.invalidate_caches()
    import marker

# Verify the marker package is installed and check its contents.
print("Available modules in marker:", dir(marker))

# NOTE(review): this assumes the installed marker release exposes
# `marker.pdf.PDF` with `to_markdown()` / `to_html()` / `to_json()` methods
# that return text -- confirm against the marker version actually installed.
from marker.pdf import PDF  # Updated import path

import gradio as gr

# Dropdown label -> (output file extension, name of the PDF method to call).
# Single source of truth for both the UI choices and the conversion dispatch.
_OUTPUT_FORMATS = {
    "Markdown (.md)": ("md", "to_markdown"),
    "HTML (.html)": ("html", "to_html"),
    "JSON (.json)": ("json", "to_json"),
}


def convert_pdf(input_file, output_format):
    """Convert a PDF file to the specified format.

    Args:
        input_file: Uploaded PDF file. Either a filesystem path (str or
            path-like) or an object whose ``.name`` attribute holds the path.
        output_format: Desired output format; one of the keys of
            ``_OUTPUT_FORMATS`` ("Markdown (.md)", "HTML (.html)",
            "JSON (.json)").

    Returns:
        Path to the converted file, written as ``output.<ext>`` inside a
        freshly created temporary directory.

    Raises:
        gr.Error: If no file was uploaded or the output format is not one of
            the supported choices.
    """
    if input_file is None:
        raise gr.Error("Please upload a PDF file.")

    # Validate the format before doing any conversion work; this also covers
    # the case where nothing was selected (output_format is None).
    format_spec = _OUTPUT_FORMATS.get(output_format)
    if format_spec is None:
        raise gr.Error("Unsupported output format!")
    extension, method_name = format_spec

    # Accept both a plain path and a file-like wrapper exposing `.name`.
    if isinstance(input_file, (str, os.PathLike)):
        pdf_path = os.fspath(input_file)
    else:
        pdf_path = input_file.name

    pdf = PDF(pdf_path)  # Initialize the PDF object
    converted_text = getattr(pdf, method_name)()

    # Write into a per-call temporary directory so simultaneous requests do
    # not overwrite each other's result. The directory is not removed here
    # because the returned path must stay valid after this function returns.
    output_dir = tempfile.mkdtemp(prefix="pdf_converter_")
    output_file_path = os.path.join(output_dir, f"output.{extension}")
    with open(output_file_path, "w", encoding="utf-8") as f:
        f.write(converted_text)

    return output_file_path


output_format_dropdown = gr.Dropdown(
    list(_OUTPUT_FORMATS),
    label="Select Output File Format",
)

# No explicit `type=`: the accepted values differ between Gradio major
# versions, and convert_pdf handles both the path and the file-object form.
file_input = gr.File(label="Upload PDF File")

output_file = gr.File(label="Download Converted File")

gr_interface = gr.Interface(
    fn=convert_pdf,
    inputs=[file_input, output_format_dropdown],
    outputs=output_file,
    title="PDF Converter",
    description="Upload a PDF file and select the desired output format (Markdown, HTML, or JSON).",
)

gr_interface.launch()