# Page {i+1}

import html
import tempfile
import uuid
from io import BytesIO

import gradio as gr
from ebooklib import epub
from PIL import Image

from olmocr.model import process_pdf  # your forked olmocr model

def _page_text(page):
    """Return the OCR text stored in one page result, or "" when absent.

    Reads ``page["decoded"]["natural_text"]`` and tolerates missing keys as
    well as explicit ``None`` values at either level.
    """
    decoded = (page or {}).get("decoded") or {}
    return decoded.get("natural_text") or ""


def _text_to_xhtml(text):
    """Escape *text* for embedding in XHTML and turn newlines into <br/>."""
    # Escape first so that '&', '<' and '>' coming out of the OCR step cannot
    # break (or inject) markup; the <br/> tags are added afterwards.
    return html.escape(text, quote=False).replace("\n", "<br/>")


def process_pdf_to_epub(pdf_file, title, author):
    """Convert an uploaded PDF into an EPUB with one chapter per page.

    Args:
        pdf_file: The value delivered by the ``gr.File`` input. Either an
            object exposing the path as ``.name`` or a plain path string.
        title: Book title; "Untitled Document" is used when empty.
        author: Author name; omitted from the metadata when empty.

    Returns:
        Path (str) of the generated ``.epub`` file. A path is returned, not
        an in-memory buffer, because the result is bound to a ``gr.File``
        output component.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file first.")
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Run the OCR + LLM pipeline from olmocr.
    # NOTE(review): assumes process_pdf returns an iterable of dicts shaped
    # like {"decoded": {"natural_text": ...}} - confirm against the fork.
    print("Starting PDF processing...")
    page_results = process_pdf(pdf_path) or []

    # Create the EPUB book. Each book gets its own identifier instead of a
    # shared hard-coded one.
    book = epub.EpubBook()
    book.set_identifier(f"urn:uuid:{uuid.uuid4()}")
    book.set_title((title or "").strip() or "Untitled Document")
    book.set_language("en")
    author = (author or "").strip()
    if author:
        book.add_author(author)

    # Best-effort cover: a failure here must never abort the conversion, so
    # the broad except is deliberate. The image is encoded in memory so that
    # concurrent requests do not fight over a shared "cover.jpg" on disk.
    # NOTE(review): Pillow is being asked to open the uploaded PDF itself;
    # this presumably fails for real PDFs - confirm, and render page 1 with
    # a PDF rasteriser if a cover is actually wanted.
    try:
        with Image.open(pdf_path) as img:
            cover_buffer = BytesIO()
            img.convert("RGB").save(cover_buffer, "JPEG")
        book.set_cover("cover.jpg", cover_buffer.getvalue())
    except Exception as e:
        print("Could not generate cover:", e)

    # Add one chapter per non-empty page. Numbering follows the original
    # page position, so skipped (blank) pages leave gaps in the numbering.
    chapters = []
    for page_number, page in enumerate(page_results, start=1):
        text = _page_text(page)
        if not text.strip():
            continue
        chapter = epub.EpubHtml(
            title=f"Page {page_number}",
            file_name=f"page_{page_number}.xhtml",
            lang="en",
        )
        chapter.content = (
            f"<h1>Page {page_number}</h1><p>{_text_to_xhtml(text)}</p>"
        )
        book.add_item(chapter)
        chapters.append(chapter)

    # Define spine and table of contents
    book.toc = chapters
    book.spine = ["nav"] + chapters
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Reserve a unique .epub path and close the handle before writing so the
    # file can be reopened on every platform. delete=False because the file
    # has to outlive this function for the download component to read it.
    with tempfile.NamedTemporaryFile(suffix=".epub", delete=False) as tmp:
        output_path = tmp.name
    epub.write_epub(output_path, book)
    return output_path

# Gradio UI. Components are declared in the order they appear on the page.
with gr.Blocks() as demo:
    gr.Markdown("# PDF to EPUB Converter\nPowered by `olmOCR`")

    # The upload widget sits alone in its own row and is limited to .pdf.
    with gr.Row():
        pdf_input = gr.File(file_types=[".pdf"], label="Upload PDF")

    # Optional metadata for the generated book.
    title = gr.Textbox(placeholder="Optional title", label="EPUB Title")
    author = gr.Textbox(placeholder="Optional author name", label="Author")

    convert_button = gr.Button("Convert to EPUB")
    epub_output = gr.File(file_types=[".epub"], label="Download EPUB")

    # Clicking the button runs the conversion on the three inputs and sends
    # its return value to the download component.
    convert_button.click(
        process_pdf_to_epub,
        inputs=[pdf_input, title, author],
        outputs=epub_output,
    )

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()