from utils import fix_problematic_imports, prepare_env_mineru

# NOTE(review): both helpers are called before the remaining imports below;
# presumably they patch import/environment state that the backend libraries
# need at import time — confirm in `utils` before reordering these lines.
fix_problematic_imports()  # noqa
prepare_env_mineru()  # noqa


import time
from pathlib import Path

import gradio as gr
import pymupdf4llm
from gradio_pdf import PDF

from backends import (
    convert_docling,
    convert_marker,
    convert_mineru,
    convert_unstructured,
)
from backends.settings import ENABLE_DEBUG_MODE
from utils import remove_images_from_markdown, trim_pages

# Directory passed to `trim_pages` as its `output_path`; created at import time.
TRIMMED_PDF_PATH = Path("/tmp/trimmed_input")
# `parents=True` keeps start-up from failing with FileNotFoundError when the
# parent directory does not exist yet; `exist_ok=True` makes restarts harmless.
TRIMMED_PDF_PATH.mkdir(parents=True, exist_ok=True)


def convert_document(path, method, enabled=True):
    """Convert a PDF to Markdown with one backend and time the conversion.

    Args:
        path: Path of the input PDF. It is passed through `trim_pages` and the
            returned path is what the backend receives.
        method: Backend name: "Docling", "Marker", "Unstructured", "PyMuPDF"
            or "MinerU". Any other value leaves the text as "unknown method".
        enabled: When false, nothing is converted and empty outputs are
            returned immediately.

    Returns:
        A 4-tuple `(duration_message, text, text_without_images,
        debug_image_paths)`, where `text_without_images` is
        `remove_images_from_markdown(text)`. With `enabled=False` the tuple
        is `("", "", "", [])`.
    """
    if not enabled:
        return "", "", "", []

    print("Processing file", path, "with method", method)

    # benchmarking: perf_counter() is monotonic, so the measured duration is
    # not distorted by system clock adjustments the way time.time() can be.
    start = time.perf_counter()

    path = trim_pages(path, output_path=TRIMMED_PDF_PATH)
    file_name = Path(path).stem
    debug_image_paths = []
    text = "unknown method"

    # Backends that share the `(path, file_name) -> (text, debug_image_paths)`
    # call shape.
    converters = {
        "Docling": convert_docling,
        "Marker": convert_marker,
        "Unstructured": convert_unstructured,
        "MinerU": convert_mineru,
    }
    if method == "PyMuPDF":
        # This call returns only the Markdown text, so the debug image list
        # stays empty.
        text = pymupdf4llm.to_markdown(
            path,
            embed_images=True,
        )
    elif method in converters:
        text, debug_image_paths = converters[method](path, file_name)

    duration = time.perf_counter() - start
    duration_message = f"Conversion with {method} took *{duration:.2f} seconds*"
    print(duration_message)
    return (
        duration_message,
        text,
        remove_images_from_markdown(text),
        debug_image_paths,
    )


def show_tabs(selected_methods):
    """Build one visibility update per supported method.

    The updates follow the order of `SUPPORTED_METHODS`; an entry is marked
    visible exactly when its method appears in `selected_methods`.
    """
    return [
        gr.update(visible=name in selected_methods)
        for name in SUPPORTED_METHODS
    ]


# Delimiter pairs handed to the Markdown output components through their
# `latex_delimiters` argument: `$$...$$` is flagged as display math
# (`display: True`), `$...$` as non-display math (`display: False`).
latex_delimiters = [
    {"left": "$$", "right": "$$", "display": True},
    {"left": "$", "right": "$", "display": False},
]

# startup test (also for loading models the first time)
# perf_counter() is a monotonic clock, which is what elapsed-time measurement
# needs; the value is only ever used for the subtraction below.
start_startup = time.perf_counter()
# Relative path: resolved against the current working directory at start-up.
WARMUP_PDF_PATH = "table.pdf"
SUPPORTED_METHODS = ["Docling", "Marker", "Unstructured", "MinerU", "PyMuPDF"]
# How many times each backend converts the warm-up document.
WARMUP_RUNS = 1

print("Warm-up sequence")
for method in SUPPORTED_METHODS:
    for _ in range(WARMUP_RUNS):
        convert_document(WARMUP_PDF_PATH, method)
startup_duration = time.perf_counter() - start_startup
print(f"Total start-up time: {startup_duration:.2f} seconds")

with gr.Blocks(
    theme=gr.themes.Ocean(),
) as demo:
    # The page header markup is read from a file next to the working directory.
    # The encoding is stated explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open("header.html", "r", encoding="utf-8") as file:
        header = file.read()
    gr.HTML(header)
    # Accumulators filled while the per-method result tabs are created; the
    # event wiring further down uses them as output targets.
    output_components = []
    output_tabs = []
    visualization_sub_tabs = []
    # Method that is preselected in the "Conversion methods" dropdown.
    first_method = SUPPORTED_METHODS[0]

    # Top row: file upload and status line on the left, conversion controls on
    # the right.
    with gr.Row():
        with gr.Column(variant="panel", scale=5):
            input_file = gr.File(
                label="Upload PDF document",
                file_types=[
                    ".pdf",
                ],
            )
            # Status line updated by the conversion event chain.
            progress_status = gr.Markdown("", show_label=False, container=False)

        with gr.Column(variant="panel", scale=5):
            with gr.Row():
                methods = gr.Dropdown(
                    SUPPORTED_METHODS,
                    label="Conversion methods",
                    # A multiselect dropdown carries a list of choices, so the
                    # initial value is given as a list as well.
                    value=[first_method],
                    multiselect=True,
                )
            with gr.Row():
                # Only shown when debug mode is enabled in the backend settings.
                visual_checkbox = gr.Checkbox(
                    label="Enable debug visualizations",
                    visible=ENABLE_DEBUG_MODE,
                    value=True,
                )
            with gr.Row():
                convert_btn = gr.Button("Convert", variant="primary", scale=2)
                clear_btn = gr.ClearButton(value="Clear", scale=1)

    # Bottom row: PDF preview on the left, one result tab per conversion method
    # on the right.
    with gr.Row():
        with gr.Column(variant="panel", scale=5):
            pdf_preview = PDF(
                label="PDF preview",
                interactive=False,
                visible=True,
                height=800,
            )

        with gr.Column(variant="panel", scale=5):
            with gr.Tabs():
                # Every method tab starts hidden; `show_tabs` decides which
                # ones become visible when "Convert" is clicked.
                for method in SUPPORTED_METHODS:
                    with gr.Tab(method, visible=False) as output_tab:
                        with gr.Tabs():
                            with gr.Tab("Markdown rendering"):
                                markdown_render = gr.Markdown(
                                    label="Markdown rendering",
                                    height=900,
                                    show_copy_button=True,
                                    line_breaks=True,
                                    latex_delimiters=latex_delimiters,
                                )
                            with gr.Tab(
                                "Debug visualizations",
                                visible=ENABLE_DEBUG_MODE,
                            ) as visual_sub_tab:
                                debug_images = gr.Gallery(
                                    show_label=False,
                                    container=False,
                                    interactive=False,
                                )
                            with gr.Tab("Raw text"):
                                markdown_text = gr.TextArea(
                                    lines=45, show_label=False, container=False
                                )
                            with gr.Tab("Reference"):
                                output_description = gr.Markdown(
                                    container=False,
                                    show_label=False,
                                )

                    # Order matters: it mirrors the 4-tuple returned by
                    # `convert_document` (duration message, text, text without
                    # images, debug image paths), and the click handlers slice
                    # this flat list in groups of four per method.
                    output_components.extend(
                        [
                            output_description,
                            markdown_render,
                            markdown_text,
                            debug_images,
                        ]
                    )
                    output_tabs.append(output_tab)
                    visualization_sub_tabs.append(visual_sub_tab)

    # Mirror the uploaded file into the PDF preview component.
    input_file.change(fn=lambda x: x, inputs=input_file, outputs=pdf_preview)
    # Clicking "Convert" first toggles the visibility of the method tabs; the
    # per-method steps below are chained onto this event one after another.
    click_event = convert_btn.click(
        fn=show_tabs,
        inputs=[methods],
        outputs=output_tabs,
    )
    for idx, method in enumerate(SUPPORTED_METHODS):

        def progress_message(selected_methods, method=method):
            """Return the status text for `method`, or a no-op update if skipped.

            The counter is the 1-based position of `method` among the selected
            methods, taken in `SUPPORTED_METHODS` order.
            """
            if method not in selected_methods:
                # Leave the status line untouched for methods that are skipped.
                return gr.update()

            selected_in_order = [
                name for name in SUPPORTED_METHODS if name in selected_methods
            ]
            position = selected_in_order.index(method) + 1
            return (
                f"Processing ({position} / "
                f"{len(selected_methods)}) **{method}**...\n\n"
            )

        def process_method(input_file, selected_methods, method=method):
            """Run `convert_document` for `method` if it is selected.

            Raises:
                ValueError: If `method` is selected but no file was uploaded.
            """
            enabled = method in selected_methods
            # Only selected methods need a file. Checking this for skipped
            # methods too would raise the same error once per supported
            # backend, because every step in the chain runs.
            if enabled and input_file is None:
                raise ValueError("Please upload a PDF file first!")
            return convert_document(input_file, method=method, enabled=enabled)

        # `method=method` binds the current loop value at definition time.
        click_event = click_event.then(
            fn=lambda methods, method=method: progress_message(methods, method),
            inputs=[methods],
            outputs=[progress_status],
        ).then(
            fn=lambda input_file, methods, method=method: process_method(
                input_file, methods, method
            ),
            inputs=[input_file, methods],
            # Four output components were registered per method, in order.
            outputs=output_components[idx * 4 : (idx + 1) * 4],
        )

    # Last link of the chain: write the completion note into the status line.
    click_event.then(lambda: "All tasks completed.", outputs=[progress_status])

    # Components handled by the "Clear" button: the upload, the preview and
    # every per-method output widget.
    clear_btn.add([input_file, pdf_preview, *output_components])

    # The checkbox switches the visibility of all "Debug visualizations"
    # sub-tabs at once (one update per registered sub-tab).
    visual_checkbox.change(
        fn=lambda state: [
            gr.update(visible=state) for _ in visualization_sub_tabs
        ],
        inputs=visual_checkbox,
        outputs=visualization_sub_tabs,
    )

    demo.launch(show_error=True)