import os
import json
import base64
from io import BytesIO
from PIL import Image
import gradio as gr

from inference import OcrReorderPipeline
from transformers import AutoProcessor, LayoutLMv3Model, AutoTokenizer

# ── 1) Load model + tokenizer + processor ─────────────────────────
# Everything is fetched from one Hub repository at import time; the
# tokenizer and processor are read from its "preprocessor" subfolder.
repo = "Uddipan107/ocr-layoutlmv3-base-t5-small"
model = LayoutLMv3Model.from_pretrained(repo)
tokenizer = AutoTokenizer.from_pretrained(repo, subfolder="preprocessor")
processor = AutoProcessor.from_pretrained(
    repo,
    subfolder="preprocessor",
    apply_ocr=False,
)
# NOTE(review): device=0 presumably selects the first CUDA GPU — confirm how
# OcrReorderPipeline behaves on a CPU-only host.
pipe = OcrReorderPipeline(model, tokenizer, processor, device=0)

# ── 2) Inference function ──────────────────────────────────────────
def _find_entry(ndjson_path, img_name):
    """Return the first NDJSON record whose ``"img_name"`` equals *img_name*.

    The file is streamed line by line and scanning stops at the first match,
    so the whole file is never held in memory. Blank lines are ignored, and
    records that are not JSON objects or have no ``"img_name"`` key are
    skipped instead of aborting the search.

    Args:
        ndjson_path: Path of a UTF-8 text file with one JSON value per line.
        img_name: Value to look for under the ``"img_name"`` key.

    Returns:
        The matching record (a dict), or ``None`` when nothing matches.

    Raises:
        ValueError: If a line read before the match is not valid JSON; the
            message carries the 1-based line number within the file.
    """
    with open(ndjson_path, "r", encoding="utf-8") as f:
        for lineno, raw in enumerate(f, start=1):
            line = raw.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as err:
                raise ValueError(
                    f"invalid JSON on line {lineno}: {err.msg}"
                ) from err
            if isinstance(record, dict) and record.get("img_name") == img_name:
                return record
    return None


def infer(image_path, json_file):
    """Run the reorder pipeline on one image and its NDJSON annotation.

    The annotation record is selected by comparing its ``"img_name"`` field
    with the base name of *image_path*. The record's ``"src_word_list"`` and
    ``"src_wordbox_list"`` values are forwarded to the module-level ``pipe``
    together with the image, re-encoded as a base64 PNG string.

    Args:
        image_path: Filesystem path of the uploaded image, or ``None`` when
            nothing was uploaded.
        json_file: The uploaded NDJSON file, either as a path (``str`` /
            ``os.PathLike``) or as an object exposing the path as ``.name``;
            ``None`` when nothing was uploaded.

    Returns:
        The first element of the pipeline output, or a human-readable
        message starting with "❌" when an input is missing, the NDJSON file
        cannot be parsed, or no usable record exists for the image.
    """
    if not image_path:
        return "❌ Please upload an image"
    if json_file is None:
        return "❌ Please upload an NDJSON file"

    img_name = os.path.basename(image_path)

    # Depending on the Gradio version/config the upload arrives either as a
    # plain path or as a temp-file wrapper whose path is in `.name`.
    if isinstance(json_file, (str, os.PathLike)):
        json_path = json_file
    else:
        json_path = json_file.name

    # ValueError also covers UnicodeDecodeError raised for a non-UTF-8 file.
    try:
        entry = _find_entry(json_path, img_name)
    except ValueError as err:
        return f"❌ Could not parse JSON file: {err}"
    if entry is None:
        return f"❌ No JSON entry found for image '{img_name}'"

    try:
        words = entry["src_word_list"]
        boxes = entry["src_wordbox_list"]
    except KeyError as err:
        return f"❌ JSON entry for image '{img_name}' is missing key {err}"

    # Re-encode as an RGB PNG held in memory; the context manager makes sure
    # the source file handle is released as soon as the pixels are converted.
    with Image.open(image_path) as src:
        img = src.convert("RGB")
    buf = BytesIO()
    img.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()

    # Call pipeline with `inputs` keyword plus extra args
    reordered = pipe(inputs=b64, words=words, boxes=boxes)[0]
    return reordered

# ── 3) Gradio interface ─────────────────────────────────────────────
# Web UI: one image plus one NDJSON annotation file in, plain text out.
demo = gr.Interface(
    title="OCR Reorder Pipeline",
    fn=infer,
    inputs=[
        # type="filepath": the image is requested as a path, not pixel data.
        gr.Image(label="Upload Image", type="filepath"),
        gr.File(label="Upload JSON (NDJSON)"),
    ],
    outputs="text",
)

if __name__ == "__main__":
    # Launch the web UI only when this file is run as a script, not when it
    # is imported as a module.
    # set share=True if you want a public link
    demo.launch()