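"""Gradio demo for OcrReorderPipeline.

The app takes an uploaded image plus a JSON file of OCR entries, matches the
image to its entry by filename, and returns the reordered text produced by
the pipeline.
"""
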
import os
import json
import base64
from io import BytesIO
from PIL import Image
import gradio as gr
from inference import OcrReorderPipeline
from transformers import AutoProcessor, LayoutLMv3Model, AutoTokenizer

# 1) Load model, tokenizer, and processor from the Hub repo
repo = "Uddipan107/ocr-layoutlmv3-base-t5-small"
model = LayoutLMv3Model.from_pretrained(repo)
tokenizer = AutoTokenizer.from_pretrained(repo, subfolder="preprocessor")
processor = AutoProcessor.from_pretrained(repo, subfolder="preprocessor", apply_ocr=False)
pipe = OcrReorderPipeline(model, tokenizer, processor, device=0)
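
# NOTE: hedged sketch of the JSON entry shape this app expects, inferred from
# the key lookups in infer() below; the box format shown ([x0, y0, x1, y1])
# is an assumption, not confirmed by this file:
#
# [
#   {
#     "img_name": "example.png",
#     "src_word_list": ["word", ...],
#     "src_wordbox_list": [[x0, y0, x1, y1], ...]
#   },
#   ...
# ]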

def infer(image_path, json_file):
    # 2) Extract the filename the user uploaded
    img_name = os.path.basename(image_path)

    # 3) Load the entire JSON; assume it's a list of entries
    with open(json_file.name, "r", encoding="utf-8") as f:
        data = json.load(f)

    # 4) Find the entry matching this image
    entry = next((e for e in data if e["img_name"] == img_name), None)
    if entry is None:
        return f"❌ No JSON entry found for image '{img_name}'"

    words = entry["src_word_list"]
    boxes = entry["src_wordbox_list"]

    # 5) Read the image and encode it to base64 for the pipeline
    img = Image.open(image_path).convert("RGB")
    buf = BytesIO()
    img.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()

    # 6) Call the pipeline and return the reordered text
    return pipe(b64, words, boxes)[0]
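
# Hedged usage sketch: calling infer() directly for a quick local test, with
# hypothetical files "sample.png" and "entries.json". gr.File hands infer()
# an object exposing a .name path, so a SimpleNamespace shim stands in here.
#
#   from types import SimpleNamespace
#   print(infer("sample.png", SimpleNamespace(name="entries.json")))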

demo = gr.Interface(
    fn=infer,
    inputs=[
        # get the file path so we can match the filename
        gr.Image(type="filepath", label="Upload Image"),
        # the JSON file containing a list of entries
        gr.File(label="Upload JSON file"),
    ],
    outputs="text",
    title="OCR Reorder (match image → JSON entry)",
)

if __name__ == "__main__":
    demo.launch()