# Uddipan Basu Bir
# Add custom OCR reorder pipeline + Gradio UI
# 5b9baff
# (web file-viewer residue: raw | history blame | 1.15 kB)
import json, base64
from io import BytesIO
from PIL import Image
import gradio as gr
from inference import OcrReorderPipeline
from transformers import (
AutoProcessor,
LayoutLMv3Model,
T5ForConditionalGeneration,
AutoTokenizer
)
import torch
# Load the model, tokenizer and processor from the Hub model repo.
repo = "Uddipan107/ocr-layoutlmv3-base-t5-small"
model = LayoutLMv3Model.from_pretrained(repo)
tokenizer = AutoTokenizer.from_pretrained(repo)
# apply_ocr=False: words and boxes are supplied by the caller of the app,
# so the processor is told not to run OCR itself.
processor = AutoProcessor.from_pretrained(repo, apply_ocr=False)

# Use the first CUDA device when one is available; otherwise fall back to
# CPU so the app still starts on CPU-only hosts instead of failing on a
# hard-coded device=0.
# NOTE(review): -1 is the transformers Pipeline convention for "CPU"; this
# assumes OcrReorderPipeline follows it -- confirm against inference.py.
pipe = OcrReorderPipeline(
    model,
    tokenizer,
    processor,
    device=0 if torch.cuda.is_available() else -1,
)
def infer(image, words_json, boxes_json):
    """Run the OCR reorder pipeline on one image and its OCR output.

    Args:
        image: PIL image from the Gradio image input, or None when the
            form was submitted without an image.
        words_json: JSON-encoded list of OCR words.
        boxes_json: JSON-encoded list of bounding boxes, one per word.

    Returns:
        The first string returned by the pipeline, or "" when the
        pipeline returns an empty result.

    Raises:
        ValueError: if no image was given, if either text field is not a
            JSON list, or if the two lists differ in length.
    """
    if image is None:
        raise ValueError("Please provide an image.")

    # Validate both text fields before doing any image work so that bad
    # input fails fast with a message naming the offending field.
    parsed = []
    for label, raw in (("Words", words_json), ("Boxes", boxes_json)):
        try:
            value = json.loads(raw)
        except (TypeError, ValueError) as err:
            raise ValueError(f"{label} must be valid JSON: {err}") from err
        if not isinstance(value, list):
            raise ValueError(f"{label} must be a JSON list.")
        parsed.append(value)
    words, boxes = parsed

    if len(words) != len(boxes):
        raise ValueError(
            f"Got {len(words)} words but {len(boxes)} boxes; "
            "each word needs exactly one box."
        )

    # The pipeline is handed the image as a base64-encoded PNG string.
    with BytesIO() as buf:
        image.save(buf, "PNG")
        b64 = base64.b64encode(buf.getvalue()).decode()

    # The pipeline returns a list of strings; the UI shows the first one.
    results = pipe(b64, words, boxes)
    return results[0] if results else ""
# Gradio UI: one image plus two JSON text fields in, plain text out.
demo = gr.Interface(
    title="OCR Reorder Pipeline",
    fn=infer,
    inputs=[
        gr.Image(label="Image", type="pil"),
        gr.Textbox(label="Words (JSON list)"),
        gr.Textbox(label="Boxes (JSON list)"),
    ],
    outputs="text",
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()