Spaces:

Uddipan107
/

ocr-reorder-space

Sleeping

Uddipan Basu Bir

Add custom OCR reorder pipeline + Gradio UI

5b9baff 2 months ago

1.15 kB

	import json, base64
	from io import BytesIO
	from PIL import Image
	import gradio as gr
	from inference import OcrReorderPipeline
	from transformers import (
	AutoProcessor,
	LayoutLMv3Model,
	T5ForConditionalGeneration,
	AutoTokenizer
	)
	import torch

	# Load the encoder, tokenizer and processor from the published model repo.
	repo = "Uddipan107/ocr-layoutlmv3-base-t5-small"
	model = LayoutLMv3Model.from_pretrained(repo)
	tokenizer = AutoTokenizer.from_pretrained(repo)
	# apply_ocr=False: words and boxes are supplied by the caller (see `infer`)
	# instead of being produced by the processor's built-in OCR step.
	processor = AutoProcessor.from_pretrained(repo, apply_ocr=False)
	# Only target GPU 0 when CUDA is actually present, so the app can still
	# start on CPU-only hardware instead of requesting a non-existent device.
	# NOTE(review): -1 is the transformers `Pipeline` convention for "run on
	# CPU" -- confirm OcrReorderPipeline (inference.py) follows it.
	pipe = OcrReorderPipeline(
	    model,
	    tokenizer,
	    processor,
	    device=0 if torch.cuda.is_available() else -1,
	)

	def infer(image, words_json, boxes_json):
	    """Run the OCR reorder pipeline on one image and its OCR tokens.

	    Args:
	        image: PIL image from the Gradio image input, or None when nothing
	            was uploaded.
	        words_json: JSON-encoded list of OCR words.
	        boxes_json: JSON-encoded list of bounding boxes, one per word.

	    Returns:
	        The first string returned by the pipeline, or "" if the pipeline
	        returns an empty result.

	    Raises:
	        gr.Error: If the image is missing, a text field is not a valid JSON
	            list, or the two lists differ in length.
	    """
	    if image is None:
	        raise gr.Error("Please upload an image.")

	    parsed = []
	    for label, raw in (("Words", words_json), ("Boxes", boxes_json)):
	        try:
	            value = json.loads(raw)
	        except (TypeError, ValueError) as err:
	            # TypeError: field is None; ValueError: empty or malformed JSON.
	            raise gr.Error(f"{label} must be valid JSON: {err}") from err
	        if not isinstance(value, list):
	            raise gr.Error(f"{label} must be a JSON list.")
	        parsed.append(value)
	    words, boxes = parsed

	    # Fail early with a readable message rather than deep inside the model.
	    if len(words) != len(boxes):
	        raise gr.Error(
	            f"Got {len(words)} words but {len(boxes)} boxes; "
	            "each word needs exactly one box."
	        )

	    # The pipeline is called with the image as a base64-encoded PNG string.
	    buf = BytesIO()
	    image.save(buf, format="PNG")
	    b64 = base64.b64encode(buf.getvalue()).decode()

	    # The pipeline returns a list of strings; take the first one, if any.
	    results = pipe(b64, words, boxes)
	    return results[0] if results else ""

	# Gradio UI: one image input plus two JSON text fields, wired to `infer`,
	# with the result shown as plain text.
	demo = gr.Interface(
	    infer,
	    [
	        gr.Image(label="Image", type="pil"),
	        gr.Textbox(label="Words (JSON list)"),
	        gr.Textbox(label="Boxes (JSON list)"),
	    ],
	    "text",
	    title="OCR Reorder Pipeline",
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()