Spaces:

Uddipan107
/

ocr-reorder-space

Running

Uddipan Basu Bir

Download checkpoint from HF hub in OcrReorderPipeline

fabf362 about 2 months ago

1.85 kB

	import os
	import json
	import base64
	from io import BytesIO
	from PIL import Image
	import gradio as gr

	from inference import OcrReorderPipeline
	from transformers import AutoProcessor, LayoutLMv3Model, AutoTokenizer

	# 1) Build the shared inference pipeline once, at import time.
	#    Everything is fetched from a single Hub repository.
	repo = "Uddipan107/ocr-layoutlmv3-base-t5-small"

	# LayoutLMv3 model loaded from the repository root.
	model = LayoutLMv3Model.from_pretrained(repo)

	# Tokenizer and processor are both read from the "preprocessor" subfolder.
	tokenizer = AutoTokenizer.from_pretrained(
	    repo,
	    subfolder="preprocessor",
	)
	# apply_ocr=False: infer() passes words and boxes taken from the uploaded
	# JSON, so the processor is not asked to run OCR itself.
	processor = AutoProcessor.from_pretrained(
	    repo,
	    subfolder="preprocessor",
	    apply_ocr=False,
	)

	# One pipeline instance reused by every request.
	# NOTE(review): device=0 presumably selects the first GPU - confirm against
	# OcrReorderPipeline, which is defined in inference.py.
	pipe = OcrReorderPipeline(
	    model,
	    tokenizer,
	    processor,
	    device=0,
	)

	def infer(image_path, json_file):
	    """Return the reordered OCR text for an uploaded image.

	    The JSON upload is expected to hold a list of entries (a single entry
	    object is also accepted). The entry whose ``img_name`` equals the base
	    name of ``image_path`` supplies the words (``src_word_list``) and boxes
	    (``src_wordbox_list``) that are sent, together with the PNG/base64
	    encoded image, to the module-level ``pipe``.

	    Args:
	        image_path: Filesystem path of the uploaded image, or ``None`` when
	            nothing was uploaded.
	        json_file: Either a path string or an object exposing the path as
	            ``.name``, or ``None`` when nothing was uploaded.

	    Returns:
	        The first element of the pipeline result, or a message starting
	        with "❌" when an input is missing, the JSON cannot be read, or no
	        entry matches the image name.

	    Raises:
	        KeyError: If the matching entry lacks ``src_word_list`` or
	            ``src_wordbox_list``.
	    """
	    # Guard against missing uploads instead of failing inside os.path / open.
	    if not image_path or json_file is None:
	        return "❌ Please upload both an image and a JSON file."

	    # 2) Extract the filename the user uploaded
	    img_name = os.path.basename(image_path)

	    # 3) Load the entire JSON. Depending on the Gradio version/config the
	    #    upload arrives as a plain path string or as a temp-file wrapper
	    #    with a ``.name`` attribute; accept both.
	    json_path = getattr(json_file, "name", json_file)
	    try:
	        with open(json_path, "r", encoding="utf-8") as f:
	            data = json.load(f)
	    except (OSError, ValueError) as err:
	        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
	        return f"❌ Could not read JSON file: {err}"

	    # 4) Find the entry matching this image. A lone object is treated as a
	    #    one-element list, and malformed entries are skipped rather than
	    #    aborting the whole lookup.
	    entries = data if isinstance(data, list) else [data]
	    entry = next(
	        (
	            e
	            for e in entries
	            if isinstance(e, dict) and e.get("img_name") == img_name
	        ),
	        None,
	    )
	    if entry is None:
	        return f"❌ No JSON entry found for image '{img_name}'"

	    words = entry["src_word_list"]
	    boxes = entry["src_wordbox_list"]

	    # 5) Read the image and encode it to base64 for the pipeline. The
	    #    context manager closes the underlying file handle; convert()
	    #    returns an independent in-memory copy that stays usable afterwards.
	    with Image.open(image_path) as source_image:
	        img = source_image.convert("RGB")
	    buf = BytesIO()
	    img.save(buf, format="PNG")
	    b64 = base64.b64encode(buf.getvalue()).decode()

	    # 6) Call the pipeline and return the reordered text
	    return pipe(b64, words, boxes)[0]

	# Input widgets for the UI.
	# The image is delivered as a file path so infer() can match on its filename.
	image_input = gr.Image(type="filepath", label="Upload Image")
	# The JSON upload holds the list of entries that infer() searches.
	json_input = gr.File(label="Upload JSON file")

	# Wire infer() to the two inputs and a plain-text output.
	demo = gr.Interface(
	    fn=infer,
	    inputs=[image_input, json_input],
	    outputs="text",
	    title="OCR Reorder (match image → JSON entry)",
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()