Spaces:

Keemoz0
/

my-table-transformer-structure-recognition

Runtime error

App Files Files Community

my-table-transformer-structure-recognition / app.py

Keemoz0

replace tesseract with easyocr

32ccca8 10 months ago

raw

history blame

2.87 kB

	import gradio as gr
	from huggingface_hub import hf_hub_download
	from PIL import Image
	import torch
	import easyocr
	from transformers import AutoImageProcessor, AutoModelForObjectDetection

	# Pick the compute device up front: CUDA when torch reports it, CPU otherwise.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	# One checkpoint name feeds both the image processor and the detection model.
	_CHECKPOINT = "microsoft/table-transformer-structure-recognition"
	processor = AutoImageProcessor.from_pretrained(_CHECKPOINT)
	model = AutoModelForObjectDetection.from_pretrained(_CHECKPOINT)
	model.to(device)

	# EasyOCR reader; the list holds the language codes to load ('en' = English).
	reader = easyocr.Reader(["en"])

	# Define the inference and OCR function
	def predict(image, score_threshold=0.5):
		"""Detect table-structure regions in *image* and OCR each of them.

		Args:
			image: PIL image supplied by the Gradio input, or ``None`` when
				the form is submitted without an image.
			score_threshold: Minimum detection confidence a region needs in
				order to be kept.

		Returns:
			A dict ``{"ocr_results": [...]}`` with one entry per kept region.
			Each entry holds the pixel ``box`` as ``[left, top, right,
			bottom]``, the recognised ``text`` (a list of strings), and the
			region's ``label`` and ``score``.
		"""
		# Local import: EasyOCR needs an array, and the file does not import numpy.
		import numpy as np

		# Gradio passes None when nothing was uploaded; report "no results".
		if image is None:
			return {"ocr_results": []}

		# Normalise the mode so the processor and the OCR crops both see RGB.
		image = image.convert("RGB")
		image_width, image_height = image.size

		# Preprocess the input image and run the detector without gradients.
		inputs = processor(images=image, return_tensors="pt").to(device)
		with torch.no_grad():
			outputs = model(**inputs)

		# The raw ``pred_boxes`` are normalised (center_x, center_y, width,
		# height) and include "no object" queries. The processor's
		# post-processing drops low-confidence queries and converts the rest
		# to absolute (x_min, y_min, x_max, y_max) pixels for the given size.
		detections = processor.post_process_object_detection(
			outputs,
			threshold=score_threshold,
			target_sizes=[(image_height, image_width)],
		)[0]
		id2label = model.config.id2label

		ocr_results = []
		for score, label, box in zip(
			detections["scores"], detections["labels"], detections["boxes"]
		):
			x_min, y_min, x_max, y_max = box.tolist()

			# Clamp to the image bounds; predictions may overshoot slightly.
			left = max(int(x_min), 0)
			top = max(int(y_min), 0)
			right = min(int(x_max), image_width)
			bottom = min(int(y_max), image_height)

			# Skip regions that collapse to nothing after clamping.
			if right <= left or bottom <= top:
				continue

			# EasyOCR accepts a path, URL, bytes or numpy array, not an
			# arbitrary PIL image, so hand it the crop as an array.
			crop = np.asarray(image.crop((left, top, right, bottom)))
			ocr_text = reader.readtext(crop, detail=0)  # detail=0 -> text only

			label_id = int(label)
			ocr_results.append({
				"box": [left, top, right, bottom],
				"text": ocr_text,
				"label": id2label.get(label_id, str(label_id)),
				"score": float(score),
			})

		# Return OCR results
		return {"ocr_results": ocr_results}

	# Build the Gradio UI: a single image input (delivered to predict as a PIL
	# image) and a JSON view of the returned OCR results.
	image_input = gr.Image(type="pil")
	interface = gr.Interface(fn=predict, inputs=image_input, outputs="json")

	# Start serving the app
	interface.launch()