Spaces:

Keemoz0
/

my-table-transformer-structure-recognition

Runtime error

my-table-transformer-structure-recognition

File size: 1,505 Bytes

cb14db8
eac3912
becb4f1
cb14db8
5c56c76
 
 
 
fa8646f
5c56c76
 
fa8646f
5c56c76
 
 
 
 
fa8646f
5c56c76
eac3912
becb4f1
 
eac3912
 
becb4f1
 
 
a6fc7d1
5c56c76
 
 
 
eac3912
5c56c76
 
 
f58ee97

import gradio as gr
from transformers import AutoImageProcessor, AutoModelForObjectDetection
import torch

# The image processor and the detection model are both pulled from the same
# Hub checkpoint, so the identifier is spelled out once and reused.
CHECKPOINT = "microsoft/table-transformer-structure-recognition"
processor = AutoImageProcessor.from_pretrained(CHECKPOINT)
model = AutoModelForObjectDetection.from_pretrained(CHECKPOINT)

# Define the inference function
def predict(image):
    """Run table-structure detection on one image and return the raw predictions.

    Args:
        image: The uploaded picture (a PIL image, since the interface is
            configured with ``type="pil"``), or ``None`` when the user
            submitted without uploading anything.

    Returns:
        A JSON-serialisable dict with two keys:

        * ``"predicted_boxes"`` -- ``outputs.pred_boxes`` for the first (and
          only) image, as nested lists.  Per the Table Transformer docs these
          are normalized ``(cx, cy, w, h)`` values, not pixel corners.
        * ``"predicted_classes"`` -- argmax of ``outputs.logits`` over the
          last axis, as nested lists.  The batch axis is kept here (unlike
          the boxes) so existing consumers see the same shape as before.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    # Gradio passes None when "Submit" is pressed with an empty image slot;
    # fail with a readable message instead of an opaque processor error.
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    # Preprocess the input image into model-ready tensors
    inputs = processor(images=image, return_tensors="pt")

    # Inference only: no gradients needed
    with torch.no_grad():
        outputs = model(**inputs)

    # Boxes for the first image in the batch
    predicted_boxes = outputs.pred_boxes[0].cpu().numpy()
    # Highest-scoring class per query.
    # NOTE(review): the logits include a trailing "no object" class, so an id
    # equal to the number of real labels presumably means "nothing detected"
    # for that query -- confirm against model.config.id2label.
    predicted_classes = outputs.logits.argmax(-1).cpu().numpy()

    # Log the relevant information (class IDs and bounding boxes)
    print("Predicted Classes (IDs):", predicted_classes)
    print("Bounding Boxes (normalized cx, cy, w, h):", predicted_boxes)

    # Return the bounding boxes and class IDs for display in JSON
    return {
        "predicted_boxes": predicted_boxes.tolist(),
        "predicted_classes": predicted_classes.tolist(),
    }

# Wire the prediction function into a simple Gradio UI:
# one image upload in (delivered to predict() as a PIL image), JSON out.
image_input = gr.Image(type="pil")
interface = gr.Interface(fn=predict, inputs=image_input, outputs="json")

# Start serving the app
interface.launch()