File size: 1,557 Bytes
b764ffe
 
e134b51
be425b2
682c5ed
ec23149
 
682c5ed
e134b51
 
 
be425b2
ec23149
4dee5e9
6cd21dc
 
e134b51
ec23149
e134b51
6cd21dc
e134b51
be425b2
 
 
 
6cd21dc
 
4dee5e9
6cd21dc
 
e134b51
6cd21dc
 
ec23149
6cd21dc
ec23149
6cd21dc
 
 
 
b764ffe
6cd21dc
 
ec23149
b764ffe
6cd21dc
ec23149
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import gradio as gr
from ultralytics import YOLO
import spaces
import torch

# Load pre-trained YOLOv8 model
# NOTE: the weights file is loaded at import time from the working directory
# (on HF Spaces it is resolved from the repo / hub cache). Checkpoint appears
# to be a DocLayNet-finetuned YOLOv8x for document-layout detection.
model = YOLO("yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt")

# Get class names from model
# model.names maps integer class id -> human-readable label string.
class_names = model.names

@spaces.GPU(duration=60)
def process_image(image):
    """Run document-layout detection on an uploaded image.

    Parameters
    ----------
    image : PIL.Image.Image
        Image supplied by the Gradio ``gr.Image(type="pil")`` input.

    Returns
    -------
    tuple
        ``(annotated_image, labels_text)`` on success, where
        ``annotated_image`` is the rendered detection plot from
        ``result.plot()`` (a numpy array — presumably BGR; Gradio accepts
        arrays even though the output component is typed "pil") and
        ``labels_text`` is one ``"LABEL: 0.97"`` line per detected box.
        On failure returns ``(None, error_message)`` so the UI shows the
        error instead of crashing.
    """
    try:
        # Run inference; ultralytics returns one Results object per input image.
        results = model(source=image, save=False, show_labels=True, show_conf=True, show_boxes=True)
        result = results[0]  # Single-image call -> take the first result

        # Render boxes/labels onto the image.
        annotated_image = result.plot()

        # box.cls and box.conf are 1-element tensors. Tensor.__format__ only
        # supports format specs for 0-dim tensors, so `f"{box.conf:.2f}"`
        # raises TypeError — extract the Python scalar with .item() first
        # (the original already did this for box.cls but not box.conf).
        detected_areas_labels = "\n".join([
            f"{class_names[int(box.cls.item())].upper()}: {box.conf.item():.2f}" for box in result.boxes
        ])

        return annotated_image, detected_areas_labels
    except Exception as e:
        # Broad catch is deliberate: surface any inference failure in the
        # textbox rather than taking down the Space.
        return None, f"Error processing image: {e}"


# Create the Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# Document Segmentation Demo (ZeroGPU)")  
    # Input Components
    input_image = gr.Image(type="pil", label="Upload Image")

    # Output Components
    output_image = gr.Image(type="pil", label="Annotated Image")
    output_text = gr.Textbox(label="Detected Areas and Labels")

    # Button to trigger inference
    btn = gr.Button("Run Document Segmentation")
    btn.click(fn=process_image, inputs=input_image, outputs=[output_image, output_text])

# Launch the demo
demo.queue(max_size=1).launch()  # Queue to handle concurrent requests