import gradio as gr
from PIL import Image
from ultralytics import YOLO

# Load the pre-trained YOLOv8 document-layout model. The second model is
# left commented out: eagerly loading a placeholder path would crash the
# app at startup with a FileNotFoundError.
docseg_model1 = YOLO("yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt")
# docseg_model2 = YOLO("path/to/your/second/model.pt")  # Replace with your second model's path

# Available models
MODELS = {
    "DocLayNet YOLOv8": docseg_model1,
    # "Your Second Model": docseg_model2,  # Uncomment once the model above is loaded
}

def process_image(image, model_name):
    try:
        # Select the model chosen in the dropdown
        model = MODELS[model_name]

        # Run inference; results is a list with one Results object per image
        results = model(source=image, save=False)
        result = results[0]

        # plot() draws boxes, labels and confidences onto a BGR numpy array;
        # reverse the channel order and wrap as PIL for the gr.Image output
        annotated_image = Image.fromarray(result.plot()[:, :, ::-1])

        # Each Boxes element exposes a class index (.cls) and a confidence
        # (.conf); map the index through result.names for a readable label
        detected_areas_labels = "\n".join(
            f"{result.names[int(box.cls)].upper()}: {float(box.conf):.2f}"
            for box in result.boxes
        )

        return annotated_image, detected_areas_labels
    except Exception as e:
        return None, f"Error processing image: {e}"

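# Note: each ultralytics Boxes element also exposes pixel coordinates via
# .xyxy (standard Boxes API); a one-line sketch, in case the text output
# should later include regions:
#
#   x1, y1, x2, y2 = box.xyxy[0].tolist()
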
# Create the Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# Document Segmentation Demo")
    
    # Input Components
    with gr.Row():
        input_image = gr.Image(type="pil", label="Upload Image")
        model_dropdown = gr.Dropdown(list(MODELS.keys()), label="Select Model", value=list(MODELS.keys())[0])

    # Output Components
    output_image = gr.Image(type="pil", label="Annotated Image")
    output_text = gr.Textbox(label="Detected Areas and Labels")

    # Button to trigger inference
    btn = gr.Button("Run Document Segmentation")
    btn.click(fn=process_image, inputs=[input_image, model_dropdown], outputs=[output_image, output_text])

# Launch the demo
demo.launch()
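
# ---------------------------------------------------------------------------
# Usage sketch: `python app.py` serves the UI on http://127.0.0.1:7860 by
# default; pass share=True to demo.launch() for a temporary public link.
#
# The same inference can be checked without the UI -- a minimal sketch,
# assuming the weight file above is available locally and that "page.png"
# (a hypothetical file name) is a document image:
#
#   from ultralytics import YOLO
#   model = YOLO("yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt")
#   result = model("page.png")[0]
#   for box in result.boxes:
#       print(result.names[int(box.cls)], f"{float(box.conf):.2f}")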