Spaces:

atlury
/

document-layout-comparison

Running on Zero

File size: 2,197 Bytes

b764ffe
 
 
 
73cd058
b764ffe
db520f8
73cd058
4dee5e9
b764ffe
 
 
 
 
 
 
 
 
b44991c
4dee5e9
b764ffe
 
4dee5e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b764ffe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4dee5e9

import gradio as gr
from ultralytics import YOLO
import cv2
import numpy as np
import os
import requests
import torch

# Checkpoint used for document-layout detection. It is looked up relative to the
# current working directory and downloaded from the Hugging Face Hub when absent.
model_path = "yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
if not os.path.exists(model_path):
    model_url = "https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
    # Download under a temporary name and rename only after the transfer has
    # fully succeeded. Writing straight to model_path would leave a truncated
    # file (or an HTML error page) behind on failure, and the exists() check
    # above would then accept that broken file on every later start.
    partial_path = model_path + ".part"
    try:
        # stream=True avoids holding the whole checkpoint in memory; the timeout
        # is (connect, read) in seconds so a stalled connection cannot hang startup.
        with requests.get(model_url, stream=True, timeout=(10, 300)) as response:
            # Fail loudly on 4xx/5xx instead of saving the error body as a model.
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial_path, model_path)
    finally:
        # Only present here if the download or rename failed part-way.
        if os.path.exists(partial_path):
            os.remove(partial_path)

# Load the document segmentation model.
# NOTE: `device` is computed but not passed to YOLO below; the model is left to
# choose its own device (the original author's intent per the earlier comment).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
docseg_model = YOLO(model_path)

def _describe_detections(result):
    """Return one ``LABEL: confidence`` line per detected box in *result*.

    Class names are looked up in ``result.names`` using each box's class id;
    the per-box objects themselves carry no label attribute. Returns an empty
    string when the result has no boxes.
    """
    boxes = result.boxes
    if boxes is None:
        return ""
    # tolist() turns the per-box tensors into plain Python floats, which (unlike
    # a 1-element tensor) accept the ":.2f" format spec below.
    class_ids = boxes.cls.tolist()
    confidences = boxes.conf.tolist()
    return "\n".join(
        f"{str(result.names[int(class_id)]).upper()}: {confidence:.2f}"
        for class_id, confidence in zip(class_ids, confidences)
    )


def process_image(image):
    """Run layout detection on one image and summarise what was found.

    Args:
        image: The input picture, as a PIL image or anything ``np.array`` can
            turn into an RGB pixel array. ``None`` is accepted and reported
            as an error rather than being sent to the model.

    Returns:
        A ``(annotated_image, text)`` tuple. On success ``annotated_image`` is
        an RGB array with the detections drawn on it and ``text`` lists each
        detection as ``LABEL: confidence`` (one per line, empty if nothing was
        detected). On failure ``annotated_image`` is ``None`` and ``text`` is
        a message starting with ``"Error during processing: "``.
    """
    if image is None:
        # Nothing to analyse (e.g. the callback fired before an image was
        # supplied); answer directly instead of failing inside OpenCV.
        return None, "Error during processing: no image provided"

    try:
        # The model is given BGR input (OpenCV channel order), so convert from RGB.
        bgr_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
        results = docseg_model.predict(bgr_image)
        result = results[0]  # a single input image yields a single result

        # plot() is treated as returning BGR, so convert back to RGB for display.
        annotated_img = cv2.cvtColor(result.plot(), cv2.COLOR_BGR2RGB)

        detected_areas_labels = _describe_detections(result)
    except Exception as e:
        # UI boundary: surface the failure in the text box instead of crashing.
        return None, f"Error during processing: {e}"

    return annotated_img, detected_areas_labels

# Build the web UI: one image in, the annotated image plus a text summary out.
with gr.Blocks() as interface:
    gr.Markdown("### Document Segmentation using YOLOv8")

    # Components are created in the order they should appear on the page.
    input_image = gr.Image(label="Input Image", type="pil")
    output_image = gr.Image(label="Annotated Image", type="pil")
    output_text = gr.Textbox(label="Detected Areas and Labels")

    # Clicking "Run" feeds the uploaded image to process_image and routes its
    # two return values to the image and text outputs, in that order.
    run_button = gr.Button("Run")
    run_button.click(
        process_image,
        inputs=input_image,
        outputs=[output_image, output_text],
    )

# Start serving as soon as the script runs (deliberately not behind a
# __main__ guard), requesting a public share link.
interface.launch(share=True)