Spaces:

atlury
/

document-layout-comparison

Running on Zero

File size: 2,180 Bytes

b764ffe
 
 
 
73cd058
b764ffe
db520f8
682c5ed
79b9485
 
 
 
 
 
 
682c5ed
73cd058
4dee5e9
b764ffe
 
 
 
79b9485
 
 
 
 
 
 
 
 
b764ffe
682c5ed
 
 
79b9485
b764ffe
 
4dee5e9
 
 
682c5ed
79b9485
 
682c5ed
 
4dee5e9
 
 
682c5ed
4dee5e9
 
 
 
682c5ed
4dee5e9
 
 
b764ffe
 
 
682c5ed

import gradio as gr
from ultralytics import YOLO
import cv2
import numpy as np
import os
import requests
import torch
import huggingface_hub
from accelerate import Accelerator
from huggingface_hub import notebook_login  # Added this for HF login
from huggingface_hub.utils import HfHubHTTPError  # Added this to catch HF login errors
# Authenticate with the Hugging Face Hub without user interaction.
# `notebook_login()` renders an ipywidgets prompt and is meant for
# Jupyter/Colab sessions only; in a plain script there is nobody to fill it in
# (and it raises ImportError when ipywidgets is not installed). Log in with a
# token from the environment instead, and stay anonymous when none is set.
_hf_token = os.environ.get("HF_TOKEN")
if _hf_token:
    huggingface_hub.login(token=_hf_token)
# Initialize Accelerator (selects the device exposed as `accelerator.device`)
accelerator = Accelerator()


# Load the model file
model_path = "yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
if not os.path.exists(model_path):
    # Download the model file if it doesn't exist
    model_url = "https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
    # Download to a temporary name and rename only on success, so a failed or
    # interrupted transfer never leaves a truncated/HTML file behind that the
    # existence check above would accept as a valid checkpoint on the next run.
    partial_path = model_path + ".part"
    try:
        # Stream the body to disk instead of buffering the whole checkpoint in
        # memory; the timeout is (connect, read) seconds.
        with requests.get(model_url, stream=True, timeout=(10, 60)) as response:
            # requests does not raise on 4xx/5xx by itself; without this call
            # an error page would be written to disk as the model file.
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial_path, model_path)
    except requests.HTTPError as e:
        if e.response is not None and e.response.status_code == 401:
            print("Authentication error. Please login to Hugging Face Hub.")
        # Without the checkpoint the model cannot be loaded below, so surface
        # the download failure rather than a confusing load error.
        raise
    finally:
        # After a successful rename the partial file no longer exists.
        if os.path.exists(partial_path):
            os.remove(partial_path)
# Load the document segmentation model
docseg_model = YOLO(model_path)


# NOTE(review): kept as in the original; whether `prepare` moves or wraps the
# YOLO object depends on the installed ultralytics/accelerate versions -- confirm.
docseg_model = accelerator.prepare(docseg_model)

def process_image(image):
    """Run document-layout detection on an image and summarise the detections.

    Args:
        image: An RGB image that ``np.array`` can convert to an array
            (presumably the PIL image or ndarray supplied by the Gradio
            input; the interface definition is not visible here).

    Returns:
        A ``(annotated_img, detected_areas_labels)`` tuple. ``annotated_img``
        is the RGB image with the detections drawn on it and
        ``detected_areas_labels`` holds one ``"LABEL: confidence"`` line per
        detected box. On failure the tuple is
        ``(None, "Error during processing: ...")``.
    """
    if image is None:
        # Nothing was supplied; report it in the same shape as other errors
        # instead of relying on a conversion failure further down.
        return None, "Error during processing: no image provided"

    try:
        # Convert image to the format YOLO model expects (BGR ndarray)
        bgr_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        # Pass the ndarray straight to the model: Ultralytics treats torch
        # tensors as already-preprocessed BCHW float batches, so wrapping this
        # HWC uint8 array in a tensor makes prediction fail. The library does
        # its own preprocessing and device placement for ndarray inputs.
        results = docseg_model.predict(bgr_image)
        result = results[0]  # Get the first (and usually only) result

        # Extract annotated image from results (plot() draws on a BGR copy)
        annotated_img = cv2.cvtColor(result.plot(), cv2.COLOR_BGR2RGB)

        # Prepare detected areas and labels as text output. Boxes expose class
        # ids and confidences as tensors; names are looked up via result.names.
        boxes = result.boxes
        detected_areas_labels = "\n".join(
            f"{result.names[int(class_id)].upper()}: {confidence:.2f}"
            for class_id, confidence in zip(boxes.cls.tolist(), boxes.conf.tolist())
        )
    except Exception as e:
        # UI boundary: report the failure to the caller instead of crashing.
        return None, f"Error during processing: {e}"

    return annotated_img, detected_areas_labels

# The rest of the code remains the same (Gradio interface)