import gradio as gr
from ultralytics import YOLO
import cv2
import numpy as np
import os
import requests
import torch
import spaces # Import spaces to use ZeroGPU functionality
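# On ZeroGPU Spaces, the @spaces.GPU decorator used below requests a GPU only for
# the duration of each decorated call; the rest of the app runs on CPU.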
# Ensure the model file is in the correct location
model_path = "yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
if not os.path.exists(model_path):
    # Download the model file if it doesn't exist
    model_url = "https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
    response = requests.get(model_url)
    response.raise_for_status()  # fail early if the download did not succeed
    with open(model_path, "wb") as f:
        f.write(response.content)
# Load the document segmentation model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
docseg_model = YOLO(model_path).to(device)
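# The DocLayNet-trained model exposes its layout class names (e.g. Text, Title,
# Table, Picture) via docseg_model.names; they are used below to label detections.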
@spaces.GPU
def process_image(image):
    # Convert the PIL image to the BGR format expected by OpenCV/YOLO
    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    results = docseg_model(image)

    # Extract the annotated image from the results and convert it back to RGB
    annotated_img = results[0].plot()
    annotated_img = cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB)

    # Prepare detected classes and confidences as text output
    detected_areas_labels = "\n".join(
        f"{docseg_model.names[int(box.cls)]}: {float(box.conf):.2f}"
        for box in results[0].boxes
    )
    return annotated_img, detected_areas_labels
# Define the Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("### Document Segmentation using YOLOv8")
    input_image = gr.Image(type="pil", label="Input Image")
    output_image = gr.Image(type="pil", label="Annotated Image")
    output_text = gr.Textbox(label="Detected Areas and Labels")
    gr.Button("Run").click(
        fn=process_image,
        inputs=input_image,
        outputs=[output_image, output_text]
    )
if __name__ == "__main__":
    interface.launch()