import gradio as gr
from ultralytics import YOLO
import cv2
import numpy as np
import os
import requests
import torch
import datetime
import subprocess
import spaces  # Hugging Face Spaces ZeroGPU helper; provides the @spaces.GPU decorator

CUSTOM_CSS = """
#output_box textarea {
    font-family: IBM Plex Mono, ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
}
"""

# Ensure the model weights are available locally
model_path = "yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
if not os.path.exists(model_path):
    # Download the model file if it doesn't exist
    model_url = "https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
    response = requests.get(model_url)
    response.raise_for_status()  # fail fast on a bad download instead of writing an error page to disk
    with open(model_path, "wb") as f:
        f.write(response.content)

# Load the document segmentation model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
docseg_model = YOLO(model_path).to(device)
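
# The checkpoint is fine-tuned on DocLayNet; its label set includes classes
# such as Text, Title, Section-header, Table, Picture, List-item, Caption,
# Footnote, Formula, Page-header, and Page-footer.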

def process_image(image):
    # Convert the PIL image to a BGR NumPy array, the format this OpenCV-based pipeline uses
    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    results = docseg_model(image)

    # Extract annotated image from results
    annotated_img = results[0].plot()
    annotated_img = cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB)

    # Build a text summary of detections. Ultralytics boxes expose class
    # indices via .cls and confidences via .conf; results[0].names maps an
    # index back to its label string (there is no .label attribute).
    detected_areas_labels = "\n".join(
        f"{results[0].names[int(box.cls)]}: {float(box.conf):.2f}"
        for box in results[0].boxes
    )

    return annotated_img, detected_areas_labels
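
# Example (hypothetical local smoke test, not part of the Space UI):
#   from PIL import Image
#   img, labels = process_image(Image.open("page.png"))
#   print(labels)  # e.g. "Text: 0.93\nTable: 0.88"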

# On ZeroGPU, CUDA calls made at import time are deferred: the tensor stays on
# CPU until a @spaces.GPU-decorated function actually runs on a GPU worker.
zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' here; 'cuda:0' inside run_gpu below

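# @spaces.GPU asks ZeroGPU to attach a GPU for the duration of the call; the
# decorator also accepts a duration= argument to extend the default time budget.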
@spaces.GPU
def run_gpu() -> str:
    print(zero.device) # <-- 'cuda:0' 🤗
    output: str = ""
    try:
        output = subprocess.check_output(["nvidia-smi"], text=True)
    except (FileNotFoundError, subprocess.CalledProcessError):
        output = "nvidia-smi failed"
    comment = (
        datetime.datetime.now().replace(microsecond=0).isoformat().replace("T", " ")
    )
    return f"# {comment}\n\n{output}"

def run(check: bool) -> str:
    if check:
        return run_gpu()
    else:
        comment = (
            datetime.datetime.now().replace(microsecond=0).isoformat().replace("T", " ")
        )
        return f"# {comment}\n\nThis is running on CPU\n\nClick on 'Run on GPU' below to move to GPU instantly and run nvidia-smi"

output = gr.Textbox(
    label="Command Output", max_lines=32, elem_id="output_box", value=run(False)
)
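# Created outside the Blocks context so run(False) can seed the initial value;
# output.render() below places the component into the layout.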

with gr.Blocks(css=CUSTOM_CSS) as demo:
    gr.Markdown("#### `zero-gpu`: how to run on serverless GPU for free on Spaces 🔥")

    output.render()

    check = gr.Checkbox(label="Run on GPU")

    check.change(run, inputs=[check], outputs=output, every=1)
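    # every=1 re-runs the handler roughly once per second while the client is
    # connected, keeping the nvidia-smi snapshot fresh.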

# Define the Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("### Document Segmentation using YOLOv8")
    input_image = gr.Image(type="pil", label="Input Image")
    output_image = gr.Image(type="pil", label="Annotated Image")
    output_text = gr.Textbox(label="Detected Areas and Labels")

    gr.Button("Run").click(
        fn=process_image,
        inputs=input_image,
        outputs=[output_image, output_text]
    )

# Two Blocks apps cannot be launched one after another in the same process
# (the first launch() blocks), so serve both as tabs of a single app instead.
app = gr.TabbedInterface([demo, interface], ["zero-gpu demo", "Document Segmentation"])

if __name__ == "__main__":
    app.queue().launch(show_api=False)