"""Gradio app for aerial object detection with the WALDO30 YOLOv8 model.

Downloads the WALDO30 checkpoint from the Hugging Face Hub at startup and
exposes two tabs: one for still images, one for videos. Video frames are
annotated one-by-one and re-encoded to an mp4 file.
"""

import tempfile

import cv2
import gradio as gr
from huggingface_hub import hf_hub_download
from PIL import Image
from ultralytics import YOLO

# Load the model from Hugging Face. NOTE: torch.hub.load('ultralytics/yolov8',
# ...) does not exist -- the YOLOv8 family is only exposed through the
# `ultralytics` package's YOLO class.
model_path = hf_hub_download(
    repo_id="StephanST/WALDO30",
    filename="WALDO30_yolov8m_640x640.pt",
)
model = YOLO(model_path)


def detect_on_image(image):
    """Run detection on a single image.

    Args:
        image: Input image as a PIL.Image (supplied by gr.Image(type="pil")).

    Returns:
        PIL.Image with bounding boxes rendered on it.
    """
    results = model(image)
    # YOLOv8's Results.plot() returns an annotated BGR ndarray; convert to
    # RGB before handing it to PIL so colors are not swapped.
    annotated_bgr = results[0].plot()
    annotated_rgb = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)
    return Image.fromarray(annotated_rgb)


def detect_on_video(video):
    """Run detection frame-by-frame on a video file.

    Args:
        video: Filesystem path of the uploaded video (supplied by gr.Video).

    Returns:
        Path of the annotated mp4 file.

    Raises:
        gr.Error: if the uploaded file cannot be opened as a video.
    """
    # Unique temp path instead of a fixed name, so concurrent requests do
    # not clobber each other's output.
    output_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name

    cap = cv2.VideoCapture(video)
    if not cap.isOpened():
        raise gr.Error("Could not open the uploaded video.")

    # Guard against broken metadata reporting 0 FPS, which would yield an
    # unplayable output file.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            results = model(frame)  # Run detection on the BGR frame
            # Results.plot() is already BGR, matching VideoWriter's input.
            out.write(results[0].plot())
    finally:
        # Always release codec/file handles, even if inference fails mid-video.
        cap.release()
        out.release()

    return output_path


# Modern Gradio API: gr.inputs/gr.outputs were removed in Gradio 4.x, and
# gr.Interface does not accept a list of functions -- build one Interface
# per modality and combine them in a TabbedInterface.
image_interface = gr.Interface(
    fn=detect_on_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Image(type="pil", label="Detected Image"),
    description="Upload an image to see object detection results using the WALDO30 YOLOv8 model.",
)
video_interface = gr.Interface(
    fn=detect_on_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=gr.Video(label="Detected Video"),
    description="Upload a video to see object detection results using the WALDO30 YOLOv8 model.",
)
app = gr.TabbedInterface(
    [image_interface, video_interface],
    tab_names=["Image", "Video"],
    title="WALDO30 YOLOv8 Object Detection",
)

# Launch the app
if __name__ == "__main__":
    app.launch()