"""Gradio app for aerial object detection with the WALDO30 YOLOv8 model.

Downloads the WALDO30 checkpoint from the Hugging Face Hub at startup and
exposes two tabs: one for still images, one for videos. Video frames are
annotated one-by-one and re-encoded to an mp4 file.
"""

import tempfile

import cv2
import gradio as gr
from huggingface_hub import hf_hub_download
from PIL import Image
from ultralytics import YOLO

# Load the model from Hugging Face. NOTE: torch.hub.load('ultralytics/yolov8',
# ...) does not exist -- the YOLOv8 family is only exposed through the
# `ultralytics` package's YOLO class.
model_path = hf_hub_download(
    repo_id="StephanST/WALDO30",
    filename="WALDO30_yolov8m_640x640.pt",
)
model = YOLO(model_path)


def detect_on_image(image):
    """Run detection on a single image.

    Args:
        image: Input image as a PIL.Image (supplied by gr.Image(type="pil")).

    Returns:
        PIL.Image with bounding boxes rendered on it.
    """
    results = model(image)
    # YOLOv8's Results.plot() returns an annotated BGR ndarray; convert to
    # RGB before handing it to PIL so colors are not swapped.
    annotated_bgr = results[0].plot()
    annotated_rgb = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)
    return Image.fromarray(annotated_rgb)


def detect_on_video(video):
    """Run detection frame-by-frame on a video file.

    Args:
        video: Filesystem path of the uploaded video (supplied by gr.Video).

    Returns:
        Path of the annotated mp4 file.

    Raises:
        gr.Error: if the uploaded file cannot be opened as a video.
    """
    # Unique temp path instead of a fixed name, so concurrent requests do
    # not clobber each other's output.
    output_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name

    cap = cv2.VideoCapture(video)
    if not cap.isOpened():
        raise gr.Error("Could not open the uploaded video.")

    # Guard against broken metadata reporting 0 FPS, which would yield an
    # unplayable output file.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            results = model(frame)  # Run detection on the BGR frame
            # Results.plot() is already BGR, matching VideoWriter's input.
            out.write(results[0].plot())
    finally:
        # Always release codec/file handles, even if inference fails mid-video.
        cap.release()
        out.release()

    return output_path


# Modern Gradio API: gr.inputs/gr.outputs were removed in Gradio 4.x, and
# gr.Interface does not accept a list of functions -- build one Interface
# per modality and combine them in a TabbedInterface.
image_interface = gr.Interface(
    fn=detect_on_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Image(type="pil", label="Detected Image"),
    description="Upload an image to see object detection results using the WALDO30 YOLOv8 model.",
)
video_interface = gr.Interface(
    fn=detect_on_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=gr.Video(label="Detected Video"),
    description="Upload a video to see object detection results using the WALDO30 YOLOv8 model.",
)
app = gr.TabbedInterface(
    [image_interface, video_interface],
    tab_names=["Image", "Video"],
    title="WALDO30 YOLOv8 Object Detection",
)

# Launch the app
if __name__ == "__main__":
    app.launch()