import gradio as gr
import torch
import numpy as np
from PIL import Image

# Load the pretrained YOLOv5s model from the Ultralytics hub
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
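# Optional (an assumption, not part of the original app): run on GPU when one
# is available. YOLOv5 hub models are regular torch modules, so .to() applies;
# on a CPU-only Space this line is a no-op.
model.to("cuda" if torch.cuda.is_available() else "cpu")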
# Run inference on an image and return the annotated result
def run_inference(image):
    # Convert the PIL image to a NumPy array (RGB channel order)
    image = np.array(image)
    # Run YOLOv5 inference; render() draws the detections onto the image
    results = model(image)
    # render() returns images in the same channel order as the input, which is
    # already RGB here, so no BGR-to-RGB conversion is needed
    annotated_image = results.render()[0]
    return Image.fromarray(annotated_image)
# Generate a text summary of the detected objects
def generate_summary(image):
    results = model(image)
    detected_objects = results.pandas().xyxy[0]
    summary = "Detected objects:\n\n"
    for _, obj in detected_objects.iterrows():
        summary += f"- {obj['name']} with confidence {obj['confidence']:.2f}\n"
    return summary
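# Example of the summary format (illustrative values only, not a real run):
#
#   Detected objects:
#
#   - person with confidence 0.87
#   - dog with confidence 0.63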
# Generate a rough scene description from the summary text
def generate_scene_description(summary):
    if "person" in summary.lower():
        return "This scene might involve people interacting or a social gathering."
    elif "car" in summary.lower() or "truck" in summary.lower():
        return "This could be a street scene or a transportation-related scenario."
    elif "dog" in summary.lower() or "cat" in summary.lower():
        return "This appears to involve pets or animals, possibly in a domestic or outdoor setting."
    else:
        return "This scene involves various objects. It could be a dynamic or static environment."
# Create the Gradio interface with custom styling
with gr.Blocks(css="""
    body {
        font-family: 'Poppins', sans-serif;
        margin: 0;
        background: linear-gradient(135deg, #3D52A0, #7091E6, #8697C4, #ADBBDA, #EDE8F5);
        background-size: 400% 400%;
        animation: gradient-animation 15s ease infinite;
        color: #FFFFFF;
    }
    @keyframes gradient-animation {
        0% { background-position: 0% 50%; }
        50% { background-position: 100% 50%; }
        100% { background-position: 0% 50%; }
    }
    h1 {
        text-align: center;
        color: #FFFFFF;
        font-size: 2.5em;
        font-weight: bold;
        margin-bottom: 0.5em;
        text-shadow: 2px 2px 5px rgba(0, 0, 0, 0.3);
    }
    footer {
        text-align: center;
        margin-top: 20px;
        padding: 10px;
        font-size: 1em;
        color: #FFFFFF;
        background: rgba(61, 82, 160, 0.8);
        border-radius: 8px;
    }
    .gr-button {
        font-size: 1em;
        padding: 12px 24px;
        background-color: #7091E6;
        color: #FFFFFF;
        border: none;
        border-radius: 5px;
        transition: all 0.3s ease-in-out;
    }
    .gr-button:hover {
        background-color: #8697C4;
        transform: scale(1.05);
        box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2);
    }
    .gr-box {
        background: rgba(255, 255, 255, 0.1);
        border: 1px solid rgba(255, 255, 255, 0.3);
        border-radius: 10px;
        padding: 15px;
        box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3);
        color: #FFFFFF;
    }
""") as demo:
    with gr.Row():
        gr.Markdown("<h1>✨ InsightVision: Detect, Analyze, Summarize ✨</h1>")
    with gr.Row():
        with gr.Column(scale=2):
            image_input = gr.Image(label="Upload Image", type="pil", elem_classes="gr-box")
            detect_button = gr.Button("Run Detection", elem_classes="gr-button")
        with gr.Column(scale=3):
            annotated_image_output = gr.Image(label="Detected Image", type="pil", elem_classes="gr-box")
            summary_output = gr.Textbox(label="Detection Summary", lines=10, interactive=False, elem_classes="gr-box")
            scene_description_output = gr.Textbox(label="Scene Description", lines=5, interactive=False, elem_classes="gr-box")
    # Button action: run detection, then build the summary and description.
    # Note: the model runs twice (once to annotate, once for the summary);
    # this keeps the helper functions independent at the cost of a second pass.
    def detect_and_process(image):
        annotated_image = run_inference(image)
        summary = generate_summary(np.array(image))
        scene_description = generate_scene_description(summary)
        return annotated_image, summary, scene_description

    detect_button.click(
        fn=detect_and_process,
        inputs=[image_input],
        outputs=[annotated_image_output, summary_output, scene_description_output]
    )
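    # A single-pass variant (a sketch, not the original design; the name
    # detect_and_process_once is hypothetical): run the model once and derive
    # both the annotated image and the summary from the same Detections object.
    def detect_and_process_once(image):
        results = model(np.array(image))
        annotated = Image.fromarray(results.render()[0])
        summary = "Detected objects:\n\n" + "".join(
            f"- {row['name']} with confidence {row['confidence']:.2f}\n"
            for _, row in results.pandas().xyxy[0].iterrows()
        )
        return annotated, summary, generate_scene_description(summary)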
gr.Markdown("<footer>Made with ❤️ using Gradio and YOLOv5 | © 2024 InsightVision</footer>") | |
# Launch the interface
demo.launch()
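# For local testing one could instead request a temporary public URL via
# Gradio's built-in tunnel (optional, and not needed on Hugging Face Spaces):
# demo.launch(share=True)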