# Object_Detector / app.py
import gradio as gr
import torch
import numpy as np
from PIL import Image

# Load the pretrained YOLOv5s model from the Ultralytics hub
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
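# Optional tuning (a sketch, not wired in by default): the AutoShape wrapper
# returned by torch.hub exposes confidence and NMS IoU thresholds; the values
# below are illustrative, not the app's original settings.
# model.conf = 0.25  # discard detections below this confidence
# model.iou = 0.45   # IoU threshold for non-maximum suppression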

# Function to run inference on an image
def run_inference(image):
    # Convert the image from PIL format to a NumPy array for YOLOv5
    image = np.array(image)
    # Run YOLOv5 inference
    results = model(image)
    # render() draws the boxes and returns the annotated images; since the
    # input came from PIL it is already RGB, so no BGR-to-RGB conversion is
    # needed (converting here would swap the color channels)
    annotated_image = results.render()[0]
    return Image.fromarray(annotated_image)
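
# Illustrative variant (an assumption, not part of the original app): YOLOv5's
# AutoShape wrapper accepts a `size` keyword to fix the inference resolution,
# which can help when uploads vary widely in size.
def run_inference_sized(image, size=640):
    results = model(np.array(image), size=size)
    return Image.fromarray(results.render()[0])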

# Function to generate a text summary of the detected objects
def generate_summary(image):
    results = model(image)
    # results.pandas().xyxy[0] is a DataFrame with one row per detection
    # (columns include 'name' and 'confidence')
    detected_objects = results.pandas().xyxy[0]
    if detected_objects.empty:
        return "No objects detected."
    summary = "Detected objects:\n\n"
    for _, obj in detected_objects.iterrows():
        summary += f"- {obj['name']} with confidence {obj['confidence']:.2f}\n"
    return summary
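
# A compact alternative (a sketch using the same pandas results API as above):
# aggregate duplicate classes into counts instead of listing every detection.
def summarize_counts(image):
    detections = model(np.array(image)).pandas().xyxy[0]
    counts = detections['name'].value_counts().to_dict()
    return ", ".join(f"{n}x {name}" for name, n in counts.items()) or "nothing detected"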

# Function to generate a rough scene description from the summary text
def generate_scene_description(summary):
    if "person" in summary.lower():
        return "This scene might involve people interacting or a social gathering."
    elif "car" in summary.lower() or "truck" in summary.lower():
        return "This could be a street scene or a transportation-related scenario."
    elif "dog" in summary.lower() or "cat" in summary.lower():
        return "This appears to involve pets or animals, possibly in a domestic or outdoor setting."
    else:
        return "This scene involves various objects. It could be a dynamic or static environment."
# Create the Gradio interface with improved UI
with gr.Blocks(css="""
body {
font-family: 'Poppins', sans-serif;
margin: 0;
background: linear-gradient(135deg, #3D52A0, #7091E6, #8697C4, #ADBBDA, #EDE8F5);
background-size: 400% 400%;
animation: gradient-animation 15s ease infinite;
color: #FFFFFF;
}
@keyframes gradient-animation {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
h1 {
text-align: center;
color: #FFFFFF;
font-size: 2.5em;
font-weight: bold;
margin-bottom: 0.5em;
text-shadow: 2px 2px 5px rgba(0, 0, 0, 0.3);
}
footer {
text-align: center;
margin-top: 20px;
padding: 10px;
font-size: 1em;
color: #FFFFFF;
background: rgba(61, 82, 160, 0.8);
border-radius: 8px;
}
.gr-button {
font-size: 1em;
padding: 12px 24px;
background-color: #7091E6;
color: #FFFFFF;
border: none;
border-radius: 5px;
transition: all 0.3s ease-in-out;
}
.gr-button:hover {
background-color: #8697C4;
transform: scale(1.05);
box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2);
}
.gr-box {
background: rgba(255, 255, 255, 0.1);
border: 1px solid rgba(255, 255, 255, 0.3);
border-radius: 10px;
padding: 15px;
box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3);
color: #FFFFFF;
}
""") as demo:
    with gr.Row():
        gr.Markdown("<h1>✨ InsightVision: Detect, Analyze, Summarize ✨</h1>")

    with gr.Row():
        with gr.Column(scale=2):
            image_input = gr.Image(label="Upload Image", type="pil", elem_classes="gr-box")
            detect_button = gr.Button("Run Detection", elem_classes="gr-button")

        with gr.Column(scale=3):
            annotated_image_output = gr.Image(label="Detected Image", type="pil", elem_classes="gr-box")
            summary_output = gr.Textbox(label="Detection Summary", lines=10, interactive=False, elem_classes="gr-box")
            scene_description_output = gr.Textbox(label="Scene Description", lines=5, interactive=False, elem_classes="gr-box")

    # Combined action for the detect button (note: this runs the model twice,
    # once for the annotated image and once for the summary)
    def detect_and_process(image):
        annotated_image = run_inference(image)
        summary = generate_summary(np.array(image))
        scene_description = generate_scene_description(summary)
        return annotated_image, summary, scene_description
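
    # Single-pass sketch (an assumption, not what the button uses): reuse one
    # Results object for both the rendered image and the pandas summary so
    # inference runs once per click; detections are read before render()
    # draws boxes onto the underlying array.
    def detect_and_process_once(image):
        results = model(np.array(image))
        detections = results.pandas().xyxy[0]
        annotated = Image.fromarray(results.render()[0])
        summary = "Detected objects:\n\n" + "".join(
            f"- {row['name']} with confidence {row['confidence']:.2f}\n"
            for _, row in detections.iterrows()
        )
        return annotated, summary, generate_scene_description(summary)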

    detect_button.click(
        fn=detect_and_process,
        inputs=[image_input],
        outputs=[annotated_image_output, summary_output, scene_description_output],
    )

    gr.Markdown("<footer>Made with ❤️ using Gradio and YOLOv5 | © 2024 InsightVision</footer>")

# Launch the interface
demo.launch()