import gradio as gr
import torch
import numpy as np
from PIL import Image

# Load the pretrained YOLOv5s model from Torch Hub (weights download on first use)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
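
# Optional tuning knobs (a sketch, not required by this app): the Torch Hub
# model exposes its non-maximum-suppression settings as plain attributes, e.g.:
# model.conf = 0.25  # minimum confidence for a detection to be kept
# model.iou = 0.45   # IoU threshold used during NMS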

# Function to run inference on an image
def run_inference(image):
    # Convert the PIL image to a NumPy array (pixel order stays RGB)
    image = np.array(image)

    # Run YOLOv5 inference
    results = model(image)

    # results.render() draws boxes on a copy of the input image, so the
    # annotated array is already in RGB; no BGR-to-RGB conversion is needed.
    annotated_image = results.render()[0]

    return Image.fromarray(annotated_image)
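
# Standalone sanity check (a sketch; "test.jpg" is a placeholder path):
# run_inference(Image.open("test.jpg")).save("annotated.jpg")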

# Function to summarize the detected objects as plain text
def generate_summary(image):
    results = model(image)
    detected_objects = results.pandas().xyxy[0]
    if detected_objects.empty:
        return "No objects detected."
    summary = "Detected objects:\n\n"
    for _, obj in detected_objects.iterrows():
        summary += f"- {obj['name']} with confidence {obj['confidence']:.2f}\n"
    return summary
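
# Example of the summary format (values are illustrative, not real output):
#   Detected objects:
#
#   - person with confidence 0.87
#   - dog with confidence 0.72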

# Function to generate a rough scene description from keywords in the summary
def generate_scene_description(summary):
    if "person" in summary.lower():
        return "This scene might involve people interacting or a social gathering."
    elif "car" in summary.lower() or "truck" in summary.lower():
        return "This could be a street scene or a transportation-related scenario."
    elif "dog" in summary.lower() or "cat" in summary.lower():
        return "This appears to involve pets or animals, possibly in a domestic or outdoor setting."
    else:
        return "This scene involves various objects. It could be a dynamic or static environment."

# Create the Gradio interface with improved UI
with gr.Blocks(css="""
    body {
        font-family: 'Poppins', sans-serif;
        margin: 0;
        background: linear-gradient(135deg, #3D52A0, #7091E6, #8697C4, #ADBBDA, #EDE8F5);
        background-size: 400% 400%;
        animation: gradient-animation 15s ease infinite;
        color: #FFFFFF;
    }
    @keyframes gradient-animation {
        0% { background-position: 0% 50%; }
        50% { background-position: 100% 50%; }
        100% { background-position: 0% 50%; }
    }
    h1 {
        text-align: center;
        color: #FFFFFF;
        font-size: 2.5em;
        font-weight: bold;
        margin-bottom: 0.5em;
        text-shadow: 2px 2px 5px rgba(0, 0, 0, 0.3);
    }
    footer {
        text-align: center;
        margin-top: 20px;
        padding: 10px;
        font-size: 1em;
        color: #FFFFFF;
        background: rgba(61, 82, 160, 0.8);
        border-radius: 8px;
    }
    .gr-button {
        font-size: 1em;
        padding: 12px 24px;
        background-color: #7091E6;
        color: #FFFFFF;
        border: none;
        border-radius: 5px;
        transition: all 0.3s ease-in-out;
    }
    .gr-button:hover {
        background-color: #8697C4;
        transform: scale(1.05);
        box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2);
    }
    .gr-box {
        background: rgba(255, 255, 255, 0.1);
        border: 1px solid rgba(255, 255, 255, 0.3);
        border-radius: 10px;
        padding: 15px;
        box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3);
        color: #FFFFFF;
    }
""") as demo:
    with gr.Row():
        gr.Markdown("<h1>✨ InsightVision: Detect, Analyze, Summarize ✨</h1>")

    with gr.Row():
        with gr.Column(scale=2):
            image_input = gr.Image(label="Upload Image", type="pil", elem_classes="gr-box")
            detect_button = gr.Button("Run Detection", elem_classes="gr-button")
        with gr.Column(scale=3):
            annotated_image_output = gr.Image(label="Detected Image", type="pil", elem_classes="gr-box")
            summary_output = gr.Textbox(label="Detection Summary", lines=10, interactive=False, elem_classes="gr-box")
            scene_description_output = gr.Textbox(label="Scene Description", lines=5, interactive=False, elem_classes="gr-box")
    
    # Run detection and build all three outputs for the UI. Note that the
    # model is invoked twice here (once for the annotated image, once for
    # the summary); acceptable for a demo, though a single shared inference
    # pass would be cheaper.
    def detect_and_process(image):
        annotated_image = run_inference(image)
        summary = generate_summary(np.array(image))
        scene_description = generate_scene_description(summary)
        return annotated_image, summary, scene_description
    
    detect_button.click(
        fn=detect_and_process,
        inputs=[image_input],
        outputs=[annotated_image_output, summary_output, scene_description_output]
    )

    gr.Markdown("<footer>Made with ❤️ using Gradio and YOLOv5 | © 2024 InsightVision</footer>")

# Launch the interface
demo.launch()
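
# When running on a remote machine, a temporary public URL can be created
# with demo.launch(share=True); server_name="0.0.0.0" binds all network
# interfaces instead. Both are standard Gradio launch() options.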