binuser007 committed on
Commit 07dabea · verified · 1 Parent(s): 7d49a5a

Upload 3 files

Files changed (3)
  1. app.py +266 -0
  2. config.py +40 -0
  3. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,266 @@
+ import os
+ import cv2
+ import tempfile
+ import requests
+ import base64
+ import numpy as np
+ import logging
+ from dataclasses import dataclass
+ from typing import Optional, Union, Tuple
+ from PIL import Image
+ from io import BytesIO
+ from ultralytics import YOLO
+ import streamlit as st
+ import yt_dlp as youtube_dl
+ from config import Config
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ @dataclass
+ class DetectionResult:
+     """Data class to store detection results"""
+     success: bool
+     image: Optional[np.ndarray] = None
+     error_message: Optional[str] = None
+
+ class YOLOModel:
+     """Class to handle YOLO model operations"""
+     def __init__(self, model_name: str = Config.DEFAULT_MODEL):
+         self.model = self._load_model(model_name)
+
+     def _load_model(self, model_name: str) -> Optional[YOLO]:
+         """Load YOLO model with error handling"""
+         try:
+             return YOLO(model_name)
+         except Exception as e:
+             logger.error(f"Error loading model: {e}")
+             return None
+
+     def detect_objects(self, image: np.ndarray) -> DetectionResult:
+         """Perform object detection on the input image"""
+         if self.model is None:
+             return DetectionResult(False, error_message="Model not loaded")
+
+         try:
+             results = self.model(image)
+             annotated_image = image.copy()
+
+             for result in results[0].boxes:
+                 x1, y1, x2, y2 = map(int, result.xyxy[0])
+                 label = self.model.names[int(result.cls)]
+                 confidence = result.conf.item()
+
+                 if confidence < Config.CONFIDENCE_THRESHOLD:
+                     continue
+
+                 cv2.rectangle(
+                     annotated_image,
+                     (x1, y1),
+                     (x2, y2),
+                     Config.BBOX_COLOR,
+                     2
+                 )
+                 label_text = f'{label} {confidence:.2f}'
+                 cv2.putText(
+                     annotated_image,
+                     label_text,
+                     (x1, y1 - 10),
+                     cv2.FONT_HERSHEY_SIMPLEX,
+                     Config.FONT_SCALE,
+                     Config.BBOX_COLOR,
+                     Config.FONT_THICKNESS
+                 )
+
+             return DetectionResult(True, annotated_image)
+         except Exception as e:
+             logger.error(f"Error during object detection: {e}")
+             return DetectionResult(False, error_message=str(e))
+
+ class ImageProcessor:
+     """Class to handle image processing operations"""
+     def __init__(self, model: YOLOModel):
+         self.model = model
+
+     def process_image(self, image: Union[Image.Image, str]) -> DetectionResult:
+         """Process image from various sources (PIL Image or URL)"""
+         try:
+             if isinstance(image, str):
+                 image = self._load_image_from_url(image)
+
+             if image is None:
+                 return DetectionResult(False, error_message="Failed to load image")
+
+             np_image = np.array(image)
+             return self.model.detect_objects(np_image)
+         except Exception as e:
+             logger.error(f"Error processing image: {e}")
+             return DetectionResult(False, error_message=str(e))
+
+     def _load_image_from_url(self, url: str) -> Optional[Image.Image]:
+         """Load image from URL with support for base64"""
+         try:
+             if url.startswith('data:image'):
+                 header, encoded = url.split(',', 1)
+                 image_data = base64.b64decode(encoded)
+                 return Image.open(BytesIO(image_data))
+             else:
+                 response = requests.get(url)
+                 response.raise_for_status()
+                 return Image.open(BytesIO(response.content))
+         except Exception as e:
+             logger.error(f"Error loading image from URL: {e}")
+             return None
+
+ class VideoProcessor:
+     """Class to handle video processing operations"""
+     def __init__(self, model: YOLOModel):
+         self.model = model
+         os.makedirs(Config.TEMP_DIR, exist_ok=True)
+
+     def process_video(self, input_path: str) -> Tuple[bool, Optional[str]]:
+         """Process video file and return path to processed video"""
+         try:
+             cap = cv2.VideoCapture(input_path)
+             if not cap.isOpened():
+                 return False, "Cannot open video file"
+
+             output_path = os.path.join(Config.TEMP_DIR, "processed_video.mp4")
+             self._setup_video_writer(cap, output_path)
+
+             while True:
+                 ret, frame = cap.read()
+                 if not ret:
+                     break
+
+                 result = self.model.detect_objects(frame)
+                 if result.success:
+                     self.writer.write(result.image)
+
+             cap.release()
+             self.writer.release()
+             return True, output_path
+         except Exception as e:
+             logger.error(f"Error processing video: {e}")
+             return False, str(e)
+
+     def _setup_video_writer(self, cap: cv2.VideoCapture, output_path: str):
+         """Set up video writer with input video properties"""
+         frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+         frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+         fps = cap.get(cv2.CAP_PROP_FPS)
+         fourcc = cv2.VideoWriter_fourcc(*Config.VIDEO_OUTPUT_FORMAT)
+         self.writer = cv2.VideoWriter(
+             output_path,
+             fourcc,
+             fps,
+             (frame_width, frame_height)
+         )
+
+ def download_youtube_video(youtube_url: str) -> Optional[str]:
+     """Download YouTube video and return path to downloaded file"""
+     try:
+         temp_dir = tempfile.gettempdir()
+         output_path = os.path.join(temp_dir, 'downloaded_video.mp4')
+         ydl_opts = {
+             'format': 'best',
+             'outtmpl': output_path
+         }
+         with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+             ydl.download([youtube_url])
+         return output_path
+     except Exception as e:
+         logger.error(f"Failed to retrieve video from YouTube: {e}")
+         return None
+
+ def main():
+     """Main application function"""
+     st.title("MULTIMEDIA OBJECT DETECTION USING YOLO")
+
+     # Model selection with description
+     st.subheader("Model Selection")
+     model_choice = st.selectbox(
+         "Select YOLO Model",
+         options=Config.AVAILABLE_MODELS,
+         index=Config.AVAILABLE_MODELS.index(Config.DEFAULT_MODEL),
+         format_func=lambda x: f"{x} - {Config.YOLO_MODELS[x]}"
+     )
+
+     # Display model capabilities
+     model_type = "Detection"
+     if "pose" in model_choice:
+         model_type = "Pose Estimation"
+         st.info("This model will detect and estimate human poses in the image/video.")
+     elif "seg" in model_choice:
+         model_type = "Instance Segmentation"
+         st.info("This model will perform instance segmentation, creating precise masks for detected objects.")
+     else:
+         st.info("This model will detect and classify objects with bounding boxes.")
+
+     # Initialize model and processors
+     model = YOLOModel(model_choice)
+     image_processor = ImageProcessor(model)
+     video_processor = VideoProcessor(model)
+
+     tabs = st.tabs(["Image Detection", "Video Detection"])
+
+     with tabs[0]:
+         st.header("Image Detection")
+         input_choice = st.radio("Select Input Method", ["Upload", "URL"])
+
+         if input_choice == "Upload":
+             uploaded_image = st.file_uploader(
+                 "Upload Image",
+                 type=Config.ALLOWED_IMAGE_TYPES
+             )
+             if uploaded_image is not None:
+                 image = Image.open(uploaded_image)
+                 result = image_processor.process_image(image)
+                 if result.success:
+                     st.image(result.image, caption="Processed Image", use_container_width=True)
+                 else:
+                     st.error(result.error_message)
+
+         elif input_choice == "URL":
+             image_url = st.text_input("Image URL")
+             if image_url:
+                 result = image_processor.process_image(image_url)
+                 if result.success:
+                     st.image(result.image, caption="Processed Image", use_container_width=True)
+                 else:
+                     st.error(result.error_message)
+
+     with tabs[1]:
+         st.header("Video Detection")
+         video_choice = st.radio("Select Input Method", ["Upload", "YouTube"])
+
+         if video_choice == "Upload":
+             uploaded_video = st.file_uploader(
+                 "Upload Local Video",
+                 type=Config.ALLOWED_VIDEO_TYPES
+             )
+             if uploaded_video is not None:
+                 input_video_path = os.path.join(Config.TEMP_DIR, uploaded_video.name)
+                 with open(input_video_path, "wb") as f:
+                     f.write(uploaded_video.read())
+
+                 success, result = video_processor.process_video(input_video_path)
+                 if success:
+                     st.video(result)
+                 else:
+                     st.error(result)
+
+         elif video_choice == "YouTube":
+             video_url = st.text_input("YouTube Video URL")
+             if video_url:
+                 input_video_path = download_youtube_video(video_url)
+                 if input_video_path:
+                     success, result = video_processor.process_video(input_video_path)
+                     if success:
+                         st.video(result)
+                     else:
+                         st.error(result)
+
+ if __name__ == "__main__":
+     main()
config.py ADDED
@@ -0,0 +1,40 @@
+ from typing import List, Dict
+
+ class Config:
+     # Model configurations with descriptions
+     YOLO_MODELS: Dict[str, str] = {
+         "yolov8n.pt": "YOLOv8 Nano - Fastest and smallest model, best for CPU/edge devices",
+         "yolov8s.pt": "YOLOv8 Small - Good balance of speed and accuracy",
+         "yolov8m.pt": "YOLOv8 Medium - Better accuracy, still reasonable speed",
+         "yolov8l.pt": "YOLOv8 Large - High accuracy, slower speed",
+         "yolov8x.pt": "YOLOv8 XLarge - Highest accuracy, slowest speed",
+         # Pose estimation models
+         "yolov8n-pose.pt": "YOLOv8 Nano Pose - Fast pose estimation",
+         "yolov8s-pose.pt": "YOLOv8 Small Pose - Balanced pose estimation",
+         "yolov8m-pose.pt": "YOLOv8 Medium Pose - Accurate pose estimation",
+         "yolov8l-pose.pt": "YOLOv8 Large Pose - High accuracy pose estimation",
+         "yolov8x-pose.pt": "YOLOv8 XLarge Pose - Most accurate pose estimation",
+         # Segmentation models
+         "yolov8n-seg.pt": "YOLOv8 Nano Segmentation - Fast instance segmentation",
+         "yolov8s-seg.pt": "YOLOv8 Small Segmentation - Balanced segmentation",
+         "yolov8m-seg.pt": "YOLOv8 Medium Segmentation - Accurate segmentation",
+         "yolov8l-seg.pt": "YOLOv8 Large Segmentation - High accuracy segmentation",
+         "yolov8x-seg.pt": "YOLOv8 XLarge Segmentation - Most accurate segmentation"
+     }
+
+     AVAILABLE_MODELS: List[str] = list(YOLO_MODELS.keys())
+     DEFAULT_MODEL: str = "yolov8s.pt"
+
+     # File configurations
+     ALLOWED_IMAGE_TYPES: List[str] = ["jpg", "jpeg", "png"]
+     ALLOWED_VIDEO_TYPES: List[str] = ["mp4", "mov", "avi"]
+
+     # Video processing
+     TEMP_DIR: str = "temp"
+     VIDEO_OUTPUT_FORMAT: str = "mp4v"
+
+     # UI configurations
+     CONFIDENCE_THRESHOLD: float = 0.25  # Lowered for better detection
+     BBOX_COLOR: tuple = (0, 255, 0)
+     FONT_SCALE: float = 0.5
+     FONT_THICKNESS: int = 2
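
Note: CONFIDENCE_THRESHOLD is read on every detect_objects call, so it can also be overridden at runtime before processing instead of editing the file; a small, hypothetical illustration (the 0.5 cutoff is arbitrary):

    from config import Config
    Config.CONFIDENCE_THRESHOLD = 0.5  # arbitrary stricter cutoff; detect_objects skips boxes scoring below it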
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ opencv-python>=4.7.0
+ pillow>=9.5.0
+ requests>=2.31.0
+ numpy>=1.24.3
+ ultralytics>=8.0.0
+ streamlit>=1.24.0
+ yt-dlp>=2023.3.4
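
For a quick local smoke test outside the Streamlit UI (after pip install -r requirements.txt), the classes above can be driven directly. A minimal sketch, assuming a local sample.jpg exists (placeholder path) and noting that importing app also pulls in streamlit:

    from PIL import Image
    from app import YOLOModel, ImageProcessor

    model = YOLOModel("yolov8s.pt")             # ultralytics downloads the weights on first use
    processor = ImageProcessor(model)
    result = processor.process_image(Image.open("sample.jpg"))  # sample.jpg is a placeholder path
    if result.success:
        Image.fromarray(result.image).save("annotated.jpg")     # save the annotated copy
    else:
        print(result.error_message)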