Spaces:

AffordableAI
/

Real_Time_Safety_Monitoring

Sleeping

App Files Files Community

capradeepgujaran committed on Oct 23, 2024

Commit

aca1712

verified ·

1 Parent(s): 7f51b6d

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -90

app.py CHANGED Viewed

@@ -8,6 +8,8 @@ import io
 import os
 import base64
 class SafetyMonitor:
     def __init__(self):
         """Initialize Safety Monitor with configuration."""
@@ -15,6 +17,9 @@ class SafetyMonitor:
         self.model_name = "llama-3.2-90b-vision-preview"
         self.max_image_size = (800, 800)
         self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
     def preprocess_image(self, frame):
         """Process image for analysis."""
@@ -39,13 +44,13 @@ class SafetyMonitor:
             return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
         return image
-    def encode_image(self, frame):
-        """Convert image to base64 encoding."""
-        frame_pil = PILImage.fromarray(frame)
-        buffered = io.BytesIO()
-        frame_pil.save(buffered, format="JPEG", quality=95)
-        img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
-        return f"data:image/jpeg;base64,{img_base64}"
     def analyze_frame(self, frame):
         """Perform safety analysis on the frame."""
@@ -54,7 +59,7 @@ class SafetyMonitor:
         frame = self.preprocess_image(frame)
         image_url = self.encode_image(frame)
         try:
             completion = self.client.chat.completions.create(
                 model=self.model_name,
@@ -84,66 +89,22 @@ class SafetyMonitor:
             print(f"Analysis error: {str(e)}")
             return f"Analysis Error: {str(e)}", {}
-    def get_region_coordinates(self, position, image_shape):
-        """Convert textual position to coordinates."""
-        height, width = image_shape[:2]
-        # Define regions
-        regions = {
-            'center': (width//3, height//3, 2*width//3, 2*height//3),
-            'top': (width//3, 0, 2*width//3, height//3),
-            'bottom': (width//3, 2*height//3, 2*width//3, height),
-            'left': (0, height//3, width//3, 2*height//3),
-            'right': (2*width//3, height//3, width, 2*height//3),
-            'top-left': (0, 0, width//3, height//3),
-            'top-right': (2*width//3, 0, width, height//3),
-            'bottom-left': (0, 2*height//3, width//3, height),
-            'bottom-right': (2*width//3, 2*height//3, width, height),
-            'upper': (0, 0, width, height//2),
-            'lower': (0, height//2, width, height),
-            'middle': (0, height//3, width, 2*height//3)
-        }
-        # Ensure the region name from the model output matches one of our predefined regions
-        position = position.lower()
-        return regions.get(position, (0, 0, width, height))  # Default to full image if no match
-    def draw_observations(self, image, observations):
-        """Draw bounding boxes and labels for safety observations."""
-        height, width = image.shape[:2]
         font = cv2.FONT_HERSHEY_SIMPLEX
         font_scale = 0.5
         thickness = 2
-        padding = 10
-        for idx, obs in enumerate(observations):
             color = self.colors[idx % len(self.colors)]
-            # Get coordinates for this observation
-            x1, y1, x2, y2 = self.get_region_coordinates(obs['location'], image.shape)
-            print(f"Drawing box at coordinates: ({x1}, {y1}, {x2}, {y2}) for {obs['description']}")
-            # Draw rectangle
-            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
-            # Add label with background
-            label = obs['description'][:50] + "..." if len(obs['description']) > 50 else obs['description']
-            label_size, _ = cv2.getTextSize(label, font, font_scale, thickness)
-            # Position text above the box
-            text_x = max(0, x1)
-            text_y = max(label_size[1] + padding, y1 - padding)
-            # Draw text background
-            cv2.rectangle(image,
-                         (text_x, text_y - label_size[1] - padding),
-                         (text_x + label_size[0] + padding, text_y),
-                         color, -1)
-            # Draw text
-            cv2.putText(image, label,
-                       (text_x + padding//2, text_y - padding//2),
-                       font, font_scale, (255, 255, 255), thickness)
         return image
@@ -153,35 +114,13 @@ class SafetyMonitor:
             return None, "No image provided"
         try:
-            # Get analysis
             analysis, _ = self.analyze_frame(frame)
-            print(f"Raw analysis: {analysis}")  # Debug print
-            display_frame = frame.copy()
-            # Parse observations
-            observations = []
-            for line in analysis.split('\n'):
-                line = line.strip()
-                if line.startswith('-') and '<location>' in line and '</location>' in line:
-                    start = line.find('<location>') + len('<location>')
-                    end = line.find('</location>')
-                    location_description = line[start:end].strip()
-                    if ':' in location_description:
-                        location, description = location_description.split(':', 1)
-                        observations.append({
-                            'location': location.strip(),
-                            'description': description.strip()
-                        })
-            print(f"Parsed observations: {observations}")  # Debug print
-            # Draw observations
-            if observations:
-                annotated_frame = self.draw_observations(display_frame, observations)
-                return annotated_frame, analysis
-            return display_frame, analysis
         except Exception as e:
             print(f"Processing error: {str(e)}")

 import os
 import base64
+import torch
 class SafetyMonitor:
     def __init__(self):
         """Initialize Safety Monitor with configuration."""
         self.model_name = "llama-3.2-90b-vision-preview"
         self.max_image_size = (800, 800)
         self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
+        # Load YOLOv5 model for object detection
+        self.yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
     def preprocess_image(self, frame):
         """Process image for analysis."""
             return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
         return image
+    def detect_objects(self, frame):
+        """Detect objects in a frame using the YOLOv5 model.
+
+        Args:
+            frame: Image passed straight to ``self.yolo_model``.
+
+        Returns:
+            A ``(bbox_data, labels)`` tuple. ``bbox_data`` is a NumPy array
+            taken from ``results.xyxy[0]`` (the detections for the first
+            image); ``draw_bounding_boxes`` unpacks each row as
+            ``x1, y1, x2, y2, conf, class_id``. ``labels`` is
+            ``results.names``, used to look up a class name by class id.
+        """
+        results = self.yolo_model(frame)
+        # The model may run on a GPU; Tensor.numpy() only works on CPU
+        # tensors that do not require grad, so detach and move to host first.
+        bbox_data = results.xyxy[0].detach().cpu().numpy()
+        labels = results.names  # Class names
+        return bbox_data, labels
     def analyze_frame(self, frame):
         """Perform safety analysis on the frame."""
         frame = self.preprocess_image(frame)
         image_url = self.encode_image(frame)
         try:
             completion = self.client.chat.completions.create(
                 model=self.model_name,
             print(f"Analysis error: {str(e)}")
             return f"Analysis Error: {str(e)}", {}
+    def draw_bounding_boxes(self, image, bboxes, labels):
+        """Draw bounding boxes and class labels for detected objects.
+
+        Args:
+            image: Image to annotate. It is drawn on in place and also
+                returned, so pass a copy if the original must stay clean.
+            bboxes: Iterable of rows that unpack as
+                ``x1, y1, x2, y2, conf, class_id``.
+            labels: Mapping or sequence indexed by ``int(class_id)`` that
+                yields the class name.
+
+        Returns:
+            The same ``image`` object with rectangles and labels drawn on it.
+        """
         font = cv2.FONT_HERSHEY_SIMPLEX
         font_scale = 0.5
         thickness = 2
+        for idx, bbox in enumerate(bboxes):
+            x1, y1, x2, y2, conf, class_id = bbox
+            left, top, right, bottom = int(x1), int(y1), int(x2), int(y2)
+            label = labels[int(class_id)]
             color = self.colors[idx % len(self.colors)]
+            # Draw bounding box
+            cv2.rectangle(image, (left, top), (right, bottom), color, thickness)
+            # Draw label just above the box, but never above the top edge of
+            # the image: for boxes touching the top, ``top - 10`` would be
+            # negative and the text would be rendered out of view.
+            label_text = f"{label} {conf:.2f}"
+            (_, text_height), _ = cv2.getTextSize(label_text, font, font_scale, thickness)
+            text_x = max(0, left)
+            text_y = max(top - 10, text_height + 2)
+            cv2.putText(image, label_text, (text_x, text_y), font, font_scale, color, thickness)
         return image
             return None, "No image provided"
         try:
+            # Detect objects in the image using YOLO
+            bbox_data, labels = self.detect_objects(frame)
+            frame_with_boxes = self.draw_bounding_boxes(frame, bbox_data, labels)
+            # Get analysis from Groq's model
             analysis, _ = self.analyze_frame(frame)
+            return frame_with_boxes, analysis
         except Exception as e:
             print(f"Processing error: {str(e)}")