Spaces:

AffordableAI
/

Real_Time_Safety_Monitoring

Sleeping

App Files Files Community

capradeepgujaran committed on Oct 23, 2024

Commit

18cd948

verified ·

1 Parent(s): b122109

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -125

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from PIL import Image as PILImage
 import io
 import os
 import base64
 def create_monitor_interface():
     api_key = os.getenv("GROQ_API_KEY")
@@ -15,14 +16,27 @@ def create_monitor_interface():
         def __init__(self):
             self.client = Groq()
             self.model_name = "llama-3.2-90b-vision-preview"
-            self.max_image_size = (800, 800)
-            self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
         def analyze_frame(self, frame: np.ndarray) -> str:
             if frame is None:
-                return ""
-            # Convert image
             if len(frame.shape) == 2:
                 frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
             elif len(frame.shape) == 3 and frame.shape[2] == 4:
@@ -31,11 +45,12 @@ def create_monitor_interface():
             frame = self.resize_image(frame)
             frame_pil = PILImage.fromarray(frame)
             buffered = io.BytesIO()
             frame_pil.save(buffered,
-                          format="JPEG",
-                          quality=95,  # Increased quality
-                          optimize=True)
             img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
             image_url = f"data:image/jpeg;base64,{img_base64}"
@@ -43,36 +58,14 @@ def create_monitor_interface():
                 completion = self.client.chat.completions.create(
                     model=self.model_name,
                     messages=[
-                        {
-                            "role": "system",
-                            "content": """You are a construction site safety expert specializing in ergonomics and workplace safety.
-                            Analyze images for:
-                            1. Worker posture and ergonomic risks
-                            2. PPE usage and compliance
-                            3. Tool and equipment safety
-                            4. Environmental hazards
-                            5. Working position and technique"""
-                        },
                         {
                             "role": "user",
                             "content": [
                                 {
                                     "type": "text",
-                                    "text": """Carefully analyze this construction worker's position and environment. Look for:
-        1. Ergonomic issues (kneeling position, back posture, repetitive motions)
-        2. PPE compliance (knee pads, gloves, appropriate footwear)
-        3. Working technique and body mechanics
-        4. Surrounding hazards or risks
-        For each issue identified, format your response as:
-        - <location>position:specific safety concern and recommendation</location>
-        For example:
-        - <location>center:Worker kneeling without knee protection, risking joint injury. Recommend knee pads.</location>
-        - <location>bottom:Improper back posture while working, potential for strain. Should maintain straight back.</location>
-        Be specific about each safety concern you observe."""
                                 },
                                 {
                                     "type": "image_url",
@@ -81,127 +74,78 @@ def create_monitor_interface():
                                     }
                                 }
                             ]
                         }
                     ],
-                    temperature=0.3,  # Lowered temperature for more focused analysis
-                    max_tokens=500,
-                    stream=False
                 )
-                response = completion.choices[0].message.content
-                print(f"Raw response: {response}")  # For debugging
-                return response
             except Exception as e:
-                print(f"Analysis error: {str(e)}")
-                return ""
-        def resize_image(self, image):
-            height, width = image.shape[:2]
-            if height > self.max_image_size[1] or width > self.max_image_size[0]:
-                aspect = width / height
-                if width > height:
-                    new_width = self.max_image_size[0]
-                    new_height = int(new_width / aspect)
-                else:
-                    new_height = self.max_image_size[1]
-                    new_width = int(new_height * aspect)
-                return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
-            return image
-        def get_region_coordinates(self, position: str, image_shape: tuple) -> tuple:
-            height, width = image_shape[:2]
-            regions = {
-                'top-left': (0, 0, width//3, height//3),
-                'top': (width//3, 0, 2*width//3, height//3),
-                'top-right': (2*width//3, 0, width, height//3),
-                'left': (0, height//3, width//3, 2*height//3),
-                'center': (width//3, height//3, 2*width//3, 2*height//3),
-                'right': (2*width//3, height//3, width, 2*height//3),
-                'bottom-left': (0, 2*height//3, width//3, height),
-                'bottom': (width//3, 2*height//3, 2*width//3, height),
-                'bottom-right': (2*width//3, 2*height//3, width, height)
-            }
-            # Try to match the position with regions
-            matched_region = None
-            max_match_length = 0
-            position_lower = position.lower()
-            for region_name in regions:
-                if region_name in position_lower:
-                    if len(region_name) > max_match_length:
-                        matched_region = region_name
-                        max_match_length = len(region_name)
-            if matched_region:
-                return regions[matched_region]
-            return regions['center']
         def draw_observations(self, image, observations):
             height, width = image.shape[:2]
             font = cv2.FONT_HERSHEY_SIMPLEX
-            font_scale = 0.6
             thickness = 2
             for idx, obs in enumerate(observations):
                 color = self.colors[idx % len(self.colors)]
-                parts = obs.split(':')
-                if len(parts) >= 2:
-                    position = parts[0]
-                    description = ':'.join(parts[1:])
-                    x1, y1, x2, y2 = self.get_region_coordinates(position, image.shape)
-                    # Draw rectangle
-                    cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
-                    # Add label with background
-                    label = description[:50] + "..." if len(description) > 50 else description
-                    label_size = cv2.getTextSize(label, font, font_scale, thickness)[0]
-                    label_x = max(0, min(x1, width - label_size[0]))
-                    label_y = max(20, y1 - 5)
-                    cv2.rectangle(image, (label_x, label_y - 20),
-                                (label_x + label_size[0], label_y), color, -1)
-                    cv2.putText(image, label, (label_x, label_y - 5),
-                              font, font_scale, (255, 255, 255), thickness)
             return image
         def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
             if frame is None:
                 return None, "No image provided"
             analysis = self.analyze_frame(frame)
-            print(f"Analysis received: {analysis}")  # Debug print
             observations = []
             for line in analysis.split('\n'):
                 line = line.strip()
                 if line.startswith('-'):
                     if '<location>' in line and '</location>' in line:
                         start = line.find('<location>') + len('<location>')
                         end = line.find('</location>')
-                        observation = line[start:end].strip()
-                        if observation and ':' in observation:
-                            observations.append(observation)
-            print(f"Parsed observations: {observations}")  # Debug print
-            display_frame = frame.copy()
-            if observations:
-                annotated_frame = self.draw_observations(display_frame, observations)
-                return annotated_frame, analysis
-            # If no observations were found but we got some analysis
-            if analysis and not analysis.isspace():
-                return display_frame, analysis
-            return display_frame, "Please try again - no safety analysis was generated."
     monitor = SafetyMonitor()
     with gr.Blocks() as demo:
@@ -209,9 +153,9 @@ def create_monitor_interface():
         with gr.Row():
             input_image = gr.Image(label="Upload Image")
-            output_image = gr.Image(label="Analysis Results")
-        analysis_text = gr.Textbox(label="Safety Analysis", lines=5)
         def analyze_image(image):
             if image is None:

 import io
 import os
 import base64
+import random
 def create_monitor_interface():
     api_key = os.getenv("GROQ_API_KEY")
         def __init__(self):
             self.client = Groq()
             self.model_name = "llama-3.2-90b-vision-preview"
+            self.max_image_size = (640, 640)  # Increased size for better visibility
+            self.colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]
+        def resize_image(self, image):
+            height, width = image.shape[:2]
+            aspect = width / height
+            if width > height:
+                new_width = min(self.max_image_size[0], width)
+                new_height = int(new_width / aspect)
+            else:
+                new_height = min(self.max_image_size[1], height)
+                new_width = int(new_height * aspect)
+            return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
         def analyze_frame(self, frame: np.ndarray) -> str:
             if frame is None:
+                return "No frame received"
+            # Convert and resize image
             if len(frame.shape) == 2:
                 frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
             elif len(frame.shape) == 3 and frame.shape[2] == 4:
             frame = self.resize_image(frame)
             frame_pil = PILImage.fromarray(frame)
+            # Convert to base64 with minimal quality
             buffered = io.BytesIO()
             frame_pil.save(buffered,
+                         format="JPEG",
+                         quality=30,
+                         optimize=True)
             img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
             image_url = f"data:image/jpeg;base64,{img_base64}"
                 completion = self.client.chat.completions.create(
                     model=self.model_name,
                     messages=[
                         {
                             "role": "user",
                             "content": [
                                 {
                                     "type": "text",
+                                    "text": """Analyze this workplace image and describe each safety concern in this format:
+                                    - <location>Description</location>
+                                    Use one line per issue, starting with a dash and location in tags."""
                                 },
                                 {
                                     "type": "image_url",
                                     }
                                 }
                             ]
+                        },
+                        {
+                            "role": "assistant",
+                            "content": ""
                         }
                     ],
+                    temperature=0.1,
+                    max_tokens=150,
+                    top_p=1,
+                    stream=False,
+                    stop=None
                 )
+                return completion.choices[0].message.content
             except Exception as e:
+                print(f"Detailed error: {str(e)}")
+                return f"Analysis Error: {str(e)}"
         def draw_observations(self, image, observations):
             height, width = image.shape[:2]
             font = cv2.FONT_HERSHEY_SIMPLEX
+            font_scale = 0.5
             thickness = 2
+            # Generate random positions for each observation
             for idx, obs in enumerate(observations):
                 color = self.colors[idx % len(self.colors)]
+                # Generate random box position
+                box_width = width // 3
+                box_height = height // 3
+                x = random.randint(0, width - box_width)
+                y = random.randint(0, height - box_height)
+                # Draw rectangle
+                cv2.rectangle(image, (x, y), (x + box_width, y + box_height), color, 2)
+                # Add label with background
+                label = obs[:40] + "..." if len(obs) > 40 else obs
+                label_size = cv2.getTextSize(label, font, font_scale, thickness)[0]
+                cv2.rectangle(image, (x, y - 20), (x + label_size[0], y), color, -1)
+                cv2.putText(image, label, (x, y - 5), font, font_scale, (255, 255, 255), thickness)
             return image
         def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
             if frame is None:
                 return None, "No image provided"
             analysis = self.analyze_frame(frame)
+            display_frame = self.resize_image(frame.copy())
+            # Parse observations from the analysis
             observations = []
             for line in analysis.split('\n'):
                 line = line.strip()
                 if line.startswith('-'):
+                    # Extract text between <location> tags if present
                     if '<location>' in line and '</location>' in line:
                         start = line.find('<location>') + len('<location>')
                         end = line.find('</location>')
+                        observation = line[end + len('</location>'):].strip()
+                    else:
+                        observation = line[1:].strip()  # Remove the dash
+                    if observation:
+                        observations.append(observation)
+            # Draw observations on the image
+            annotated_frame = self.draw_observations(display_frame, observations)
+            return annotated_frame, analysis
+    # Create the main interface
     monitor = SafetyMonitor()
     with gr.Blocks() as demo:
         with gr.Row():
             input_image = gr.Image(label="Upload Image")
+            output_image = gr.Image(label="Annotated Results")
+        analysis_text = gr.Textbox(label="Detailed Analysis", lines=5)
         def analyze_image(image):
             if image is None: