David Driscoll committed on
Commit 5f27df7
1 Parent(s): e421b40

Constant interface

Files changed (1)
  1. app.py +121 -75
app.py CHANGED
@@ -8,6 +8,11 @@ from PIL import Image
 import mediapipe as mp
 from fer import FER  # Facial emotion recognition
 
+# -----------------------------
+# Constants
+# -----------------------------
+SKIP_RATE = 5  # Run heavy detection every 5 frames
+
 # -----------------------------
 # Initialize Models and Helpers
 # -----------------------------
@@ -32,100 +37,141 @@ obj_transform = transforms.Compose([transforms.ToTensor()])
 emotion_detector = FER(mtcnn=True)
 
 # -----------------------------
-# Define Analysis Functions
+# Define Analysis Functions with Frame Skipping
 # -----------------------------
 
 def analyze_posture(image):
     """
-    Processes an image captured from the webcam with MediaPipe Pose,
-    draws pose landmarks, and returns an annotated image and a text summary.
+    Processes an image from the webcam with MediaPipe Pose.
+    Runs heavy detection every SKIP_RATE frames; otherwise, returns last result.
     """
-    # Convert from PIL (RGB) to OpenCV BGR format
-    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    output_frame = frame.copy()
-    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
-    posture_result = "No posture detected"
-    pose_results = pose.process(frame_rgb)
-    if pose_results.pose_landmarks:
-        posture_result = "Posture detected"
-        mp_drawing.draw_landmarks(
-            output_frame, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
-            mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
-            mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
-        )
-
-    annotated_image = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)
-    return annotated_image, f"Posture Analysis: {posture_result}"
+    if not hasattr(analyze_posture, "counter"):
+        analyze_posture.counter = 0
+        analyze_posture.last_output = None
+    analyze_posture.counter += 1
+
+    # If first frame or time to run detection:
+    if analyze_posture.counter % SKIP_RATE == 0 or analyze_posture.last_output is None:
+        # Convert from PIL (RGB) to OpenCV BGR format
+        frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+        output_frame = frame.copy()
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+        posture_result = "No posture detected"
+        pose_results = pose.process(frame_rgb)
+        if pose_results.pose_landmarks:
+            posture_result = "Posture detected"
+            mp_drawing.draw_landmarks(
+                output_frame, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
+                mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
+                mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
+            )
+
+        annotated_image = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)
+        result = (annotated_image, f"Posture Analysis: {posture_result}")
+        analyze_posture.last_output = result
+        return result
+    else:
+        # For frames in between, return last result
+        return analyze_posture.last_output
 
 def analyze_emotion(image):
     """
-    Uses FER to detect facial emotions from the captured image.
-    Returns the image and a text summary.
+    Uses FER to detect facial emotions from the webcam image.
+    Runs heavy detection every SKIP_RATE frames.
     """
-    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    emotions = emotion_detector.detect_emotions(frame_rgb)
-    if emotions:
-        top_emotion, score = max(emotions[0]["emotions"].items(), key=lambda x: x[1])
-        emotion_text = f"{top_emotion} ({score:.2f})"
+    if not hasattr(analyze_emotion, "counter"):
+        analyze_emotion.counter = 0
+        analyze_emotion.last_output = None
+    analyze_emotion.counter += 1
+
+    if analyze_emotion.counter % SKIP_RATE == 0 or analyze_emotion.last_output is None:
+        frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        emotions = emotion_detector.detect_emotions(frame_rgb)
+        if emotions:
+            top_emotion, score = max(emotions[0]["emotions"].items(), key=lambda x: x[1])
+            emotion_text = f"{top_emotion} ({score:.2f})"
+        else:
+            emotion_text = "No face detected for emotion analysis"
+        annotated_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        result = (annotated_image, f"Emotion Analysis: {emotion_text}")
+        analyze_emotion.last_output = result
+        return result
     else:
-        emotion_text = "No face detected for emotion analysis"
-
-    annotated_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    return annotated_image, f"Emotion Analysis: {emotion_text}"
+        return analyze_emotion.last_output
 
 def analyze_objects(image):
     """
-    Uses a pretrained Faster R-CNN to detect objects in the image.
-    Returns an annotated image with bounding boxes and a text summary.
+    Uses Faster R-CNN to detect objects in the webcam image.
+    Heavy detection is run every SKIP_RATE frames.
     """
-    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    output_frame = frame.copy()
-    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    image_pil = Image.fromarray(frame_rgb)
-    img_tensor = obj_transform(image_pil)
-
-    with torch.no_grad():
-        detections = object_detection_model([img_tensor])[0]
-
-    threshold = 0.8
-    detected_boxes = detections["boxes"][detections["scores"] > threshold]
-    for box in detected_boxes:
-        box = box.int().cpu().numpy()
-        cv2.rectangle(output_frame, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 2)
-
-    object_result = f"Detected {len(detected_boxes)} object(s)" if len(detected_boxes) else "No objects detected"
-    annotated_image = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)
-    return annotated_image, f"Object Detection: {object_result}"
+    if not hasattr(analyze_objects, "counter"):
+        analyze_objects.counter = 0
+        analyze_objects.last_output = None
+    analyze_objects.counter += 1
+
+    if analyze_objects.counter % SKIP_RATE == 0 or analyze_objects.last_output is None:
+        frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+        output_frame = frame.copy()
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        image_pil = Image.fromarray(frame_rgb)
+        img_tensor = obj_transform(image_pil)
+
+        with torch.no_grad():
+            detections = object_detection_model([img_tensor])[0]
+
+        threshold = 0.8
+        detected_boxes = detections["boxes"][detections["scores"] > threshold]
+        for box in detected_boxes:
+            box = box.int().cpu().numpy()
+            cv2.rectangle(output_frame, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 2)
+
+        object_result = f"Detected {len(detected_boxes)} object(s)" if len(detected_boxes) else "No objects detected"
+        annotated_image = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)
+        result = (annotated_image, f"Object Detection: {object_result}")
+        analyze_objects.last_output = result
+        return result
+    else:
+        return analyze_objects.last_output
 
 def analyze_faces(image):
     """
-    Uses MediaPipe face detection to identify faces in the image.
-    Returns an annotated image with bounding boxes and a text summary.
+    Uses MediaPipe to detect faces in the webcam image.
+    Runs heavy detection every SKIP_RATE frames.
     """
-    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    output_frame = frame.copy()
-    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    face_results = face_detection.process(frame_rgb)
-
-    face_result = "No faces detected"
-    if face_results.detections:
-        face_result = f"Detected {len(face_results.detections)} face(s)"
-        h, w, _ = output_frame.shape
-        for detection in face_results.detections:
-            bbox = detection.location_data.relative_bounding_box
-            x = int(bbox.xmin * w)
-            y = int(bbox.ymin * h)
-            box_w = int(bbox.width * w)
-            box_h = int(bbox.height * h)
-            cv2.rectangle(output_frame, (x, y), (x + box_w, y + box_h), (0, 0, 255), 2)
-
-    annotated_image = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)
-    return annotated_image, f"Face Detection: {face_result}"
+    if not hasattr(analyze_faces, "counter"):
+        analyze_faces.counter = 0
+        analyze_faces.last_output = None
+    analyze_faces.counter += 1
+
+    if analyze_faces.counter % SKIP_RATE == 0 or analyze_faces.last_output is None:
+        frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+        output_frame = frame.copy()
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        face_results = face_detection.process(frame_rgb)
+
+        face_result = "No faces detected"
+        if face_results.detections:
+            face_result = f"Detected {len(face_results.detections)} face(s)"
+            h, w, _ = output_frame.shape
+            for detection in face_results.detections:
+                bbox = detection.location_data.relative_bounding_box
+                x = int(bbox.xmin * w)
+                y = int(bbox.ymin * h)
+                box_w = int(bbox.width * w)
+                box_h = int(bbox.height * h)
+                cv2.rectangle(output_frame, (x, y), (x + box_w, y + box_h), (0, 0, 255), 2)
+
+        annotated_image = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)
+        result = (annotated_image, f"Face Detection: {face_result}")
+        analyze_faces.last_output = result
+        return result
+    else:
+        return analyze_faces.last_output
 
 # -----------------------------
-# Custom CSS for a High-Tech Look
+# Custom CSS for a High-Tech Look (with white fonts)
 # -----------------------------
 custom_css = """
 @import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700&display=swap');
@@ -158,7 +204,7 @@ body {
 """
 
 # -----------------------------
-# Create Individual Interfaces for Each Analysis
+# Create Individual Interfaces for Each Analysis (using real-time webcam input)
 # -----------------------------
 posture_interface = gr.Interface(
     fn=analyze_posture,
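
Note: the four analysis functions in this commit each repeat the same counter / last_output bookkeeping so that the heavy models only run every SKIP_RATE frames. A minimal sketch of how that frame-skipping pattern could be factored into a reusable decorator is shown below; the helper name skip_frames and the use of functools are illustrative assumptions, not part of the committed app.py.

import functools

SKIP_RATE = 5  # same constant the commit introduces in app.py

def skip_frames(rate=SKIP_RATE):
    """Hypothetical helper: run the wrapped analysis only every `rate` calls, otherwise return the cached result."""
    def decorator(fn):
        counter = 0
        last_output = None

        @functools.wraps(fn)
        def wrapper(image):
            nonlocal counter, last_output
            counter += 1
            if counter % rate == 0 or last_output is None:
                last_output = fn(image)  # heavy detection runs here
            return last_output  # skipped frames reuse the cached result
        return wrapper
    return decorator

# Usage sketch (hypothetical):
# @skip_frames()
# def analyze_posture(image):
#     ...  # run MediaPipe Pose and return (annotated_image, summary)

This mirrors the behavior of the committed code, which keeps the counter and cached output as attributes on each analysis function instead of closure variables.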