David Driscoll committed on
Commit 5148899 · 1 Parent(s): 4d52ef2

Update app

Files changed (1)
  1. app.py +128 -83
app.py CHANGED
@@ -28,44 +28,66 @@ object_detection_model = models.detection.fasterrcnn_resnet50_fpn(
 object_detection_model.eval()
 obj_transform = transforms.Compose([transforms.ToTensor()])
 
-# Facial Emotion Detection using FER (this model will detect emotions from a face)
+# Facial Emotion Detection using FER (requires TensorFlow)
 emotion_detector = FER(mtcnn=True)
 
 # -----------------------------
 # Define Analysis Functions
 # -----------------------------
 
-def analyze_posture(frame_rgb, output_frame):
-    """Runs pose estimation and draws landmarks on the frame."""
+def analyze_posture(image):
+    """
+    Takes an image (captured via the webcam), processes it with MediaPipe Pose,
+    and returns an annotated image and a text summary.
+    """
+    # Convert from PIL (RGB) to OpenCV BGR format
+    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    output_frame = frame.copy()
+    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+    posture_result = "No posture detected"
     pose_results = pose.process(frame_rgb)
-    posture_text = "No posture detected"
     if pose_results.pose_landmarks:
-        posture_text = "Posture detected"
-        # Draw the pose landmarks on the output image
+        posture_result = "Posture detected"
         mp_drawing.draw_landmarks(
             output_frame, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
             mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
             mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
         )
-    return posture_text
+
+    annotated_image = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)
+    return annotated_image, f"Posture Analysis: {posture_result}"
 
-def analyze_emotion(frame):
-    """Detects emotion from faces using FER. Returns the dominant emotion."""
-    # FER expects RGB images
+def analyze_emotion(image):
+    """
+    Uses FER to detect facial emotions from the captured image.
+    Returns the original image and a text summary.
+    """
+    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    # FER expects an RGB image
     frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     emotions = emotion_detector.detect_emotions(frame_rgb)
     if emotions:
-        # Use the first detected face and its top emotion
        top_emotion, score = max(emotions[0]["emotions"].items(), key=lambda x: x[1])
        emotion_text = f"{top_emotion} ({score:.2f})"
     else:
        emotion_text = "No face detected for emotion analysis"
-    return emotion_text
+
+    # For simplicity, we return the original image
+    annotated_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+    return annotated_image, f"Emotion Analysis: {emotion_text}"
 
-def analyze_objects(frame_rgb, output_frame):
-    """Performs object detection and draws bounding boxes for detections above a threshold."""
+def analyze_objects(image):
+    """
+    Uses a pretrained Faster R-CNN to detect objects in the image.
+    Returns an annotated image with bounding boxes and a text summary.
+    """
+    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    output_frame = frame.copy()
+    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     image_pil = Image.fromarray(frame_rgb)
     img_tensor = obj_transform(image_pil)
+
     with torch.no_grad():
         detections = object_detection_model([img_tensor])[0]
 
@@ -74,15 +96,24 @@ def analyze_objects(frame_rgb, output_frame):
     for box in detected_boxes:
         box = box.int().cpu().numpy()
         cv2.rectangle(output_frame, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 2)
-    object_text = f"Detected {len(detected_boxes)} object(s)" if len(detected_boxes) else "No objects detected"
-    return object_text
+
+    object_result = f"Detected {len(detected_boxes)} object(s)" if len(detected_boxes) else "No objects detected"
+    annotated_image = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)
+    return annotated_image, f"Object Detection: {object_result}"
 
-def analyze_faces(frame_rgb, output_frame):
-    """Detects faces using MediaPipe and draws bounding boxes."""
+def analyze_faces(image):
+    """
+    Uses MediaPipe face detection to identify faces in the image.
+    Returns an annotated image with face bounding boxes and a text summary.
+    """
+    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    output_frame = frame.copy()
+    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     face_results = face_detection.process(frame_rgb)
-    face_text = "No faces detected"
+
+    face_result = "No faces detected"
     if face_results.detections:
-        face_text = f"Detected {len(face_results.detections)} face(s)"
+        face_result = f"Detected {len(face_results.detections)} face(s)"
         h, w, _ = output_frame.shape
         for detection in face_results.detections:
             bbox = detection.location_data.relative_bounding_box
@@ -91,77 +122,91 @@ def analyze_faces(frame_rgb, output_frame):
             box_w = int(bbox.width * w)
             box_h = int(bbox.height * h)
             cv2.rectangle(output_frame, (x, y), (x + box_w, y + box_h), (0, 0, 255), 2)
-    return face_text
+
+    annotated_image = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)
+    return annotated_image, f"Face Detection: {face_result}"
 
 # -----------------------------
-# Main Analysis Function
+# Custom CSS for a High-Tech Look
 # -----------------------------
+custom_css = """
+@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700&display=swap');
 
-def analyze_webcam(video_path):
-    """
-    Receives a video file (captured from the webcam), extracts one frame,
-    then runs posture analysis, facial emotion detection, object detection,
-    and face detection on that frame.
-    Returns an annotated image and a textual summary.
-    """
-    # Open the video file (the webcam stream is saved as a temporary file)
-    cap = cv2.VideoCapture(video_path)
-    success, frame = cap.read()
-    cap.release()
-
-    if not success:
-        return None, "Could not read a frame from the video."
-
-    # Create a copy for drawing annotations
-    output_frame = frame.copy()
-
-    # Convert frame to RGB for some analyses
-    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
-    # Run analyses
-    posture_result = analyze_posture(frame_rgb, output_frame)
-    emotion_result = analyze_emotion(frame)
-    object_result = analyze_objects(frame_rgb, output_frame)
-    face_result = analyze_faces(frame_rgb, output_frame)
-
-    # Compose the result summary text
-    summary = (
-        f"Posture Analysis: {posture_result}\n"
-        f"Emotion Analysis: {emotion_result}\n"
-        f"Object Detection: {object_result}\n"
-        f"Face Detection: {face_result}"
-    )
-
-    # Optionally, overlay some summary text on the image
-    cv2.putText(output_frame, f"Emotion: {emotion_result}", (10, 30),
-                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
-    cv2.putText(output_frame, f"Objects: {object_result}", (10, 70),
-                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
-    cv2.putText(output_frame, f"Faces: {face_result}", (10, 110),
-                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
-
-    return output_frame, summary
+body {
+    background-color: #0e0e0e;
+    color: #e0e0e0;
+    font-family: 'Orbitron', sans-serif;
+}
+.gradio-container {
+    background: linear-gradient(135deg, #1e1e2f, #3e3e55);
+    border-radius: 10px;
+    padding: 20px;
+}
+.gradio-title {
+    font-size: 2.5em;
+    color: #66fcf1;
+    text-align: center;
+}
+.gradio-description {
+    font-size: 1.2em;
+    text-align: center;
+    margin-bottom: 20px;
+}
+"""
 
 # -----------------------------
-# Gradio Interface Setup
+# Create Individual Interfaces for Each Analysis
 # -----------------------------
 
-# Note: In the current version of Gradio, the Video component does not accept a 'source' argument.
-# Remove the 'source' parameter. Streaming is still enabled.
-interface = gr.Interface(
-    fn=analyze_webcam,
-    inputs=gr.Video(streaming=True, label="Webcam Feed"),
-    outputs=[
-        gr.Image(type="numpy", label="Annotated Output"),
-        gr.Textbox(label="Analysis Summary")
-    ],
-    title="Real-Time Multi-Analysis App",
-    description=(
-        "This app performs real-time posture analysis, facial emotion detection, "
-        "object detection, and face detection using your webcam."
-    ),
-    live=True
+posture_interface = gr.Interface(
+    fn=analyze_posture,
+    inputs=gr.Camera(label="Capture Your Posture"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Posture Analysis")],
+    title="Posture Analysis",
+    description="Detects your posture using MediaPipe."
 )
 
+emotion_interface = gr.Interface(
+    fn=analyze_emotion,
+    inputs=gr.Camera(label="Capture Your Face"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Emotion Analysis")],
+    title="Emotion Analysis",
+    description="Detects facial emotions using FER."
+)
+
+objects_interface = gr.Interface(
+    fn=analyze_objects,
+    inputs=gr.Camera(label="Capture the Scene"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Object Detection")],
+    title="Object Detection",
+    description="Detects objects using a pretrained Faster R-CNN."
+)
+
+faces_interface = gr.Interface(
+    fn=analyze_faces,
+    inputs=gr.Camera(label="Capture Your Face"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Face Detection")],
+    title="Face Detection",
+    description="Detects faces using MediaPipe."
+)
+
+# -----------------------------
+# Create a Tabbed Interface for All Analyses
+# -----------------------------
+
+tabbed_interface = gr.TabbedInterface(
+    interface_list=[posture_interface, emotion_interface, objects_interface, faces_interface],
+    tab_names=["Posture", "Emotion", "Objects", "Faces"]
+)
+
+# -----------------------------
+# Wrap Everything in a Blocks Layout with Custom CSS
+# -----------------------------
+demo = gr.Blocks(css=custom_css)
+with demo:
+    gr.Markdown("<h1 class='gradio-title'>Real-Time Multi-Analysis App</h1>")
+    gr.Markdown("<p class='gradio-description'>Experience a high-tech, cinematic interface for real-time analysis of your posture, emotions, objects, and faces using your webcam.</p>")
+    demo_tab = tabbed_interface
+
 if __name__ == "__main__":
-    interface.launch()
+    demo.launch()