Spaces:

ddriscoll
/

SOC3242-01_Group_3_Interactive

Sleeping

David Driscoll

Constant interface

5f27df7 5 months ago

9.82 kB

	import gradio as gr
	import cv2
	import numpy as np
	import torch
	from torchvision import models, transforms
	from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
	from PIL import Image
	import mediapipe as mp
	from fer import FER # Facial emotion recognition

	# -----------------------------
	# Constants
	# -----------------------------
	# Frame-skipping interval shared by every analyze_* function below: each one
	# runs its detector on its first frame and then whenever its own call counter
	# is a multiple of SKIP_RATE, returning the cached result in between.
	SKIP_RATE = 5 # Run heavy detection every 5 frames

	# -----------------------------
	# Initialize Models and Helpers
	# -----------------------------
	# Everything in this section is built once at import time and then shared,
	# as module-level globals, by the analyze_* functions further down.

	# MediaPipe Pose for posture analysis (constructed with default options);
	# mp_drawing supplies the landmark-drawing helpers used by analyze_posture.
	mp_pose = mp.solutions.pose
	pose = mp_pose.Pose()
	mp_drawing = mp.solutions.drawing_utils

	# MediaPipe Face Detection for face detection; detections scoring below
	# the 0.5 confidence passed here are not reported.
	mp_face_detection = mp.solutions.face_detection
	face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)

	# Object Detection Model: Faster R-CNN (pretrained on COCO)
	object_detection_model = models.detection.fasterrcnn_resnet50_fpn(
	weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT
	)
	# Switch to evaluation mode; the model is only used for inference here.
	object_detection_model.eval()
	# Converts the PIL image built in analyze_objects into the tensor fed to the model.
	obj_transform = transforms.Compose([transforms.ToTensor()])

	# Facial Emotion Detection using FER (requires TensorFlow)
	# NOTE(review): mtcnn=True presumably selects FER's MTCNN face detector
	# rather than its default one - confirm against the FER documentation.
	emotion_detector = FER(mtcnn=True)

	# -----------------------------
	# Define Analysis Functions with Frame Skipping
	# -----------------------------

	def analyze_posture(image):
	    """
	    Annotate a webcam frame with MediaPipe Pose landmarks.

	    The pose model runs on the first usable frame and afterwards on every
	    call whose counter is a multiple of SKIP_RATE; all other calls return
	    the cached result of the most recent run.

	    Args:
	        image: RGB frame from the image input (anything ``np.array`` can
	            convert), or None when no frame is available.

	    Returns:
	        Tuple ``(annotated_rgb_image, status_text)``. For a None input the
	        image slot is None and the text reports the missing frame.
	    """
	    if not hasattr(analyze_posture, "counter"):
	        analyze_posture.counter = 0
	        analyze_posture.last_output = None

	    # The image input can deliver None (no frame captured yet, or the input
	    # was cleared). Converting None with OpenCV would raise, so answer with
	    # a placeholder and leave the frame counter and cache untouched.
	    if image is None:
	        return None, "Posture Analysis: No frame received"

	    analyze_posture.counter += 1

	    # In-between frames: reuse the last computed result.
	    if analyze_posture.counter % SKIP_RATE != 0 and analyze_posture.last_output is not None:
	        return analyze_posture.last_output

	    # Drawing happens on a BGR frame (so the DrawingSpec colours below are
	    # BGR), while MediaPipe is fed a separate RGB array.
	    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
	    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

	    posture_result = "No posture detected"
	    pose_results = pose.process(frame_rgb)
	    if pose_results.pose_landmarks:
	        posture_result = "Posture detected"
	        # `frame` is already a private array distinct from `frame_rgb`,
	        # so it can be drawn on directly without an extra copy.
	        mp_drawing.draw_landmarks(
	            frame, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
	            mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
	            mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
	        )

	    annotated_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	    result = (annotated_image, f"Posture Analysis: {posture_result}")
	    analyze_posture.last_output = result
	    return result

	def analyze_emotion(image):
	    """
	    Report the highest-scoring facial emotion FER finds in a webcam frame.

	    Detection runs on the first usable frame and afterwards on every call
	    whose counter is a multiple of SKIP_RATE; other calls return the cached
	    result. Only the first face returned by the detector is reported, and
	    nothing is drawn on the returned image.

	    Args:
	        image: RGB frame from the image input (anything ``np.array`` can
	            convert), or None when no frame is available.

	    Returns:
	        Tuple ``(rgb_image, status_text)``. For a None input the image slot
	        is None and the text reports the missing frame.
	    """
	    if not hasattr(analyze_emotion, "counter"):
	        analyze_emotion.counter = 0
	        analyze_emotion.last_output = None

	    # The image input can deliver None (no frame captured yet, or the input
	    # was cleared). Converting None with OpenCV would raise, so answer with
	    # a placeholder and leave the frame counter and cache untouched.
	    if image is None:
	        return None, "Emotion Analysis: No frame received"

	    analyze_emotion.counter += 1

	    # In-between frames: reuse the last computed result.
	    if analyze_emotion.counter % SKIP_RATE != 0 and analyze_emotion.last_output is not None:
	        return analyze_emotion.last_output

	    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
	    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	    # NOTE(review): the detector is given the RGB array; FER's examples
	    # appear to pass OpenCV (BGR) images - confirm which order is intended.
	    emotions = emotion_detector.detect_emotions(frame_rgb)
	    if emotions:
	        scores = emotions[0]["emotions"]
	        top_emotion, score = max(scores.items(), key=lambda item: item[1])
	        emotion_text = f"{top_emotion} ({score:.2f})"
	    else:
	        emotion_text = "No face detected for emotion analysis"

	    annotated_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	    result = (annotated_image, f"Emotion Analysis: {emotion_text}")
	    analyze_emotion.last_output = result
	    return result

	def analyze_objects(image):
	    """
	    Box the objects Faster R-CNN detects in a webcam frame.

	    Detection runs on the first usable frame and afterwards on every call
	    whose counter is a multiple of SKIP_RATE; other calls return the cached
	    result. Only detections scoring above 0.8 are drawn and counted.

	    Args:
	        image: RGB frame from the image input (anything ``np.array`` can
	            convert), or None when no frame is available.

	    Returns:
	        Tuple ``(annotated_rgb_image, status_text)``. For a None input the
	        image slot is None and the text reports the missing frame.
	    """
	    if not hasattr(analyze_objects, "counter"):
	        analyze_objects.counter = 0
	        analyze_objects.last_output = None

	    # The image input can deliver None (no frame captured yet, or the input
	    # was cleared). Converting None with OpenCV would raise, so answer with
	    # a placeholder and leave the frame counter and cache untouched.
	    if image is None:
	        return None, "Object Detection: No frame received"

	    analyze_objects.counter += 1

	    # In-between frames: reuse the last computed result.
	    if analyze_objects.counter % SKIP_RATE != 0 and analyze_objects.last_output is not None:
	        return analyze_objects.last_output

	    # Boxes are drawn on the BGR frame; the model gets a tensor built from
	    # a separate RGB array, so no extra copy of the frame is needed.
	    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
	    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	    img_tensor = obj_transform(Image.fromarray(frame_rgb))

	    with torch.no_grad():
	        detections = object_detection_model([img_tensor])[0]

	    threshold = 0.8
	    detected_boxes = detections["boxes"][detections["scores"] > threshold]
	    for box in detected_boxes:
	        # Hand OpenCV plain Python ints rather than NumPy scalar types.
	        x1, y1, x2, y2 = box.int().tolist()
	        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 0), 2)

	    box_count = len(detected_boxes)
	    object_result = f"Detected {box_count} object(s)" if box_count else "No objects detected"
	    annotated_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	    result = (annotated_image, f"Object Detection: {object_result}")
	    analyze_objects.last_output = result
	    return result

	def analyze_faces(image):
	    """
	    Box the faces MediaPipe Face Detection finds in a webcam frame.

	    Detection runs on the first usable frame and afterwards on every call
	    whose counter is a multiple of SKIP_RATE; other calls return the cached
	    result.

	    Args:
	        image: RGB frame from the image input (anything ``np.array`` can
	            convert), or None when no frame is available.

	    Returns:
	        Tuple ``(annotated_rgb_image, status_text)``. For a None input the
	        image slot is None and the text reports the missing frame.
	    """
	    if not hasattr(analyze_faces, "counter"):
	        analyze_faces.counter = 0
	        analyze_faces.last_output = None

	    # The image input can deliver None (no frame captured yet, or the input
	    # was cleared). Converting None with OpenCV would raise, so answer with
	    # a placeholder and leave the frame counter and cache untouched.
	    if image is None:
	        return None, "Face Detection: No frame received"

	    analyze_faces.counter += 1

	    # In-between frames: reuse the last computed result.
	    if analyze_faces.counter % SKIP_RATE != 0 and analyze_faces.last_output is not None:
	        return analyze_faces.last_output

	    # Boxes are drawn on the BGR frame; MediaPipe is fed a separate RGB
	    # array, so no extra copy of the frame is needed.
	    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
	    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	    face_results = face_detection.process(frame_rgb)

	    face_result = "No faces detected"
	    if face_results.detections:
	        face_result = f"Detected {len(face_results.detections)} face(s)"
	        h, w, _ = frame.shape
	        for detection in face_results.detections:
	            # The bounding box is relative to the frame size; scale to pixels.
	            bbox = detection.location_data.relative_bounding_box
	            x = int(bbox.xmin * w)
	            y = int(bbox.ymin * h)
	            box_w = int(bbox.width * w)
	            box_h = int(bbox.height * h)
	            cv2.rectangle(frame, (x, y), (x + box_w, y + box_h), (0, 0, 255), 2)

	    annotated_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	    result = (annotated_image, f"Face Detection: {face_result}")
	    analyze_faces.last_output = result
	    return result

	# -----------------------------
	# Custom CSS for a High-Tech Look (with white fonts)
	# -----------------------------
	# Passed to gr.Blocks(css=...) below. It sets a dark page with white Orbitron
	# text, a gradient container capped at 1200px, and the .gradio-title /
	# .gradio-description classes used by the header Markdown of the app.
	custom_css = """
	@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700&display=swap');
	body {
	background-color: #0e0e0e;
	color: #ffffff;
	font-family: 'Orbitron', sans-serif;
	margin: 0;
	padding: 0;
	}
	.gradio-container {
	background: linear-gradient(135deg, #1e1e2f, #3e3e55);
	border-radius: 10px;
	padding: 20px;
	max-width: 1200px;
	margin: auto;
	}
	.gradio-title {
	font-size: 2.5em;
	color: #ffffff;
	text-align: center;
	margin-bottom: 0.2em;
	}
	.gradio-description {
	font-size: 1.2em;
	text-align: center;
	margin-bottom: 1em;
	color: #ffffff;
	}
	"""

	# -----------------------------
	# One live interface per analysis: streaming webcam frame in,
	# annotated frame plus a status line out
	# -----------------------------
	def _webcam_interface(fn, capture_label, text_label, title, description):
	    """Build a live gr.Interface that streams webcam frames into `fn`."""
	    webcam_input = gr.Image(sources=["webcam"], streaming=True, label=capture_label)
	    annotated_output = gr.Image(type="numpy", label="Annotated Output")
	    status_output = gr.Textbox(label=text_label)
	    return gr.Interface(
	        fn=fn,
	        inputs=webcam_input,
	        outputs=[annotated_output, status_output],
	        title=title,
	        description=description,
	        live=True,
	    )


	posture_interface = _webcam_interface(
	    analyze_posture,
	    "Capture Your Posture",
	    "Posture Analysis",
	    "Posture Analysis",
	    "Detects your posture using MediaPipe.",
	)

	emotion_interface = _webcam_interface(
	    analyze_emotion,
	    "Capture Your Face",
	    "Emotion Analysis",
	    "Emotion Analysis",
	    "Detects facial emotions using FER.",
	)

	objects_interface = _webcam_interface(
	    analyze_objects,
	    "Capture the Scene",
	    "Object Detection",
	    "Object Detection",
	    "Detects objects using a pretrained Faster R-CNN.",
	)

	faces_interface = _webcam_interface(
	    analyze_faces,
	    "Capture Your Face",
	    "Face Detection",
	    "Face Detection",
	    "Detects faces using MediaPipe.",
	)

	# -----------------------------
	# Group the four analyses under one tab strip
	# -----------------------------
	# Each entry pairs a tab label with the interface shown under it.
	_tab_specs = [
	    ("Posture", posture_interface),
	    ("Emotion", emotion_interface),
	    ("Objects", objects_interface),
	    ("Faces", faces_interface),
	]
	tabbed_interface = gr.TabbedInterface(
	    interface_list=[interface for _, interface in _tab_specs],
	    tab_names=[tab_name for tab_name, _ in _tab_specs],
	)

	# -----------------------------
	# Top-level Blocks page: styled header followed by the tabbed analyses
	# -----------------------------
	with gr.Blocks(css=custom_css) as demo:
	    # The class names on these elements are the ones styled in custom_css.
	    gr.Markdown("<h1 class='gradio-title'>Real-Time Multi-Analysis App</h1>")
	    gr.Markdown(
	        "<p class='gradio-description'>Experience a high-tech cinematic interface "
	        "for real-time analysis of your posture, emotions, objects, and faces "
	        "using your webcam.</p>"
	    )
	    # The tabbed interface was built outside this context, so place it here.
	    tabbed_interface.render()

	# Start the Gradio app only when this file is run as a script, not on import.
	if __name__ == "__main__":
	demo.launch()