import gradio as gr
import cv2
import numpy as np
import torch
from PIL import Image
import mediapipe as mp

from transformers import (
    AutoFeatureExtractor,
    AutoModel,
    AutoImageProcessor,
    AutoModelForImageClassification,
    AutoModelForSemanticSegmentation,
)
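
# Shared configuration: compute device and the working resolution used before face detection.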
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DESIRED_SIZE = (640, 480)
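
# MediaPipe face detection: locates the face that the recognition, emotion,
# and age/gender pipelines crop and analyze.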
mp_face_detection = mp.solutions.face_detection
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)
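
# DINO ViT-B/16 backbone; its mean-pooled last hidden state is used as the facial embedding.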
facial_recognition_extractor = AutoFeatureExtractor.from_pretrained("facebook/dino-vitb16")
facial_recognition_model = AutoModel.from_pretrained("facebook/dino-vitb16")
facial_recognition_model.to(device)
facial_recognition_model.eval()
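
# Placeholder identity database. The entries are random 768-d vectors (matching the
# ViT-B/16 hidden size), so real faces will almost never clear the 0.7 similarity
# threshold; a real deployment would store embeddings computed from enrolled photos.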
dummy_database = {
    "Alice": torch.randn(768).to(device),
    "Bob": torch.randn(768).to(device),
}
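
# Facial expression (emotion) classifier.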
emotion_processor = AutoImageProcessor.from_pretrained("nateraw/facial-expression-recognition")
emotion_model = AutoModelForImageClassification.from_pretrained("nateraw/facial-expression-recognition")
emotion_model.to(device)
emotion_model.eval()
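
# Age & gender classifier.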
age_gender_processor = AutoImageProcessor.from_pretrained("oayu/age-gender-estimation")
age_gender_model = AutoModelForImageClassification.from_pretrained("oayu/age-gender-estimation")
age_gender_model.to(device)
age_gender_model.eval()
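
# Semantic segmentation model for face parsing (eyes, nose, lips, hair, etc.).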
face_parsing_processor = AutoImageProcessor.from_pretrained("hila-chefer/face-parsing")
face_parsing_model = AutoModelForSemanticSegmentation.from_pretrained("hila-chefer/face-parsing")
face_parsing_model.to(device)
face_parsing_model.eval()
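
# Deepfake / manipulated-image classifier.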
deepfake_processor = AutoImageProcessor.from_pretrained("microsoft/FaceForensics")
deepfake_model = AutoModelForImageClassification.from_pretrained("microsoft/FaceForensics")
deepfake_model.to(device)
deepfake_model.eval()
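

# Each compute_* function below returns a tuple of (image to display, result text).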
def compute_facial_recognition(image):
    """
    Detects a face with MediaPipe, crops it, and computes its embedding with DINO-ViT.
    The embedding is compared against a dummy database to "identify" the person.
    """
    frame = np.array(image)
    # Gradio supplies RGB arrays, which is also what MediaPipe expects; just resize.
    frame_rgb = cv2.resize(frame, DESIRED_SIZE)

    face_results = face_detection.process(frame_rgb)
    if face_results.detections:
        detection = face_results.detections[0]
        bbox = detection.location_data.relative_bounding_box
        h, w, _ = frame_rgb.shape
        # Clamp the top-left corner: MediaPipe's relative box can start slightly outside
        # the frame. NumPy slicing clips the bottom/right edges automatically.
        x = max(int(bbox.xmin * w), 0)
        y = max(int(bbox.ymin * h), 0)
        box_w = int(bbox.width * w)
        box_h = int(bbox.height * h)
        face_crop = frame_rgb[y:y+box_h, x:x+box_w]
        face_image = Image.fromarray(face_crop)

        inputs = facial_recognition_extractor(face_image, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = facial_recognition_model(**inputs)

        # Mean-pool the token embeddings into a single 768-d vector.
        embeddings = outputs.last_hidden_state.mean(dim=1).squeeze()

        best_score = -1.0
        best_name = "Unknown"
        for name, db_emb in dummy_database.items():
            cos_sim = torch.nn.functional.cosine_similarity(embeddings, db_emb, dim=0).item()
            if cos_sim > best_score:
                best_score = cos_sim
                best_name = name

        threshold = 0.7
        if best_score > threshold:
            result = f"Identified as {best_name} (sim: {best_score:.2f})"
        else:
            result = f"No match found (best: {best_name}, sim: {best_score:.2f})"
        return face_crop, result
    else:
        return frame, "No face detected"


def compute_emotion_detection(image):
    """
    Detects a face, crops it, and classifies the facial expression.
    """
    frame = np.array(image)
    frame_rgb = cv2.resize(frame, DESIRED_SIZE)

    face_results = face_detection.process(frame_rgb)
    if face_results.detections:
        detection = face_results.detections[0]
        bbox = detection.location_data.relative_bounding_box
        h, w, _ = frame_rgb.shape
        x = max(int(bbox.xmin * w), 0)
        y = max(int(bbox.ymin * h), 0)
        box_w = int(bbox.width * w)
        box_h = int(bbox.height * h)
        face_crop = frame_rgb[y:y+box_h, x:x+box_w]
        face_image = Image.fromarray(face_crop)

        inputs = emotion_processor(face_image, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = emotion_model(**inputs)
        logits = outputs.logits
        pred = logits.argmax(-1).item()
        label = emotion_model.config.id2label[pred]
        return face_crop, f"Emotion: {label}"
    else:
        return frame, "No face detected"


def compute_age_gender(image):
    """
    Detects a face, crops it, and predicts the age & gender.
    """
    frame = np.array(image)
    frame_rgb = cv2.resize(frame, DESIRED_SIZE)

    face_results = face_detection.process(frame_rgb)
    if face_results.detections:
        detection = face_results.detections[0]
        bbox = detection.location_data.relative_bounding_box
        h, w, _ = frame_rgb.shape
        x = max(int(bbox.xmin * w), 0)
        y = max(int(bbox.ymin * h), 0)
        box_w = int(bbox.width * w)
        box_h = int(bbox.height * h)
        face_crop = frame_rgb[y:y+box_h, x:x+box_w]
        face_image = Image.fromarray(face_crop)

        inputs = age_gender_processor(face_image, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = age_gender_model(**inputs)
        logits = outputs.logits
        pred = logits.argmax(-1).item()
        label = age_gender_model.config.id2label[pred]
        return face_crop, f"Age & Gender: {label}"
    else:
        return frame, "No face detected"


def compute_face_parsing(image):
    """
    Runs face parsing (semantic segmentation) on the provided image.
    """
    image_pil = Image.fromarray(np.array(image))
    inputs = face_parsing_processor(image_pil, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = face_parsing_model(**inputs)
    logits = outputs.logits
    segmentation = logits.argmax(dim=1)[0].cpu().numpy()

    # Normalize class ids to 0-255 and colorize; convert the OpenCV BGR colormap
    # back to RGB so Gradio displays the expected colors.
    segmentation_norm = np.uint8(255 * segmentation / (segmentation.max() + 1e-5))
    segmentation_color = cv2.applyColorMap(segmentation_norm, cv2.COLORMAP_JET)
    segmentation_color = cv2.cvtColor(segmentation_color, cv2.COLOR_BGR2RGB)
    return segmentation_color, "Face Parsing completed"


def compute_deepfake_detection(image):
    """
    Runs deepfake detection on the image.
    """
    image_pil = Image.fromarray(np.array(image))
    inputs = deepfake_processor(image_pil, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = deepfake_model(**inputs)
    logits = outputs.logits
    pred = logits.argmax(-1).item()
    label = deepfake_model.config.id2label[pred]
    return np.array(image), f"Deepfake Detection: {label}"
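

# Gradio-facing wrappers: wrap each result string in green HTML so it matches the neon theme.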
def analyze_facial_recognition(image):
    annotated_face, result = compute_facial_recognition(image)
    return annotated_face, f"<div style='color: lime !important;'>Facial Recognition: {result}</div>"


def analyze_emotion_detection(image):
    face_crop, result = compute_emotion_detection(image)
    return face_crop, f"<div style='color: lime !important;'>{result}</div>"


def analyze_age_gender(image):
    face_crop, result = compute_age_gender(image)
    return face_crop, f"<div style='color: lime !important;'>{result}</div>"


def analyze_face_parsing(image):
    segmentation, result = compute_face_parsing(image)
    return segmentation, f"<div style='color: lime !important;'>{result}</div>"


def analyze_deepfake_detection(image):
    output, result = compute_deepfake_detection(image)
    return output, f"<div style='color: lime !important;'>{result}</div>"
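

# Custom CSS giving the app a neon-green 'Orbitron' look; passed to gr.Blocks via css=.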
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700&display=swap');
body {
    background-color: #0e0e0e;
    font-family: 'Orbitron', sans-serif;
    margin: 0;
    padding: 0;
    color: #32CD32;
}
.gradio-container {
    background: linear-gradient(135deg, #1a1a1a, #333333);
    border: 2px solid #32CD32;
    box-shadow: 0 0 15px #32CD32;
    border-radius: 10px;
    padding: 20px;
    max-width: 1200px;
    margin: auto;
}
.gradio-title, .gradio-description, .tab-item, .tab-item * {
    color: #32CD32 !important;
    text-shadow: 0 0 10px #32CD32;
}
input, button, .output {
    border: 1px solid #32CD32;
    box-shadow: 0 0 8px #32CD32;
    color: #32CD32;
}
"""
facial_recognition_interface = gr.Interface(
    fn=analyze_facial_recognition,
    inputs=gr.Image(label="Upload a Face Image for Facial Recognition"),
    outputs=[gr.Image(type="numpy", label="Cropped Face / Embedding Visualization"),
             gr.HTML(label="Facial Recognition Result")],
    title="Facial Recognition & Identification",
    description="Extracts facial embeddings using facebook/dino-vitb16 and identifies the face by comparing against a dummy database.",
    live=False
)

emotion_interface = gr.Interface(
    fn=analyze_emotion_detection,
    inputs=gr.Image(label="Upload a Face Image for Emotion Detection"),
    outputs=[gr.Image(type="numpy", label="Cropped Face"),
             gr.HTML(label="Emotion Detection")],
    title="Emotion Detection",
    description="Classifies the facial expression using nateraw/facial-expression-recognition.",
    live=False
)

age_gender_interface = gr.Interface(
    fn=analyze_age_gender,
    inputs=gr.Image(label="Upload a Face Image for Age & Gender Prediction"),
    outputs=[gr.Image(type="numpy", label="Cropped Face"),
             gr.HTML(label="Age & Gender Prediction")],
    title="Age & Gender Prediction",
    description="Predicts age and gender from the face using oayu/age-gender-estimation.",
    live=False
)

face_parsing_interface = gr.Interface(
    fn=analyze_face_parsing,
    inputs=gr.Image(label="Upload a Face Image for Face Parsing"),
    outputs=[gr.Image(type="numpy", label="Segmentation Overlay"),
             gr.HTML(label="Face Parsing")],
    title="Face Parsing",
    description="Segments face regions (eyes, nose, lips, hair, etc.) using hila-chefer/face-parsing.",
    live=False
)

deepfake_interface = gr.Interface(
    fn=analyze_deepfake_detection,
    inputs=gr.Image(label="Upload an Image for Deepfake Detection"),
    outputs=[gr.Image(type="numpy", label="Input Image"),
             gr.HTML(label="Deepfake Detection")],
    title="Deepfake Detection",
    description="Detects manipulated or deepfake images using microsoft/FaceForensics.",
    live=False
)
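

# Group the five task interfaces into a single tabbed interface.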
tabbed_interface = gr.TabbedInterface(
    interface_list=[
        facial_recognition_interface,
        emotion_interface,
        age_gender_interface,
        face_parsing_interface,
        deepfake_interface
    ],
    tab_names=[
        "Facial Recognition",
        "Emotion Detection",
        "Age & Gender",
        "Face Parsing",
        "Deepfake Detection"
    ]
)
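

# Top-level Blocks app: applies the custom CSS, adds a header and description, and renders the tabs.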
demo = gr.Blocks(css=custom_css)
with demo:
    gr.Markdown("<h1 class='gradio-title' style='color: #32CD32;'>Multi-Analysis Face App</h1>")
    gr.Markdown("<p class='gradio-description' style='color: #32CD32;'>Upload an image to run advanced face analysis using state-of-the-art Hugging Face models.</p>")
    tabbed_interface.render()


if __name__ == "__main__":
    demo.launch()