|
import io
import warnings
from typing import Dict, Optional, Tuple

import cv2
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
|
|
|
# Suppress every warning category for the lifetime of the process.
warnings.filterwarnings("ignore")

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

# Face detector. post_process=False is why predict_frame rescales the crop
# by 255 itself.
mtcnn = MTCNN(select_largest=False, post_process=False, device=DEVICE)
mtcnn = mtcnn.to(DEVICE).eval()

# Classifier with a single output unit; predict_frame applies a sigmoid to it.
model = InceptionResnetV1(
    pretrained="vggface2",
    classify=True,
    num_classes=1,
    device=DEVICE,
)

# The checkpoint is deserialised onto the CPU first, then the model is moved
# to DEVICE once its weights are in place.
checkpoint = torch.load("df_model.pth", map_location=torch.device('cpu'))
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(DEVICE).eval()
|
|
|
def predict_frame(frame: np.ndarray) -> Tuple[Optional[str], Optional[Dict[str, float]]]:
    """Classify the face found in a single frame as real or fake.

    Args:
        frame: Image array in BGR channel order; it is converted with
            ``cv2.COLOR_BGR2RGB`` before face detection.

    Returns:
        A ``(label, confidences)`` pair. ``label`` is ``"real"`` or
        ``"fake"`` and ``confidences`` maps both labels to scores that sum
        to 1. When the detector finds no face the result is
        ``(None, None)``, so callers must check for ``None`` before use.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    face = mtcnn(Image.fromarray(rgb_frame))
    if face is None:
        return None, None

    # Add a batch axis and resize the detected crop to 256x256.
    face = F.interpolate(face.unsqueeze(0), size=(256, 256), mode='bilinear', align_corners=False)
    # The detector is built with post_process=False, so the crop is rescaled
    # here by dividing by 255.
    face = face.to(DEVICE, dtype=torch.float32) / 255.0

    with torch.no_grad():
        # The sigmoid of the single model output is read as the "fake" score.
        output = torch.sigmoid(model(face).squeeze(0))

    fake_confidence = output.item()
    real_confidence = 1 - fake_confidence
    # Ties are labelled "fake", because "real" needs a strictly larger score.
    prediction = "real" if real_confidence > fake_confidence else "fake"

    confidences = {
        'real': real_confidence,
        'fake': fake_confidence,
    }
    return prediction, confidences
|
|
|
def _render_summary_plot(confidences_real, confidences_fake, predictions) -> "Image.Image":
    """Render the confidence curves and the label histogram as a PIL image.

    Args:
        confidences_real: "real" score of every analysed frame, in order.
        confidences_fake: "fake" score of every analysed frame, in order.
        predictions: Label (``"real"`` / ``"fake"``) of every analysed frame.
    """
    # Colours are looked up per label. np.unique returns labels sorted, so a
    # positional colour list would paint "fake" green and "real" red.
    label_colors = {'real': 'green', 'fake': 'red'}

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 8))
    try:
        ax1.plot(confidences_real, label='Real', color='green')
        ax1.plot(confidences_fake, label='Fake', color='red')
        ax1.set_title('Confidence Scores Over Time')
        ax1.set_xlabel('Frame')
        ax1.set_ylabel('Confidence')
        ax1.legend()
        ax1.grid(True)

        labels, counts = np.unique(predictions, return_counts=True)
        ax2.bar(labels, counts, color=[label_colors[str(label)] for label in labels])
        ax2.set_title('Distribution of Predictions')
        ax2.set_xlabel('Prediction')
        ax2.set_ylabel('Count')

        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each request would leak one figure.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)


def predict_video(input_video: str, skip_frames: int = 5) -> Tuple[str, float, "Image.Image"]:
    """Classify a video as real or fake by averaging per-frame predictions.

    Every ``skip_frames``-th frame is passed to ``predict_frame``; frames in
    which no face is detected are ignored. The final label is the one with
    the higher mean confidence over the remaining frames.

    Args:
        input_video: Path of the video file to analyse.
        skip_frames: Analyse one frame out of this many. Must be >= 1.

    Returns:
        ``(label, confidence, summary_plot)`` where ``label`` is ``"real"``
        or ``"fake"``, ``confidence`` is the mean confidence of that label
        and ``summary_plot`` is a PIL image of the analysis charts.

    Raises:
        ValueError: If ``skip_frames`` is smaller than 1.
        gr.Error: If no video was supplied, the video cannot be opened, or
            no face is detected in any analysed frame.
    """
    if skip_frames < 1:
        raise ValueError("skip_frames must be at least 1")
    if not input_video:
        raise gr.Error("Please upload a video before starting the analysis.")

    cap = cv2.VideoCapture(input_video)
    if not cap.isOpened():
        cap.release()
        raise gr.Error("The uploaded video could not be opened.")

    predictions = []
    confidences_real = []
    confidences_fake = []

    try:
        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1
            if frame_count % skip_frames != 0:
                continue

            prediction, confidence = predict_frame(frame)
            if prediction is None:
                # No face in this frame; it contributes nothing to the result.
                continue

            predictions.append(prediction)
            confidences_real.append(confidence['real'])
            confidences_fake.append(confidence['fake'])
    finally:
        # Release the capture handle even if a frame fails to process.
        cap.release()

    if not predictions:
        # Without this guard the averages below divide by zero.
        raise gr.Error("No face was detected in the analysed frames of this video.")

    avg_real_confidence = sum(confidences_real) / len(confidences_real)
    avg_fake_confidence = sum(confidences_fake) / len(confidences_fake)
    final_prediction = 'real' if avg_real_confidence > avg_fake_confidence else 'fake'
    final_confidence = max(avg_real_confidence, avg_fake_confidence)

    summary_plot = _render_summary_plot(confidences_real, confidences_fake, predictions)
    return final_prediction, final_confidence, summary_plot
|
|
|
|
|
# Stylesheet for the video widget and the HTML confidence bar rendered below.
custom_css = """
.video-container {
    max-width: 400px;
    margin: 0 auto;
}
#output-container {
    display: flex;
    justify-content: center;
    align-items: center;
    flex-direction: column;
}
#confidence-label {
    font-size: 24px;
    font-weight: bold;
    margin-bottom: 10px;
}
#confidence-bar {
    width: 100%;
    height: 30px;
    background-color: #f0f0f0;
    border-radius: 15px;
    overflow: hidden;
}
#confidence-fill {
    height: 100%;
    background-color: #4CAF50;
    transition: width 0.5s ease-in-out;
}
"""

# NOTE(review): the emoji in the labels below were reconstructed from
# mis-encoded text; confirm that these are the intended glyphs.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🕵️‍♂️ DeepFake Video Detective 🎭")
    gr.Markdown("Upload a video to determine if it's real or a deepfake. Our AI will analyze it frame by frame!")

    with gr.Row():
        with gr.Column(scale=1):
            input_video = gr.Video(label="📹 Upload Your Video", elem_classes=["video-container"])

    with gr.Row():
        submit_btn = gr.Button("🔍 Analyze Video", variant="primary")

    with gr.Row():
        with gr.Column():
            output_label = gr.Label(label="🏷️ Prediction")
            # Empty confidence bar shown before the first analysis.
            confidence_output = gr.HTML(
                """
                <div id="output-container">
                    <div id="confidence-label">Confidence: 0%</div>
                    <div id="confidence-bar">
                        <div id="confidence-fill" style="width: 0%;"></div>
                    </div>
                </div>
                """
            )
            summary_plot = gr.Image(label="📊 Analysis Summary")

    def update_confidence(prediction, confidence):
        """Build the confidence-bar HTML for a finished analysis.

        Args:
            prediction: Final label; ``"real"`` gives a green bar, anything
                else an orange one.
            confidence: Confidence as a fraction in [0, 1]; it is formatted
                as a percentage for both the caption and the bar width.
        """
        color = "#4CAF50" if prediction == "real" else "#FF5722"
        return f"""
        <div id="output-container">
            <div id="confidence-label">Confidence: {confidence:.2%}</div>
            <div id="confidence-bar">
                <div id="confidence-fill" style="width: {confidence:.2%}; background-color: {color};"></div>
            </div>
        </div>
        """

    def process_video(video):
        """Run the detector on the uploaded video and fill all three outputs."""
        if video is None:
            # The button can be clicked before anything has been uploaded.
            raise gr.Error("Please upload a video before starting the analysis.")

        prediction, confidence, summary = predict_video(video)
        confidence_html = update_confidence(prediction, confidence)
        return {output_label: prediction, confidence_output: confidence_html, summary_plot: summary}

    submit_btn.click(
        process_video,
        inputs=[input_video],
        outputs=[output_label, confidence_output, summary_plot],
    )

demo.launch()