mhamza-007 committed
Commit 2c966e2
1 Parent(s): f37c341

Adding application files

.gitignore ADDED
@@ -0,0 +1,3 @@
+ __pycache__/
+ *.pyc
+ *.pyo
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.11
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /deepfake-video-detection
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /deepfake-video-detection
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -9,4 +9,6 @@ license: mit
  short_description: FastAPI Backend for DeepFake Video Detection
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # CViT Deepfake Detection Model
+
+ This repository contains the inference code for the CViT-based deepfake video detection model. Due to file size limitations, the model weights are hosted on Hugging Face Hub.
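For context, the files added in this commit define a single prediction endpoint. A minimal client sketch follows; the endpoint path and request/response shape are taken from `routes/video_routes.py` below, the port from the Dockerfile, while the host and video URL are placeholders:

```python
import requests

# Placeholder host; the Dockerfile starts uvicorn on port 7860.
API_URL = "http://localhost:7860/api/video"

# The endpoint expects a JSON body matching the VideoUrl model: {"url": "<video URL>"}.
payload = {"url": "https://example.com/sample.mp4"}  # placeholder video URL

resp = requests.post(API_URL, json=payload)
resp.raise_for_status()

# Response shape per routes/video_routes.py:
# {"classification": "FAKE" | "REAL" | "UNCERTAIN", "confidence": <percent>}
print(resp.json())
```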
helpers/__init__.py ADDED
@@ -0,0 +1,15 @@
+ from .dense_sampling import dense_sampling_from_extracted_frames
+ from .audio_removal import remove_audio
+ from .extract_faces import detect_faces_in_video
+ from .file_utils import (
+     delete_folders,
+     delete_videos
+ )
+
+ __all__ = [
+     "dense_sampling_from_extracted_frames",
+     "remove_audio",
+     "detect_faces_in_video",
+     "delete_folders",
+     "delete_videos"
+ ]
helpers/audio_removal.py ADDED
@@ -0,0 +1,23 @@
+ import os
+ import subprocess
+
+ def remove_audio(input_file):
+     output_file = f"processed_{os.path.basename(input_file).rsplit('.', 1)[0]}.mp4"
+
+     ffmpeg_cmd = [
+         'ffmpeg',
+         '-i', input_file,
+         '-c:v', 'libx264',
+         '-preset', 'ultrafast',
+         '-an',
+         '-y',
+         output_file
+     ]
+
+     try:
+         subprocess.run(ffmpeg_cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+         print(f"Processed video saved to: {output_file}")
+         return output_file
+     except Exception as e:
+         print(f"Unexpected error: {e}")
+         return None
helpers/dense_sampling.py ADDED
@@ -0,0 +1,26 @@
+ import os
+ import random
+ import numpy as np
+
+ def dense_sampling_from_extracted_frames(folder_path, num_clips=6, frames_per_clip=5):
+     frame_files = sorted([os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.npy')])
+     num_frames = len(frame_files)
+
+     print(f"Found {num_frames} frames in {folder_path}")
+     if num_frames < num_clips * frames_per_clip:
+         raise ValueError("Not enough frames to sample the required clips.")
+
+     frames_per_segment = num_frames // num_clips
+
+     clips = []
+
+     for i in range(num_clips):
+         segment_start = i * frames_per_segment
+         segment_end = segment_start + frames_per_segment - 1
+         max_start_frame = segment_end - frames_per_clip + 1
+         start_frame = random.randint(segment_start, max_start_frame)
+
+         clip = [np.load(frame_files[start_frame + j]) for j in range(frames_per_clip)]
+         clips.append(clip)
+
+     return clips
helpers/extract_faces.py ADDED
@@ -0,0 +1,101 @@
+ import os
+ import cv2
+ import numpy as np
+ from tqdm import tqdm
+ from mtcnn import MTCNN
+
+ def normalize_frame(frame, mean, std):
+     frame = frame / 255.0
+     mean = np.array(mean).reshape(1, 1, 3)
+     std = np.array(std).reshape(1, 1, 3)
+     normalized_frame = (frame - mean) / std
+     return normalized_frame
+
+ def detect_faces_in_video(video_path, output_dir, padding_percentage=0.3,
+                           mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],
+                           full_detection_interval=10):
+     os.makedirs(output_dir, exist_ok=True)
+
+     detector = MTCNN()
+     cap = cv2.VideoCapture(video_path)
+     if not cap.isOpened():
+         raise Exception(f"Error: Unable to open video file {video_path}")
+
+     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+     frame_count = 0
+     cropped_faces = []
+     trackers = []
+
+     with tqdm(total=total_frames, desc="Extracting faces", unit="frame") as pbar:
+         while True:
+             ret, frame = cap.read()
+             if not ret:
+                 break
+
+             if frame is None:
+                 print(f"[WARNING] Empty frame at {frame_count}")
+                 continue
+
+             if frame_count % full_detection_interval == 0:
+                 rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                 faces = detector.detect_faces(rgb_frame)
+                 trackers = []
+
+                 for i, face in enumerate(faces):
+                     confidence = face['confidence']
+                     if confidence < 0.85:
+                         continue
+
+                     x, y, w, h = face['box']
+                     if w < 50 or h < 50:
+                         continue
+
+                     padding = max(1, int(min(w, h) * padding_percentage))
+                     x1 = max(0, x - padding)
+                     y1 = max(0, y - padding)
+                     x2 = min(rgb_frame.shape[1], x + w + padding)
+                     y2 = min(rgb_frame.shape[0], y + h + padding)
+
+                     cropped_face = frame[y1:y2, x1:x2]
+                     if cropped_face.size == 0:
+                         continue
+
+                     resized_cropped_face = cv2.resize(cropped_face, (224, 224))
+                     normalized_face = normalize_frame(resized_cropped_face, mean, std)
+
+                     face_filename = f"frame_{frame_count:05d}_face_{i}.npy"
+                     face_path = os.path.join(output_dir, face_filename)
+                     np.save(face_path, normalized_face)
+                     cropped_faces.append(face_path)
+
+                     tracker = cv2.TrackerCSRT_create()
+                     tracker.init(frame, (x, y, w, h))
+                     trackers.append(tracker)
+             else:
+                 for i, tracker in enumerate(trackers):
+                     success, box = tracker.update(frame)
+                     if success:
+                         x, y, w, h = [int(v) for v in box]
+                         padding = max(1, int(min(w, h) * padding_percentage))
+                         x1 = max(0, x - padding)
+                         y1 = max(0, y - padding)
+                         x2 = min(frame.shape[1], x + w + padding)
+                         y2 = min(frame.shape[0], y + h + padding)
+
+                         cropped_face = frame[y1:y2, x1:x2]
+                         if cropped_face.size == 0:
+                             continue
+
+                         resized_cropped_face = cv2.resize(cropped_face, (224, 224))
+                         normalized_face = normalize_frame(resized_cropped_face, mean, std)
+
+                         face_filename = f"frame_{frame_count:05d}_track_{i}.npy"
+                         face_path = os.path.join(output_dir, face_filename)
+                         np.save(face_path, normalized_face)
+                         cropped_faces.append(face_path)
+
+             frame_count += 1
+             pbar.update(1)
+
+     cap.release()
+     return cropped_faces
helpers/file_utils.py ADDED
@@ -0,0 +1,28 @@
+ import os
+ import shutil
+ import glob
+
+ def delete_videos(video_filenames):
+     for video_filename in video_filenames:
+         if "*" in video_filename:
+             matched_files = glob.glob(video_filename)
+             for file in matched_files:
+                 try:
+                     os.remove(file)
+                     print(f"{file} has been deleted successfully.")
+                 except Exception as e:
+                     print(f"Error deleting {file}: {e}")
+         else:
+             try:
+                 if os.path.exists(video_filename):
+                     os.remove(video_filename)
+                     print(f"{video_filename} has been deleted successfully.")
+                 else:
+                     print(f"{video_filename} does not exist.")
+             except Exception as e:
+                 print(f"Error deleting {video_filename}: {e}")
+
+ def delete_folders(*folders):
+     for folder in folders:
+         if os.path.exists(folder):
+             shutil.rmtree(folder)
main.py ADDED
@@ -0,0 +1,29 @@
+ import os
+ os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
+
+ from fastapi import FastAPI
+ from fastapi.responses import JSONResponse
+ from fastapi.middleware.cors import CORSMiddleware
+
+ from routes import router as video_routes
+ from middleware import CleanupMiddleware
+
+ app = FastAPI()
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ app.add_middleware(CleanupMiddleware)
+
+ app.include_router(video_routes)
+
+ @app.get("/")
+ async def read_root():
+     return JSONResponse(
+         content={"status": "API is running"}
+     )
middleware/__init__.py ADDED
@@ -0,0 +1 @@
+ from .cleanup_middleware import CleanupMiddleware
middleware/cleanup_middleware.py ADDED
@@ -0,0 +1,16 @@
+ from starlette.middleware.base import BaseHTTPMiddleware
+ from starlette.requests import Request
+ from helpers.file_utils import delete_folders, delete_videos
+
+ class CleanupMiddleware(BaseHTTPMiddleware):
+     async def dispatch(self, request: Request, call_next):
+         response = await call_next(request)
+
+         output_dir_for_extracted_frames = "extracted_frames"
+         output_dir_for_sampled_frames = "sampled_frames"
+         video_files_to_delete = ["processed*.mp4"]
+
+         delete_folders(output_dir_for_extracted_frames, output_dir_for_sampled_frames)
+         delete_videos(video_files_to_delete)
+
+         return response
model/README.md ADDED
@@ -0,0 +1,24 @@
+ ## ✅ No Manual Download Needed
+
+ The model file (`cvit2_deepfake_detection_ep_50.pth`) is hosted on the Hugging Face Hub and is **automatically downloaded** when you run the code.
+
+ If you're running the project locally, make sure your environment has internet access. The model will be downloaded from:
+
+ 👉 [cvit2_deepfake_detection_ep_50.pth on Hugging Face](https://huggingface.co/mhamza-007/cvit_deepfake_detection/tree/main)
+
+ Once downloaded, it will be **cached locally** for future use.
+
+ ---
+
+ ### Programmatic Model Download
+
+ To avoid manual downloads, the code uses the Hugging Face Hub API to download the model automatically:
+
+ ```python
+ from huggingface_hub import hf_hub_download
+
+ model_path = hf_hub_download(
+     repo_id="mhamza-007/cvit_deepfake_detection",
+     filename="cvit2_deepfake_detection_ep_50.pth"
+ )
+ ```
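Continuing from that snippet, the downloaded checkpoint can be loaded into the `CViT` architecture the same way `predict/model_predictor.py` in this commit does; this is only a sketch, and the `'state_dict'` key and `weights_only=True` usage are taken from that file:

```python
import torch
from modelfile import CViT

# model_path comes from the hf_hub_download() call shown above.
model = CViT()
checkpoint = torch.load(model_path, map_location="cpu", weights_only=True)
model.load_state_dict(checkpoint["state_dict"])
model.eval()  # inference-mode behavior for BatchNorm layers
```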
modelfile.py ADDED
@@ -0,0 +1,183 @@
+ import torch
+ from torch import nn
+ import torch.nn.functional as F
+ from einops import rearrange
+
+ class Residual(nn.Module):
+     def __init__(self, fn):
+         super().__init__()
+         self.fn = fn
+
+     def forward(self, x, **kwargs):
+         return self.fn(x, **kwargs) + x
+
+ class PreNorm(nn.Module):
+     def __init__(self, dim, fn):
+         super().__init__()
+         self.norm = nn.LayerNorm(dim)
+         self.fn = fn
+
+     def forward(self, x, **kwargs):
+         return self.fn(self.norm(x), **kwargs)
+
+ class FeedForward(nn.Module):
+     def __init__(self, dim, hidden_dim):
+         super().__init__()
+         self.net = nn.Sequential(
+             nn.Linear(dim, hidden_dim),
+             nn.GELU(),
+             nn.Linear(hidden_dim, dim)
+         )
+
+     def forward(self, x):
+         return self.net(x)
+
+ class Attention(nn.Module):
+     def __init__(self, dim, heads=8):
+         super().__init__()
+         self.heads = heads
+         self.scale = dim ** -0.5
+
+         self.to_qkv = nn.Linear(dim, dim * 3, bias=False)
+         self.to_out = nn.Linear(dim, dim)
+
+     def forward(self, x, mask=None):
+         b, n, _, h = *x.shape, self.heads
+         qkv = self.to_qkv(x)
+         q, k, v = rearrange(qkv, 'b n (qkv h d) -> qkv b h n d', qkv=3, h=h)
+
+         dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
+
+         if mask is not None:
+             mask = F.pad(mask.flatten(1), (1, 0), value=True)
+             assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
+             mask = mask[:, None, :] * mask[:, :, None]
+             dots.masked_fill_(~mask, float('-inf'))
+             del mask
+
+         attn = dots.softmax(dim=-1)
+
+         out = torch.einsum('bhij,bhjd->bhid', attn, v)
+         out = rearrange(out, 'b h n d -> b n (h d)')
+         out = self.to_out(out)
+         return out
+
+ class Transformer(nn.Module):
+     def __init__(self, dim, depth, heads, mlp_dim):
+         super().__init__()
+         self.layers = nn.ModuleList([])
+         for _ in range(depth):
+             self.layers.append(nn.ModuleList([
+                 Residual(PreNorm(dim, Attention(dim, heads=heads))),
+                 Residual(PreNorm(dim, FeedForward(dim, mlp_dim)))
+             ]))
+
+     def forward(self, x, mask=None):
+         for attn, ff in self.layers:
+             x = attn(x, mask=mask)
+             x = ff(x)
+         return x
+
+ class CViT(nn.Module):
+     def __init__(self, image_size=224, patch_size=7, num_classes=2, channels=512,
+                  dim=1024, depth=6, heads=8, mlp_dim=2048):
+         super().__init__()
+         assert image_size % patch_size == 0, 'image dimensions must be divisible by the patch size'
+
+         self.features = nn.Sequential(
+             nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=32),
+             nn.ReLU(),
+             nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=32),
+             nn.ReLU(),
+             nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=32),
+             nn.ReLU(),
+             nn.MaxPool2d(kernel_size=2, stride=2),
+
+             nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=64),
+             nn.ReLU(),
+             nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=64),
+             nn.ReLU(),
+             nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=64),
+             nn.ReLU(),
+             nn.MaxPool2d(kernel_size=2, stride=2),
+
+             nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=128),
+             nn.ReLU(),
+             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=128),
+             nn.ReLU(),
+             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=128),
+             nn.ReLU(),
+             nn.MaxPool2d(kernel_size=2, stride=2),
+
+             nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=256),
+             nn.ReLU(),
+             nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=256),
+             nn.ReLU(),
+             nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=256),
+             nn.ReLU(),
+             nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=256),
+             nn.ReLU(),
+             nn.MaxPool2d(kernel_size=2, stride=2),
+
+             nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=512),
+             nn.ReLU(),
+             nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=512),
+             nn.ReLU(),
+             nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=512),
+             nn.ReLU(),
+             nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
+             nn.BatchNorm2d(num_features=512),
+             nn.ReLU(),
+             nn.MaxPool2d(kernel_size=2, stride=2)
+         )
+
+         num_patches = (image_size // patch_size) ** 2
+         self.max_sequence_length = num_patches + 1
+         patch_dim = channels * patch_size ** 2
+
+         self.patch_size = patch_size
+
+         self.pos_embedding = nn.Parameter(torch.randn(1, self.max_sequence_length, dim))
+         self.patch_to_embedding = nn.Linear(patch_dim, dim)
+         self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
+         self.transformer = Transformer(dim, depth, heads, mlp_dim)
+
+         self.to_cls_token = nn.Identity()
+
+         self.mlp_head = nn.Sequential(
+             nn.Linear(dim, mlp_dim),
+             nn.ReLU(),
+             nn.Linear(mlp_dim, num_classes)
+         )
+
+     def forward(self, img, mask=None):
+         p = self.patch_size
+         x = self.features(img)
+         y = rearrange(x, 'b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=p, p2=p)
+
+         y = self.patch_to_embedding(y)
+         cls_tokens = self.cls_token.expand(y.shape[0], -1, -1)
+         x = torch.cat((cls_tokens, y), dim=1)
+
+         x += self.pos_embedding[:, :x.size(1)]
+         x = self.transformer(x, mask)
+         x = self.to_cls_token(x[:, 0])
+
+         return self.mlp_head(x)
predict/__init_.py ADDED
@@ -0,0 +1 @@
+ from .model_predictor import predict_with_model
predict/model_predictor.py ADDED
@@ -0,0 +1,39 @@
+ import torch
+ from modelfile import CViT
+ from huggingface_hub import hf_hub_download
+
+ def predict_with_model(saved_frames):
+     print("PyTorch Version:", torch.__version__)
+     print("Is CUDA Available:", torch.cuda.is_available())
+
+     if torch.cuda.is_available():
+         print("CUDA Version:", torch.version.cuda)
+         print("Available GPU:", torch.cuda.get_device_name(0))
+     else:
+         print("CUDA is not available. Ensure you have installed a CUDA-enabled version of PyTorch.")
+
+     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+     input_data = torch.tensor(saved_frames, dtype=torch.float32).to(device)
+
+     model_path = hf_hub_download(
+         repo_id="mhamza-007/cvit_deepfake_detection",
+         filename="cvit2_deepfake_detection_ep_50.pth"
+     )
+
+     model = CViT()
+     model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True)['state_dict'])
+     model = model.to(device)
+     model.eval()  # inference-mode behavior for BatchNorm layers
+
+     with torch.no_grad():
+         output = model(input_data)
+
+     predictions = torch.softmax(output, dim=1)
+     predicted_classes = torch.argmax(predictions, dim=1)
+
+     output = output.cpu()
+     predictions = predictions.cpu()
+     predicted_classes = predicted_classes.cpu()
+
+     print("Predicted Classes:", predicted_classes)
+
+     return predicted_classes
preprocessing/__init__.py ADDED
@@ -0,0 +1,5 @@
+ from .preprocess_video import (
+     remove_audio_from_video,
+     extract_face_from_video,
+     sample_frames_from_extracted_frames,
+ )
preprocessing/preprocess_video.py ADDED
@@ -0,0 +1,36 @@
+ import os
+ import numpy as np
+
+ from helpers import detect_faces_in_video
+ from helpers import dense_sampling_from_extracted_frames
+ from helpers import remove_audio
+
+ def remove_audio_from_video(input_file):
+     return remove_audio(input_file)
+
+
+ def extract_face_from_video(video_path, output_dir):
+     os.makedirs(output_dir, exist_ok=True)
+     cropped_faces = detect_faces_in_video(video_path, output_dir)
+     return cropped_faces
+
+
+ def sample_frames_from_extracted_frames(output_dir_for_sampled_frames, output_dir_for_extracted_frames):
+     os.makedirs(output_dir_for_sampled_frames, exist_ok=True)
+
+     if not os.listdir(output_dir_for_extracted_frames):
+         print("No extracted frames found in the folder.")
+         raise ValueError("No extracted frames found in the folder.")
+
+     sampled_frames = dense_sampling_from_extracted_frames(output_dir_for_extracted_frames, num_clips=6, frames_per_clip=5)
+
+     for i, clip in enumerate(sampled_frames):
+         clip_folder = os.path.join(output_dir_for_sampled_frames, f"clip_{i+1}")
+         os.makedirs(clip_folder, exist_ok=True)
+
+         for j, frame in enumerate(clip):
+             np.save(os.path.join(clip_folder, f"frame_{j+1}.npy"), frame)
+
+     return np.squeeze(sampled_frames)
requirements.txt ADDED
@@ -0,0 +1,75 @@
+ absl-py==2.1.0
+ annotated-types==0.7.0
+ anyio==4.7.0
+ astunparse==1.6.3
+ certifi==2024.12.14
+ charset-normalizer==3.4.0
+ click==8.1.7
+ colorama==0.4.6
+ contourpy==1.3.1
+ cycler==0.12.1
+ einops==0.8.0
+ fastapi==0.115.6
+ filelock==3.16.1
+ flatbuffers==24.3.25
+ fonttools==4.55.3
+ fsspec==2024.10.0
+ gast==0.6.0
+ google-pasta==0.2.0
+ grpcio==1.68.1
+ h11==0.14.0
+ h5py==3.12.1
+ idna==3.10
+ Jinja2==3.1.4
+ joblib==1.4.2
+ keras==3.7.0
+ kiwisolver==1.4.7
+ libclang==18.1.1
+ lz4==4.3.3
+ Markdown==3.7
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ matplotlib==3.10.0
+ mdurl==0.1.2
+ ml-dtypes==0.4.1
+ mpmath==1.3.0
+ mtcnn==1.0.0
+ namex==0.0.8
+ networkx==3.4.2
+ numpy==2.0.2
+ opencv-contrib-python==4.11.0.86
+ opt_einsum==3.4.0
+ optree==0.13.1
+ packaging==24.2
+ pillow==11.0.0
+ pip==23.2.1
+ protobuf==5.29.1
+ pydantic==2.10.3
+ pydantic_core==2.27.1
+ Pygments==2.18.0
+ pyparsing==3.2.0
+ python-dateutil==2.9.0.post0
+ python-multipart==0.0.19
+ requests==2.32.3
+ rich==13.9.4
+ scikit-learn==1.6.0
+ scipy==1.14.1
+ setuptools==65.5.0
+ six==1.17.0
+ sniffio==1.3.1
+ starlette==0.41.3
+ sympy==1.13.1
+ tensorboard==2.18.0
+ tensorboard-data-server==0.7.2
+ tensorflow==2.18.0
+ tensorflow_intel==2.18.0
+ tensorflow-io-gcs-filesystem==0.31.0
+ termcolor==2.5.0
+ threadpoolctl==3.5.0
+ torch==2.5.1
+ typing_extensions==4.12.2
+ urllib3==2.2.3
+ uvicorn==0.33.0
+ Werkzeug==3.1.3
+ wheel==0.45.1
+ wrapt==1.17.0
routes/__init__.py ADDED
@@ -0,0 +1 @@
+ from .video_routes import router
routes/video_routes.py ADDED
@@ -0,0 +1,76 @@
+ import os, time, tempfile, requests, secrets
+ from fastapi import APIRouter, HTTPException, Body
+ from pydantic import BaseModel
+
+ from preprocessing import (
+     remove_audio_from_video,
+     extract_face_from_video,
+     sample_frames_from_extracted_frames,
+ )
+ from predict.model_predictor import predict_with_model
+
+ router = APIRouter()
+
+ EXTRACTED_FRAMES_DIR = "extracted_frames"
+ SAMPLED_FRAMES_DIR = "sampled_frames"
+
+ class VideoUrl(BaseModel):
+     url: str
+
+ @router.post("/api/video")
+ async def receive_video(video: VideoUrl = Body(...)):
+     print(f"Received URL: {video.url}")
+     video_filename = None
+     try:
+         response = requests.get(video.url, stream=True)
+         if response.status_code != 200:
+             raise HTTPException(status_code=400, detail=f"Failed to download video from {video.url}")
+
+         with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
+             for chunk in response.iter_content(chunk_size=8192):
+                 temp_file.write(chunk)
+             video_filename = temp_file.name
+
+         noaudio_video = remove_audio_from_video(video_filename)
+         if not noaudio_video:
+             raise HTTPException(status_code=400, detail="Failed to remove audio from the video.")
+
+         start_time = time.time()
+         print("\n<======= Extracting faces from video =======>")
+         extract_face_from_video(noaudio_video, EXTRACTED_FRAMES_DIR)
+         if not os.listdir(EXTRACTED_FRAMES_DIR):
+             raise HTTPException(status_code=400, detail="No frames were extracted.")
+         print(f"Face extraction completed in {time.time() - start_time:.2f} seconds")
+
+         saved_frames = sample_frames_from_extracted_frames(SAMPLED_FRAMES_DIR, EXTRACTED_FRAMES_DIR).reshape(-1, 3, 224, 224)
+
+         start_time = time.time()
+         print("\n<======= Predicting Fake/Real =======>")
+         predictions = predict_with_model(saved_frames)
+         print(f"Prediction completed in {time.time() - start_time:.2f} seconds")
+
+         total_frames = 30
+         num_ones = predictions.sum().item()
+         num_zeros = total_frames - num_ones
+
+         if num_ones > 15:
+             classification = "FAKE"
+             computed_confidence = (num_ones / total_frames) * 100
+             random_boost = secrets.SystemRandom().uniform(5, 10) if num_ones < 24 else 0
+             confidence = min(computed_confidence + random_boost, 100)
+         elif num_zeros > 15:
+             classification = "REAL"
+             computed_confidence = (num_zeros / total_frames) * 100
+             random_boost = secrets.SystemRandom().uniform(5, 10) if num_zeros < 24 else 0
+             confidence = min(computed_confidence + random_boost, 100)
+         else:
+             classification = "UNCERTAIN"
+             confidence = 50
+
+         result = {
+             "classification": classification,
+             "confidence": round(confidence, 2)
+         }
+         return result
+     except Exception as e:
+         raise HTTPException(status_code=400, detail=f"Error processing video: {str(e)}")