import numpy as np
from tqdm import tqdm
from glob import glob
import os


def load_image_folder(folder_path: str, max_nb_images: int) -> list[str]:
    # Collect up to `max_nb_images` image paths from the folder.
    image_paths = glob(f"{folder_path}/*")
    return image_paths[:max_nb_images]


def compute_embeddings_and_labels(
    images: list[str], label: int
) -> tuple[np.ndarray, np.ndarray]:
    # Imported lazily: deepface pulls in heavy model dependencies on import.
    from deepface import DeepFace

    embeddings = []
    labels = []
    for image in tqdm(images):
        try:
            embedding_obj = DeepFace.represent(
                img_path=image,
                model_name="Facenet",
            )
            embedding = embedding_obj[0]["embedding"]
            embeddings.append(embedding)
            labels.append(label)
        except Exception as e:
            # Skip images DeepFace cannot process (e.g. no detectable face).
            print(f"Error while processing {image}: {e}")
    return np.vstack(embeddings), np.array(labels)


def load_embeddings_and_labels(
    folder_path: str, label: int, max_nb_images: int, cache: bool
) -> tuple[np.ndarray, np.ndarray]:
    # Recompute embeddings when the cached .npy files are missing or caching
    # is disabled; the freshly saved (or existing) files are then loaded back.
    if (
        not os.path.exists(f"{folder_path}/embeddings.npy")
        or not os.path.exists(f"{folder_path}/labels.npy")
        or not cache
    ):
        images = load_image_folder(folder_path, max_nb_images)
        embeddings, labels = compute_embeddings_and_labels(images, label)
        np.save(f"{folder_path}/embeddings.npy", embeddings)
        np.save(f"{folder_path}/labels.npy", labels)
    embeddings = np.load(f"{folder_path}/embeddings.npy")
    labels = np.load(f"{folder_path}/labels.npy")
    return embeddings, labels


def load_dataset(
    target_folder: str,
    max_nb_images=500,
    cache=True,
    deep_fake_folder: str = "./data/deepfake",
) -> tuple[np.ndarray, np.ndarray]:
    # Label convention: 0 for deepfake images, 1 for genuine target images.
    deep_fake_images_embeddings, deep_fake_labels = load_embeddings_and_labels(
        deep_fake_folder, max_nb_images=max_nb_images, label=0, cache=cache
    )
    target_images_embeddings, target_images_labels = load_embeddings_and_labels(
        target_folder, max_nb_images=max_nb_images, label=1, cache=cache
    )
    embeddings = np.vstack([target_images_embeddings, deep_fake_images_embeddings])
    labels = np.hstack([target_images_labels, deep_fake_labels])
    return embeddings, labels


def featurisation(embeddings: np.ndarray) -> np.ndarray:
    # Reduce each embedding to three summary statistics: RMS, mean and median.
    rms = np.sqrt(np.mean(embeddings**2, axis=1))
    mean = np.mean(embeddings, axis=1)
    median = np.median(embeddings, axis=1)
    features = np.stack([rms, mean, median], axis=1)
    return features
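

# The helper below is a minimal usage sketch, not part of the original
# pipeline: it only illustrates how the loaders and `featurisation` are
# meant to be chained. Its name and argument are illustrative.
def build_feature_dataset(target_folder: str) -> tuple[np.ndarray, np.ndarray]:
    # Load DeepFace embeddings for deepfake and target images, then reduce
    # each Facenet embedding to the three summary statistics above.
    embeddings, labels = load_dataset(target_folder)
    features = featurisation(embeddings)
    return features, labels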


# Concrete ML is imported so the torch model below can be compiled to run
# under fully homomorphic encryption.
from concrete.ml.torch.compile import compile_torch_model
import torch
import torch.nn as nn


class RegNet(nn.Module):
    # Thresholded linear scorer: the weight vectors are passed in alongside
    # the embeddings, so only the bias `b` is a learnable parameter.
    def __init__(self, b):
        super().__init__()
        self.b = nn.Parameter(torch.ones(1) * b)

    def forward(self, x):
        # Each row of `x` packs a 128-d Facenet embedding followed by a
        # weight vector; score every embedding against every weight row,
        # add the bias, and binarise the result.
        X = x[:, :128]
        W = x[:, 128:]
        return ((X @ W.T + self.b) > 0).float()
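

# Minimal compilation sketch, not from the original file: how RegNet could be
# turned into its quantized, FHE-executable equivalent with Concrete ML. The
# helper name, the calibration-set shape (n, 256) and n_bits=6 are assumptions,
# and keyword names can differ between Concrete ML versions.
def compile_regnet_fhe(calibration_inputs: torch.Tensor, b: float = 0.0):
    # `calibration_inputs` should resemble real inputs to RegNet.forward:
    # each row is a 128-d embedding concatenated with a 128-d weight vector.
    model = RegNet(b)
    quantized_module = compile_torch_model(model, calibration_inputs, n_bits=6)
    # The returned quantized module can then be evaluated on clear numpy
    # inputs, e.g. quantized_module.forward(x, fhe="simulate"), before a
    # real FHE run.
    return quantized_module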