allmodels

- finetunedvqgan.py +29 -0
- modelz.py +155 -0
- segmentface.py +75 -0
finetunedvqgan.py
ADDED
@@ -0,0 +1,29 @@
import torch
from torch.utils.checkpoint import checkpoint
from taming.models.vqgan import VQModel, GumbelVQ
from omegaconf import OmegaConf


class Generator:
    def __init__(self, config_path, device=None):
        self.config_path = config_path
        # Default to GPU when available; the original default referenced an
        # undefined global `device`.
        self.device = device if device is not None else ("cuda" if torch.cuda.is_available() else "cpu")

    def load_models(self):
        # Load configuration
        config = OmegaConf.load(self.config_path)
        # Extract parameters specific to GumbelVQ
        vq_params = config.model.params
        # Initialize the GumbelVQ model
        model_vaq = GumbelVQ(
            ddconfig=vq_params.ddconfig,
            lossconfig=vq_params.lossconfig,
            n_embed=vq_params.n_embed,
            embed_dim=vq_params.embed_dim,
            kl_weight=vq_params.kl_weight,
            temperature_scheduler_config=vq_params.temperature_scheduler_config,
        ).to(self.device)

        return model_vaq
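A minimal usage sketch, assuming a taming-transformers GumbelVQ YAML config; the config and checkpoint paths below are hypothetical placeholders, not files confirmed by this commit:

    # Hypothetical paths; point these at the Space's actual config and checkpoint.
    generator = Generator(config_path="configs/vqgan_gumbel_f8.yaml", device="cuda")
    model_vaq = generator.load_models()

    # Optionally restore fine-tuned weights before inference (assumed checkpoint layout).
    state = torch.load("checkpoints/vqgan_finetuned.ckpt", map_location=generator.device)
    model_vaq.load_state_dict(state.get("state_dict", state), strict=False)
    model_vaq.eval()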
modelz.py
ADDED
@@ -0,0 +1,155 @@
import numpy as np
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

##_____________________Define:MODEL-F & MODEL-G_________________

# Positional Encoding
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=1024):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(0.1)
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe.unsqueeze(0))  # (1, max_len, d_model)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)

# Transformer Encoder
class TransformerEncoder(nn.Module):
    def __init__(self, d_model=256, nhead=8, num_layers=6, dim_feedforward=1024, dropout=0.1):
        super(TransformerEncoder, self).__init__()
        self.positional_encoding = PositionalEncoding(d_model)
        self.encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout, batch_first=True),
            num_layers=num_layers
        )

    def preprocess_latent(self, Z):
        batch_size, channels, height, width = Z.shape  # (batch_size, 256, 32, 32)
        seq_len = height * width
        Z = Z.permute(0, 2, 3, 1).reshape(batch_size, seq_len, channels)  # (batch_size, 1024, 256)
        return Z

    def postprocess_latent(self, Z):
        batch_size, seq_len, channels = Z.shape  # (batch_size, 1024, 256)
        height = width = int(math.sqrt(seq_len))
        Z = Z.reshape(batch_size, height, width, channels).permute(0, 3, 1, 2)  # (batch_size, 256, 32, 32)
        return Z

    def forward(self, Z):
        Z = self.preprocess_latent(Z)
        Z = self.positional_encoding(Z)
        Z = self.encoder(Z)
        Z = self.postprocess_latent(Z)
        return Z  # latent of transformer

class TransformerDecoder(nn.Module):
    def __init__(self, d_model=256, nhead=8, num_layers=12, dim_feedforward=1024, dropout=0.1):
        super().__init__()
        self.d_model = d_model

        # Enhanced positional encoding
        self.positional_encoding = PositionalEncoding(d_model)

        # Multi-layer learnable start tokens
        self.base_start = nn.Parameter(torch.randn(1, 1024, d_model))
        self.start_net = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Linear(d_model, dim_feedforward),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(dim_feedforward, d_model),
            nn.LayerNorm(d_model)
        )

        # Context-aware transformer decoder
        self.decoder = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(
                d_model=d_model,
                nhead=nhead,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                batch_first=True
            ),
            num_layers=num_layers
        )

        # Output projection with residual
        self.output_layer = nn.Sequential(
            nn.Linear(d_model, d_model * 2),
            nn.GELU(),
            nn.Linear(d_model * 2, d_model)
        )

        self.init_weights()

    def init_weights(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
        nn.init.normal_(self.base_start, mean=0, std=0.02)

    def preprocess_latent(self, Z):
        # Convert (B, C, H, W) to (B, H*W, C)
        return Z.permute(0, 2, 3, 1).flatten(1, 2)

    def postprocess_latent(self, Z):
        # Convert (B, H*W, C) back to (B, C, H, W)
        B, L, C = Z.shape
        H = W = int(L**0.5)
        return Z.view(B, H, W, C).permute(0, 3, 1, 2)

    def forward(self, Z, Z1_start_tokens=None, teacher_forcing_ratio=0.5):
        # Process input latent
        Z = self.preprocess_latent(Z)
        # Z = self.positional_encoding(Z)

        # Generate enhanced start tokens
        B = Z.size(0)
        base_tokens = self.base_start.expand(B, -1, -1)
        processed_start = self.start_net(base_tokens)

        # Teacher forcing integration
        if Z1_start_tokens is not None and teacher_forcing_ratio > 0:
            Z1_processed = self.positional_encoding(self.preprocess_latent(Z1_start_tokens))

            # Create mixing mask
            mask = torch.rand(B, 1, 1, device=Z.device) < teacher_forcing_ratio
            processed_start = torch.where(mask, Z1_processed, processed_start)

        # Decoder processing with residual
        decoder_input = self.positional_encoding(processed_start)
        outputs = self.decoder(decoder_input, Z)
        outputs = self.output_layer(outputs + decoder_input)

        return self.postprocess_latent(outputs)

class DeepfakeToSourceTransformer(nn.Module):
    def __init__(self, d_model=256, encoder_nhead=8, decoder_nhead=8, num_encoder_layers=6, num_decoder_layers=12, dim_feedforward=1024, dropout=0.1):
        super().__init__()
        self.encoder = TransformerEncoder(
            d_model=d_model,
            nhead=encoder_nhead,
            num_layers=num_encoder_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout
        )
        self.decoder = TransformerDecoder(
            d_model=d_model,
            nhead=decoder_nhead,
            num_layers=num_decoder_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout
        )

    def forward(self, Z, Z1_start_tokens=None, teacher_forcing_ratio=0.5):
        memory = self.encoder(Z)
        Z1 = self.decoder(memory, Z1_start_tokens, teacher_forcing_ratio=teacher_forcing_ratio)
        return Z1
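A shape sanity check for the encoder-decoder pair, assuming the 256-channel 32x32 VQGAN latents noted in the comments above; it uses random tensors and no trained weights:

    model = DeepfakeToSourceTransformer()
    Z_fake = torch.randn(2, 256, 32, 32)   # deepfake latent (B, C, H, W)
    Z_src = torch.randn(2, 256, 32, 32)    # source latent, used only for teacher forcing
    Z_pred = model(Z_fake, Z1_start_tokens=Z_src, teacher_forcing_ratio=0.5)
    print(Z_pred.shape)                    # torch.Size([2, 256, 32, 32])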
segmentface.py
ADDED
@@ -0,0 +1,75 @@
import io
import cv2
import mediapipe as mp
import numpy as np
from rembg import remove
from PIL import Image

class FaceSegmenter:
    def __init__(self, threshold=0.5):
        self.threshold = threshold
        # Initialize face detection
        self.face_detection = mp.solutions.face_detection.FaceDetection(
            model_selection=1,  # 1 = full-range model, 0 = short-range (close-up) model
            min_detection_confidence=0.5
        )
        # Initialize selfie segmentation (for background removal)
        self.selfie_segmentation = mp.solutions.selfie_segmentation.SelfieSegmentation(
            model_selection=1  # 1 = landscape model, 0 = general model
        )

    def segment_face(self, image_path):
        # Load the image
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError("Image not found or unable to load.")

        # Convert to RGB (MediaPipe requires RGB input)
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Step 1: Detect the face
        face_results = self.face_detection.process(rgb_image)
        if not face_results.detections:
            # No face found: fall back to rembg background removal
            with open(image_path, "rb") as input_file:
                input_image = input_file.read()
            output_image = remove(input_image)
            # Convert the output bytes to a numpy array
            output_image = np.array(Image.open(io.BytesIO(output_image)))
            # Convert RGBA to RGB (remove alpha channel)
            if output_image.shape[2] == 4:
                output_image = cv2.cvtColor(output_image, cv2.COLOR_RGBA2RGB)
            # Convert to BGR so the return convention matches the cv2 path below
            output_image = cv2.cvtColor(output_image, cv2.COLOR_RGB2BGR)
            return output_image

        # Get the bounding box of the first detected face
        detection = face_results.detections[0]
        bboxC = detection.location_data.relative_bounding_box
        h, w, _ = image.shape
        x, y, width, height = int(bboxC.xmin * w), int(bboxC.ymin * h), \
                              int(bboxC.width * w), int(bboxC.height * h)

        # Step 2: Segment the foreground (selfie segmentation)
        segmentation_results = self.selfie_segmentation.process(rgb_image)
        if segmentation_results.segmentation_mask is None:
            raise ValueError("Segmentation failed.")

        # Create a binary mask
        mask = (segmentation_results.segmentation_mask > self.threshold).astype(np.uint8)

        # Step 3: Crop the face using the bounding box
        face_mask = np.zeros_like(mask)
        face_mask[y:y+height, x:x+width] = mask[y:y+height, x:x+width]

        # Apply the mask to the original image
        segmented_face = cv2.bitwise_and(image, image, mask=face_mask)

        return segmented_face

    def save_segmented_face(self, image_path, output_path):
        segmented_face = self.segment_face(image_path)
        cv2.imwrite(output_path, segmented_face)

    def show_segmented_face(self, image_path):
        segmented_face = self.segment_face(image_path)
        cv2.imshow("Segmented Face", segmented_face)
        cv2.waitKey(0)
        cv2.destroyAllWindows()