Upload 10 files

- __init__.py +0 -0
- bitplane.py +27 -0
- ela.py +64 -0
- exif.py +11 -0
- gradient.py +50 -0
- hf_logger.py +98 -0
- minmax.py +82 -0
- text_content.py +57 -0
- utils.py +27 -0
- wavelet.py +20 -0
__init__.py
ADDED
File without changes
bitplane.py
ADDED
import cv2 as cv
import numpy as np
from PIL import Image

def bit_plane_extractor(
    image: Image.Image,
    channel: str = "Luminance",
    bit: int = 0,
    filter_type: str = "Disabled",
) -> Image.Image:
    """Extract and visualize a bit plane from a selected channel of the image."""
    img = np.array(image.convert("RGB"))
    if channel == "Luminance":
        img = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
    elif channel == "RGB Norm":
        # Euclidean norm across the channels; the split order is R, G, B
        # because the array comes from an RGB-converted PIL image.
        r, g, b = cv.split(img.astype(np.float64))
        img = np.sqrt(np.power(r, 2) + np.power(g, 2) + np.power(b, 2)).astype(np.uint8)
    else:
        idx = {"Red": 0, "Green": 1, "Blue": 2}[channel]
        img = img[:, :, idx]
    # Mask out the requested bit plane, then stretch it to the full 8-bit range.
    plane = cv.bitwise_and(np.full_like(img, 2 ** bit), img)
    plane = cv.normalize(plane, None, 0, 255, cv.NORM_MINMAX).astype(np.uint8)
    if filter_type == "Median":
        plane = cv.medianBlur(plane, 3)
    elif filter_type == "Gaussian":
        plane = cv.GaussianBlur(plane, (3, 3), 0)
    return Image.fromarray(plane)
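Below the listing, a minimal usage sketch (not part of the upload; the file paths are placeholders):

# Hypothetical usage: pull the least-significant bit plane of the blue channel.
from PIL import Image
from bitplane import bit_plane_extractor

img = Image.open("sample.jpg")  # placeholder path
plane = bit_plane_extractor(img, channel="Blue", bit=0, filter_type="Median")
plane.save("blue_bit0.png")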
ela.py
ADDED
import numpy as np
import cv2 as cv
from time import time
from PIL import Image

def compress_jpg(image, quality):
    """Compress an image in memory using JPEG compression."""
    encode_param = [int(cv.IMWRITE_JPEG_QUALITY), quality]
    _, buffer = cv.imencode('.jpg', image, encode_param)
    return cv.imdecode(buffer, cv.IMREAD_COLOR)

def desaturate(image):
    """Convert an image to grayscale (assumes OpenCV's BGR channel order)."""
    return cv.cvtColor(image, cv.COLOR_BGR2GRAY)

def create_lut(contrast, brightness):
    """Create a lookup table for contrast and brightness adjustment."""
    lut = np.arange(256, dtype=np.uint8)
    return cv.convertScaleAbs(lut, None, contrast / 128, brightness)

def elapsed_time(start):
    """Format the time elapsed since `start` in seconds."""
    return f"{time() - start:.3f}s"

def genELA(img, quality=75, scale=50, contrast=20, linear=False, grayscale=False):
    """
    Perform Error Level Analysis on an image.

    Args:
        img: Input image (numpy array)
        quality: JPEG compression quality (1-100)
        scale: Output multiplicative gain (1-100)
        contrast: Output tonality compression (0-100)
        linear: Whether to use linear difference
        grayscale: Whether to output a grayscale image

    Returns:
        Processed ELA image
    """
    # Convert the image to float32 and normalize to [0, 1]
    original = img.astype(np.float32) / 255

    # Recompress the image and normalize it to the same range
    compressed = compress_jpg(img, quality)
    compressed = compressed.astype(np.float32) / 255

    # Calculate the difference based on the selected mode
    if not linear:
        difference = cv.absdiff(original, compressed)
        ela = cv.convertScaleAbs(cv.sqrt(difference) * 255, None, scale / 20)
    else:
        # Compare the two normalized images; the original code subtracted the
        # uint8 input from the float compressed image, mixing value ranges.
        difference = cv.subtract(compressed, original)
        ela = cv.convertScaleAbs(difference * 255, None, scale / 20)

    # Apply contrast adjustment through a lookup table
    contrast_value = int(contrast / 100 * 128)
    ela = cv.LUT(ela, create_lut(contrast_value, contrast_value))

    # Convert to grayscale if requested
    if grayscale:
        ela = desaturate(ela)

    return Image.fromarray(ela)
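A minimal usage sketch (paths are placeholders; OpenCV loads images in BGR order):

# Hypothetical usage: run ELA and save the visualization.
import cv2 as cv
from ela import genELA

img = cv.imread("suspect.jpg")  # placeholder path
result = genELA(img, quality=75, scale=50, contrast=20)
result.save("suspect_ela.png")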
exif.py
ADDED
import tempfile

import exiftool
from PIL import Image

def exif_full_dump(image: Image.Image) -> dict:
    """Extract all EXIF metadata from an image using exiftool."""
    # Write the image to a temporary JPEG so the external exiftool binary can read it.
    with tempfile.NamedTemporaryFile(suffix='.jpg', delete=True) as tmp:
        image.save(tmp.name)
        # Note: ExifTool().get_metadata(path) is the PyExifTool 0.4.x API;
        # newer releases expose ExifToolHelper.get_metadata([paths]) instead.
        with exiftool.ExifTool() as et:
            metadata = et.get_metadata(tmp.name)
    return metadata
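If a newer PyExifTool (0.5+) is installed, the equivalent call — a sketch under that assumption, with a placeholder path — would look like:

# Assumes PyExifTool >= 0.5, where ExifToolHelper replaces ExifTool for metadata reads.
import exiftool

with exiftool.ExifToolHelper() as et:
    metadata = et.get_metadata(["photo.jpg"])[0]  # returns one dict per file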
gradient.py
ADDED
import numpy as np
import cv2 as cv
from PIL import Image

def norm_mat(mat):
    return cv.normalize(mat, None, 0, 255, cv.NORM_MINMAX).astype(np.uint8)

def equalize_img(img):
    ycrcb = cv.cvtColor(img, cv.COLOR_BGR2YCrCb)
    ycrcb[:, :, 0] = cv.equalizeHist(ycrcb[:, :, 0])
    return cv.cvtColor(ycrcb, cv.COLOR_YCrCb2BGR)

def create_lut(intensity):
    """Build a 3-channel LUT that shifts every value by `intensity`, clipped to [0, 255]."""
    lut = np.zeros((256, 1, 3), dtype=np.uint8)
    for i in range(256):
        lut[i, 0, :] = min(255, max(0, i + intensity))
    return lut

def gradient_processing(image, intensity=90, blue_mode="Abs", invert=False, equalize=False):
    image = np.array(image)
    # PIL arrays are RGB, so convert with an RGB flag (the original used BGR2GRAY).
    dx, dy = cv.spatialGradient(cv.cvtColor(image, cv.COLOR_RGB2GRAY))
    intensity = int(intensity / 100 * 127)
    dx = dx.astype(np.float32)
    dy = dy.astype(np.float32)
    if invert:
        dx, dy = -dx, -dy
    dx_abs = np.abs(dx)
    dy_abs = np.abs(dy)
    # Map the gradients to [0, 255] around a mid-gray of 127,
    # guarding against division by zero on flat images.
    red = ((dx / max(np.max(dx_abs), 1) * 127) + 127).astype(np.uint8)
    green = ((dy / max(np.max(dy_abs), 1) * 127) + 127).astype(np.uint8)
    if blue_mode == "None":
        blue = np.zeros_like(red)
    elif blue_mode == "Flat":
        blue = np.full_like(red, 255)
    elif blue_mode == "Abs":
        blue = norm_mat(dx_abs + dy_abs)
    elif blue_mode == "Norm":
        blue = norm_mat(np.linalg.norm(cv.merge((red, green)), axis=2))
    else:
        raise ValueError(f"Unknown blue_mode: {blue_mode}")
    gradient = cv.merge([blue, green, red])
    if equalize:
        gradient = equalize_img(gradient)
    elif intensity > 0:
        gradient = cv.LUT(gradient, create_lut(intensity))
    # The merged result is in OpenCV's BGR order; convert to RGB for PIL output.
    return Image.fromarray(cv.cvtColor(gradient, cv.COLOR_BGR2RGB))
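A minimal usage sketch (the path is a placeholder):

# Hypothetical usage: render the gradient map of an image.
from PIL import Image
from gradient import gradient_processing

img = Image.open("photo.jpg")  # placeholder path
grad = gradient_processing(img, intensity=90, blue_mode="Abs")
grad.save("photo_gradient.png")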
hf_logger.py
ADDED
import os
import base64
import json
import io
import datetime
from PIL import Image
import logging
from huggingface_hub import HfApi  # used only for dataset repo creation/verification
import numpy as np

logger = logging.getLogger(__name__)

HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"
LOCAL_LOG_DIR = "./hf_inference_logs"  # local directory where logs are stored

# Custom JSON encoder to handle numpy types
class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)

def _pil_to_base64(image: Image.Image) -> str:
    """Converts a PIL Image to a base64 string."""
    # Explicitly check that the input is a PIL Image
    if not isinstance(image, Image.Image):
        raise TypeError(f"Expected a PIL Image, but received type: {type(image)}")

    buffered = io.BytesIO()
    # Ensure the image is in RGB mode before saving as JPEG
    if image.mode != 'RGB':
        image = image.convert('RGB')
    image.save(buffered, format="JPEG", quality=85)
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

# Logs are written locally as individual JSON files rather than appended
# to an in-memory datasets.Dataset object.
def initialize_dataset_repo():
    """Initializes or verifies the Hugging Face dataset repository."""
    api = HfApi(token=os.getenv("HF_TOKEN"))
    try:
        api.repo_info(repo_id=HF_DATASET_NAME, repo_type="dataset")
        logger.info(f"Hugging Face dataset repository already exists: {HF_DATASET_NAME}")
    except Exception:
        logger.info(f"Creating new Hugging Face dataset repository: {HF_DATASET_NAME}")
        api.create_repo(repo_id=HF_DATASET_NAME, repo_type="dataset", private=True)
    return api  # returned for subsequent operations

def log_inference_data(
    original_image: Image.Image,
    inference_params: dict,
    model_predictions: list[dict],
    ensemble_output: dict,
    forensic_images: list[Image.Image],
    agent_monitoring_data: dict,
    human_feedback: dict = None
):
    """Logs a single inference event to a local JSON file; the dataset
    repository is created/verified so the logs can be uploaded later."""
    try:
        initialize_dataset_repo()  # ensure the repository exists (client not yet used)

        original_image_b64 = _pil_to_base64(original_image)

        forensic_images_b64 = []
        for img_item in forensic_images:
            if img_item is not None:
                if not isinstance(img_item, Image.Image):
                    try:
                        img_item = Image.fromarray(img_item)
                    except Exception as e:
                        logger.error(f"Error converting forensic image to PIL for base64 encoding: {e}")
                        continue
                forensic_images_b64.append(_pil_to_base64(img_item))

        new_entry = {
            "timestamp": datetime.datetime.now().isoformat(),
            "image": original_image_b64,
            "inference_request": inference_params,
            "model_predictions": model_predictions,
            "ensemble_output": ensemble_output,
            "forensic_outputs": forensic_images_b64,
            "agent_monitoring_data": agent_monitoring_data,
            "human_feedback": human_feedback if human_feedback is not None else {}
        }

        # Build a unique path for the new log file in the local directory
        os.makedirs(LOCAL_LOG_DIR, exist_ok=True)
        timestamp_str = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
        log_file_path = os.path.join(LOCAL_LOG_DIR, f"log_{timestamp_str}.json")

        # Serialize the entry to JSON using the numpy-aware encoder
        with open(log_file_path, 'w', encoding='utf-8') as f:
            json.dump(new_entry, f, cls=NumpyEncoder, indent=2)

        logger.info(f"Inference data logged successfully to local file: {log_file_path}")

    except Exception as e:
        logger.error(f"Failed to log inference data to local file: {e}")
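A minimal invocation sketch (all argument values are illustrative; HF_TOKEN must be set for the repository check to succeed):

# Hypothetical call with placeholder data.
from PIL import Image
from hf_logger import log_inference_data

log_inference_data(
    original_image=Image.open("query.jpg"),  # placeholder path
    inference_params={"model_weights": "v1", "confidence_threshold": 0.75},
    model_predictions=[{"model": "detector-a", "score": 0.91}],
    ensemble_output={"label": "ai-generated", "score": 0.88},
    forensic_images=[],
    agent_monitoring_data={},
)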
minmax.py
ADDED
import numpy as np
import cv2 as cv
from PIL import Image

def norm_mat(mat):
    return cv.normalize(mat, None, 0, 255, cv.NORM_MINMAX).astype(np.uint8)

def minmax_dev(patch, mask):
    """Classify the center pixel as a local minimum (-1), maximum (+1), or neither (0)."""
    c = patch[1, 1]
    minimum, maximum, _, _ = cv.minMaxLoc(patch, mask)
    if c < minimum:
        return -1
    if c > maximum:
        return +1
    return 0

def blk_filter(img, radius):
    """Block-wise standard deviation, normalized to [0, 127]."""
    result = np.zeros_like(img, np.float32)
    rows, cols = result.shape
    block = 2 * radius + 1
    for i in range(radius, rows, block):
        for j in range(radius, cols, block):
            result[
                i - radius : i + radius + 1, j - radius : j + radius + 1
            ] = np.std(
                img[i - radius : i + radius + 1, j - radius : j + radius + 1]
            )
    return cv.normalize(result, None, 0, 127, cv.NORM_MINMAX, cv.CV_8UC1)

def preprocess(image, channel=4, radius=2):
    """Min/max deviation map. `channel`: 0 = luminance, 1-3 = single channel, 4 = channel norm."""
    if not isinstance(image, np.ndarray):
        image = np.array(image)  # ensure the image is a numpy array (RGB if from PIL)
    if channel == 0:
        img = cv.cvtColor(image, cv.COLOR_RGB2GRAY)  # PIL arrays are RGB
    elif channel == 4:
        # Euclidean norm across the three channels (order is irrelevant here).
        b, g, r = cv.split(image.astype(np.float64))
        img = cv.sqrt(cv.pow(b, 2) + cv.pow(g, 2) + cv.pow(r, 2))
    else:
        # Channels 1-3 index the last axis in reverse: 1 -> 2, 2 -> 1, 3 -> 0.
        img = image[:, :, 3 - channel]
    # Slide a 3x3 window over the image using stride tricks.
    kernel = 3
    border = kernel // 2
    shape = (img.shape[0] - kernel + 1, img.shape[1] - kernel + 1, kernel, kernel)
    strides = 2 * img.strides
    patches = np.lib.stride_tricks.as_strided(img, shape=shape, strides=strides)
    patches = patches.reshape((-1, kernel, kernel))
    # Mask out the center pixel so it is compared against its 8 neighbors only.
    mask = np.full((kernel, kernel), 255, dtype=np.uint8)
    mask[border, border] = 0
    blocks = [0] * (shape[0] * shape[1])
    for i, patch in enumerate(patches):
        blocks[i] = minmax_dev(patch, mask)
    output = np.array(blocks).reshape(shape[:-2])
    output = cv.copyMakeBorder(
        output, border, border, border, border, cv.BORDER_CONSTANT
    )
    low = output == -1
    high = output == +1
    minmax = np.zeros_like(image)
    if radius > 0:
        radius += 3
        low = blk_filter(low, radius)
        high = blk_filter(high, radius)
        if channel <= 2:
            minmax[:, :, 2 - channel] = low
            minmax[:, :, 2 - channel] += high
        else:
            minmax = np.repeat(low[:, :, np.newaxis], 3, axis=2)
            minmax += np.repeat(high[:, :, np.newaxis], 3, axis=2)
            minmax = norm_mat(minmax)
    else:
        if channel == 0:
            minmax[low] = [0, 0, 255]
            minmax[high] = [0, 0, 255]
        elif channel == 1:
            minmax[low] = [0, 255, 0]
            minmax[high] = [0, 255, 0]
        elif channel == 2:
            minmax[low] = [255, 0, 0]
            minmax[high] = [255, 0, 0]
        elif channel == 3:
            minmax[low] = [255, 255, 255]
            minmax[high] = [255, 255, 255]
    return Image.fromarray(minmax)
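A minimal usage sketch (the path is a placeholder):

# Hypothetical usage: build a min/max deviation map from the channel norm.
from PIL import Image
from minmax import preprocess

img = Image.open("photo.png")  # placeholder path
devmap = preprocess(img, channel=4, radius=2)
devmap.save("photo_minmax.png")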
text_content.py
ADDED
QUICK_INTRO = """
### The Detection Dilemma: The Degentic Games

The cat-and-mouse game between digital forgery and detection reached a tipping point early last year, after years of escalating concern and anxiety. The most ambitious, expensive, and resource-intensive detection model yet launched with genuinely impressive results. Impressive… for an embarrassing two to three weeks.

Then came the knockout punches. New SOTA models emerged every few weeks, in every imaginable domain -- image, audio, video, music. Generated images have reached a level of realism where an untrained eye can no longer tell real from fake. [TO-DO: Add Citation to the study]

And let's be honest: we saw this coming. When has humanity ever resisted accelerating technology that promises... *interesting* applications? As the ancients wisely tweeted: 🔞 drives innovation.

It's time for a reset. Quit crying and get ready. Didn't you hear? The long-awaited Degentic Games are starting soon.


Choose wisely.

---
### **Overview of Multi-Model Consensus Methods in ML**
| **Method** | **Category** | **Description** | **Key Advantages** | **Key Limitations** | **Weaknesses** | **Strengths** |
|---|---|---|---|---|---|---|
| **Bagging (e.g., Random Forest)** | **Traditional Ensembles** | Trains multiple models on bootstrapped data subsets, aggregating predictions | Reduces overfitting (~variance reduction) | Computationally costly for large datasets; models can be correlated | Not robust to adversarial attacks | Simple to implement; robust to noisy data; handles high-dimensional data well |
| **Boosting (e.g., XGBoost, LightGBM)** | **Traditional Ensembles** | Iteratively corrects errors using weighted models | High accuracy on structured/tabular data | Risk of overfitting; sensitive to noisy data | Computationally intensive | Dominates in competitions (e.g., Kaggle); scalable for medium datasets |
| **Stacking** | **Traditional Ensembles** | Combines predictions via a meta-learner | Can outperform individual models; flexible | Increased complexity and data leakage risk | Requires careful hyperparameter tuning | Excels in combining diverse models (e.g., trees + SVMs + linear models) |
| **Deep Ensembles** | **Deep Learning Ensembles** | Multiple independently trained neural networks | Uncertainty estimation; robust to data shifts | High computational cost; memory-heavy | Model coordination challenges | State-of-the-art in safety-critical domains (e.g., medical imaging, autonomous vehicles) |
| **Snapshot Ensembles** | **Deep Learning Ensembles** | Saves models at different optimization stages | Efficient (only one training run) | Limited diversity (same architecture/init) | Requires careful checkpoint selection | Lightweight for tasks like on-device deployment |
| **Monte Carlo Dropout** | **Approximate Ensembles** | Applies dropout at inference to simulate many models | Free ensemble (during testing) | Approximates uncertainty poorly compared to deep ensembles | Limited diversity | Cheap and simple; useful for quick uncertainty estimates |
| **Mixture of Experts (MoE)** | **Scalable Ensembles** | Specialized sub-models (experts) with a gating mechanism | Efficient scaling (only activate sub-models) | Training instability; uneven expert utilization | Requires expert/gate orchestration | Dominates large-scale applications like Switch Transformers and Hyper-Cloud systems |
| **Bayesian Neural Networks (BNNs)** | **Probabilistic Ensembles** | Models weights as probability distributions | Built-in uncertainty quantification | Intractable to train exactly; approximations needed | Difficult optimization | Essential for risk-averse applications (robotics, finance) |
| **Ensemble Knowledge Distillation** | **Model Compression** | Trains a single model to mimic an ensemble | Reduces compute/memory demands | Loses some ensemble benefits (diversity, uncertainty) | Relies on a high-quality teacher ensemble | Enables deployment of ensemble-like performance in compact models (edge devices) |
| **Noisy Student Training** | **Semi-Supervised Ensembles** | Iterative self-training with teacher-student loops | Uses unlabeled data effectively; improves robustness | Needs large unlabeled data and computational resources | Vulnerable to error propagation | State-of-the-art in semi-supervised settings (e.g., NLP) |
| **Evolutionary Ensembles** | **Dynamic Ensembles** | Uses genetic algorithms to evolve model populations | Adaptive diversity generation | High time/cost for evolution; niche use cases | Hard to interpret | Useful for non-stationary environments/on datasets with drift |
| **Consensus Networks** | **NLP/Serverless Ensembles** | Distributes models across clients/aggregates votes | Decentralized privacy-preserving predictions | Communication overhead; non-i.i.d. data conflicts | Requires synchronized coordination | Feeds into federated learning systems (e.g., healthcare, finance) |
| **Hybrid Systems** | **Cross-Architecture Ensembles** | Combines models (e.g., CNNs, GNNs, transformers) | Captures multi-modal or heterogeneous patterns | Integration complexity; delayed inference | Model conflicts | Dominates in tasks requiring domain-specific reasoning (e.g., drug discovery) |
| **Self-Supervised Ensembles** | **Vision/NLP** | Uses contrastive learning with multiple models (e.g., MoCo, SimCLR) | Data-efficient; strong performance on downstream tasks | Training is resource-heavy; requires pre-training at scale | Low interpretability | Foundations for modern vision/NLP architectures (e.g., resists data scarcity) |
---"""

IMPLEMENTATION = """
### 1. **Shift away from the belief that more data leads to better results; focus instead on insight-driven, "quality over quantity" datasets in training.**
* **Move Away from Terabyte-Scale Datasets**: Focus on **quality over quantity** by curating a smaller, highly diverse, **labeled dataset** emphasizing edge cases and the latest AI generations.
* **Active Learning**: Implement active learning techniques to iteratively select the most informative samples for human labeling, reducing dataset size while maintaining effectiveness.

### 2. **Efficient Model Architectures**
* **Adopt Lightweight, State-of-the-Art Models**: Explore models designed for efficiency, such as MobileNet, EfficientNet, or recent vision transformers (ViTs) tailored for forensic analysis.
* **Transfer Learning with Fine-Tuning**: Fine-tune pre-trained models on your curated dataset to retain general knowledge while adapting to specific AI image detection tasks.

### 3. **Multi-Modal and Hybrid Approaches**
* **Combine Image Forensics with Metadata Analysis**: Integrate insights from image processing with metadata (e.g., EXIF, XMP) for a more robust detection framework.
* **Incorporate Knowledge Graphs for AI Model Identification**: If feasible, build or utilize knowledge graphs mapping known AI models to their generation signatures for targeted detection.

### 4. **Continuous Learning and Update Mechanism**
* **Online Learning or Incremental Training**: Implement a system that can incrementally update the model with new, strategically selected samples, adapting to new AI generation techniques.
* **Community-Driven Updates**: Establish a feedback loop with users/community to report undetected AI images, fueling model updates.

### 5. **Evaluation and Validation**
* **Robust Validation Protocols**: Regularly test against unseen, diverse datasets, including novel AI generations not present during training.
* **Benchmark Against State-of-the-Art**: Periodically compare performance with newly published detection models and techniques.
"""
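The consensus table above is descriptive only; as a concrete anchor, here is a minimal soft-voting sketch over per-model scores (all names, scores, and weights are hypothetical):

# Hypothetical soft-voting consensus over per-model "AI-generated" probabilities.
import numpy as np

def soft_vote(scores, weights=None):
    """Weighted average of per-model probabilities; returns the consensus score."""
    scores = np.asarray(scores, dtype=np.float64)
    weights = np.ones_like(scores) if weights is None else np.asarray(weights, dtype=np.float64)
    return float(np.sum(scores * weights) / np.sum(weights))

consensus = soft_vote([0.91, 0.72, 0.64], weights=[2.0, 1.0, 1.0])  # -> 0.795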
utils.py
ADDED
import numpy as np
import io
from PIL import Image, ImageFilter

def softmax(vector):
    e = np.exp(vector - np.max(vector))  # subtract the max for numerical stability
    return e / e.sum()

def augment_image(img_pil, methods, rotate_degrees=0, noise_level=0, sharpen_strength=1):
    for method in methods:
        if method == "rotate":
            img_pil = img_pil.rotate(rotate_degrees)
        elif method == "add_noise":
            # Add Gaussian noise in float space, then clip back to [0, 255];
            # adding uint8 arrays directly would wrap around instead of saturating.
            noise = np.random.normal(0, noise_level, img_pil.size[::-1] + (3,))
            noisy = np.clip(np.array(img_pil).astype(np.float64) + noise, 0, 255)
            img_pil = Image.fromarray(noisy.astype(np.uint8))
        elif method == "sharpen":
            img_pil = img_pil.filter(ImageFilter.UnsharpMask(radius=2, percent=sharpen_strength, threshold=3))
    # The image is returned twice; call sites appear to expect a (processed, display) pair.
    return img_pil, img_pil

def convert_pil_to_bytes(image, format='JPEG'):
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format=format)
    return img_byte_arr.getvalue()
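A minimal usage sketch (the path is a placeholder):

# Hypothetical usage: rotate and sharpen an image before inference.
from PIL import Image
from utils import augment_image

img = Image.open("input.jpg")  # placeholder path
augmented, preview = augment_image(img, ["rotate", "sharpen"], rotate_degrees=15, sharpen_strength=150)
augmented.save("input_aug.jpg")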
wavelet.py
ADDED
import numpy as np
import pywt
import cv2
from PIL import Image

def wavelet_blocking_noise_estimation(image: Image.Image, blocksize: int = 8) -> Image.Image:
    """Estimate local noise using wavelet blocking. Returns a PIL image of the noise map."""
    im = np.array(image.convert('L'))
    y = np.double(im)
    # Single-level 2D DWT; the diagonal detail band cD carries mostly noise.
    cA1, (cH, cV, cD) = pywt.dwt2(y, 'db8')
    # Trim cD so it tiles evenly into blocksize x blocksize blocks.
    cD = cD[:cD.shape[0] // blocksize * blocksize, :cD.shape[1] // blocksize * blocksize]
    block = np.zeros((cD.shape[0] // blocksize, cD.shape[1] // blocksize, blocksize ** 2))
    for ii in range(0, cD.shape[0] - blocksize + 1, blocksize):
        for jj in range(0, cD.shape[1] - blocksize + 1, blocksize):
            block_elements = cD[ii:ii+blocksize, jj:jj+blocksize]
            block[ii // blocksize, jj // blocksize, :] = block_elements.flatten()
    # Robust per-block sigma estimate: MAD / 0.6745 (the Gaussian MAD-to-sigma constant).
    noise_map = np.median(np.abs(block), axis=2) / 0.6745
    noise_map_8u = cv2.normalize(noise_map, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    resized_noise_map = cv2.resize(noise_map_8u, (im.shape[1], im.shape[0]), interpolation=cv2.INTER_NEAREST)
    return Image.fromarray(resized_noise_map)
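A minimal usage sketch (the path is a placeholder):

# Hypothetical usage: visualize the estimated local noise of an image.
from PIL import Image
from wavelet import wavelet_blocking_noise_estimation

img = Image.open("scan.jpg")  # placeholder path
noise_map = wavelet_blocking_noise_estimation(img, blocksize=8)
noise_map.save("scan_noise.png")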