Ayan515 committed on
Commit eb56a55 · verified · 1 Parent(s): 8ef8972

Upload 10 files

Files changed (10):
  1. __init__.py +0 -0
  2. bitplane.py +27 -0
  3. ela.py +64 -0
  4. exif.py +11 -0
  5. gradient.py +50 -0
  6. hf_logger.py +98 -0
  7. minmax.py +82 -0
  8. text_content.py +57 -0
  9. utils.py +27 -0
  10. wavelet.py +20 -0
__init__.py ADDED
File without changes
bitplane.py ADDED
@@ -0,0 +1,27 @@
+ import cv2 as cv
+ import numpy as np
+ from PIL import Image
+
+ def bit_plane_extractor(
+     image: Image.Image,
+     channel: str = "Luminance",
+     bit: int = 0,
+     filter_type: str = "Disabled"
+ ) -> Image.Image:
+     """Extract and visualize a bit plane from a selected channel of the image."""
+     img = np.array(image.convert("RGB"))
+     if channel == "Luminance":
+         img = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
+     elif channel == "RGB Norm":
+         # The array comes from PIL, so the split order is R, G, B.
+         r, g, b = cv.split(img.astype(np.float64))
+         img = np.sqrt(np.power(r, 2) + np.power(g, 2) + np.power(b, 2)).astype(np.uint8)
+     else:
+         idx = {"Red": 0, "Green": 1, "Blue": 2}[channel]
+         img = img[:, :, idx]
+     # Isolate the requested bit, then stretch the result to the full 0-255 range.
+     plane = cv.bitwise_and(np.full_like(img, 2 ** bit), img)
+     plane = cv.normalize(plane, None, 0, 255, cv.NORM_MINMAX).astype(np.uint8)
+     if filter_type == "Median":
+         plane = cv.medianBlur(plane, 3)
+     elif filter_type == "Gaussian":
+         plane = cv.GaussianBlur(plane, (3, 3), 0)
+     return Image.fromarray(plane)
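A minimal usage sketch (the input path and output name are hypothetical):

```python
from PIL import Image
from bitplane import bit_plane_extractor

img = Image.open("sample.jpg")  # hypothetical input
# Least significant bit of the green channel, denoised with a median filter.
plane = bit_plane_extractor(img, channel="Green", bit=0, filter_type="Median")
plane.save("bitplane_green_0.png")
```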
ela.py ADDED
@@ -0,0 +1,64 @@
+ import numpy as np
+ import cv2 as cv
+ from time import time
+ from PIL import Image
+
+ def compress_jpg(image, quality):
+     """Compress image using JPEG compression."""
+     encode_param = [int(cv.IMWRITE_JPEG_QUALITY), quality]
+     _, buffer = cv.imencode('.jpg', image, encode_param)
+     return cv.imdecode(buffer, cv.IMREAD_COLOR)
+
+ def desaturate(image):
+     """Convert image to grayscale."""
+     return cv.cvtColor(image, cv.COLOR_BGR2GRAY)
+
+ def create_lut(contrast, brightness):
+     """Create lookup table for contrast and brightness adjustment."""
+     lut = np.arange(256, dtype=np.uint8)
+     lut = cv.convertScaleAbs(lut, None, contrast / 128, brightness)
+     return lut
+
+ def elapsed_time(start):
+     """Calculate elapsed time since start."""
+     return f"{time() - start:.3f}s"
+
+ def genELA(img, quality=75, scale=50, contrast=20, linear=False, grayscale=False):
+     """
+     Perform Error Level Analysis on an image.
+
+     Args:
+         img: Input image (numpy array)
+         quality: JPEG compression quality (1-100)
+         scale: Output multiplicative gain (1-100)
+         contrast: Output tonality compression (0-100)
+         linear: Whether to use linear difference
+         grayscale: Whether to output grayscale image
+
+     Returns:
+         Processed ELA image
+     """
+     # Convert image to float32 and normalize to [0, 1]
+     original = img.astype(np.float32) / 255
+
+     # Recompress the image at the requested JPEG quality
+     compressed = compress_jpg(img, quality)
+     compressed = compressed.astype(np.float32) / 255
+
+     # Calculate difference based on mode (both operands are normalized floats)
+     if not linear:
+         difference = cv.absdiff(original, compressed)
+         ela = cv.convertScaleAbs(cv.sqrt(difference) * 255, None, scale / 20)
+     else:
+         # Subtract the normalized original (mixing float and uint8 would error),
+         # mirroring the 0-255 scaling of the non-linear branch.
+         ela = cv.convertScaleAbs(cv.subtract(compressed, original) * 255, None, scale / 20)
+
+     # Apply contrast adjustment
+     contrast_value = int(contrast / 100 * 128)
+     ela = cv.LUT(ela, create_lut(contrast_value, contrast_value))
+
+     # Convert to grayscale if requested
+     if grayscale:
+         ela = desaturate(ela)
+
+     return Image.fromarray(ela)
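A minimal usage sketch (input path hypothetical; note that `genELA` expects an OpenCV-style BGR uint8 array):

```python
import cv2 as cv
from ela import genELA

img = cv.imread("photo.jpg")  # hypothetical input
ela_img = genELA(img, quality=75, scale=50, contrast=20)
ela_img.save("ela_result.png")
```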
exif.py ADDED
@@ -0,0 +1,11 @@
+ import tempfile
+ import exiftool
+ from PIL import Image
+
+ def exif_full_dump(image: Image.Image) -> dict:
+     """Extract all EXIF metadata from an image using exiftool."""
+     with tempfile.NamedTemporaryFile(suffix='.jpg', delete=True) as tmp:
+         # JPEG cannot store an alpha channel, so normalize the mode first.
+         image.convert('RGB').save(tmp.name)
+         with exiftool.ExifTool() as et:
+             metadata = et.get_metadata(tmp.name)
+     return metadata
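Note that `et.get_metadata(path)` is the legacy PyExifTool interface; releases from 0.5 onward expose it through `exiftool.ExifToolHelper` and return a list of dicts. A minimal usage sketch (path hypothetical; the `exiftool` binary must be on PATH):

```python
from PIL import Image
from exif import exif_full_dump

meta = exif_full_dump(Image.open("photo.jpg"))  # hypothetical input
print(meta.get("EXIF:Model", "no camera model recorded"))
```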
gradient.py ADDED
@@ -0,0 +1,50 @@
+ import numpy as np
+ import cv2 as cv
+ from PIL import Image
+
+ def norm_mat(mat):
+     return cv.normalize(mat, None, 0, 255, cv.NORM_MINMAX).astype(np.uint8)
+
+ def equalize_img(img):
+     ycrcb = cv.cvtColor(img, cv.COLOR_BGR2YCrCb)
+     ycrcb[:, :, 0] = cv.equalizeHist(ycrcb[:, :, 0])
+     return cv.cvtColor(ycrcb, cv.COLOR_YCrCb2BGR)
+
+ def create_lut(intensity):
+     # Identical brightness shift applied to all three channels.
+     lut = np.zeros((256, 1, 3), dtype=np.uint8)
+     for i in range(256):
+         lut[i, 0, :] = min(255, max(0, i + intensity))
+     return lut
+
+ def gradient_processing(image, intensity=90, blue_mode="Abs", invert=False, equalize=False):
+     image = np.array(image)
+     dx, dy = cv.spatialGradient(cv.cvtColor(image, cv.COLOR_BGR2GRAY))
+     intensity = int(intensity / 100 * 127)
+     sign = -1.0 if invert else 1.0
+     dx = (sign * dx).astype(np.float32)
+     dy = (sign * dy).astype(np.float32)
+     dx_abs = np.abs(dx)
+     dy_abs = np.abs(dy)
+     # Map signed gradients into [0, 255], centered at 127.
+     red = ((dx / np.max(dx_abs) * 127) + 127).astype(np.uint8)
+     green = ((dy / np.max(dy_abs) * 127) + 127).astype(np.uint8)
+     if blue_mode == "None":
+         blue = np.zeros_like(red)
+     elif blue_mode == "Flat":
+         blue = np.full_like(red, 255)
+     elif blue_mode == "Abs":
+         blue = norm_mat(dx_abs + dy_abs)
+     elif blue_mode == "Norm":
+         blue = norm_mat(np.linalg.norm(cv.merge((red, green)), axis=2))
+     else:
+         # Unknown mode: fall back to an empty channel instead of crashing in cv.merge.
+         blue = np.zeros_like(red)
+     gradient = cv.merge([blue, green, red])
+     if equalize:
+         gradient = equalize_img(gradient)
+     elif intensity > 0:
+         gradient = cv.LUT(gradient, create_lut(intensity))
+     return Image.fromarray(gradient)
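A minimal usage sketch (input path hypothetical):

```python
from PIL import Image
from gradient import gradient_processing

img = Image.open("photo.jpg")  # hypothetical input
grad = gradient_processing(img, intensity=90, blue_mode="Abs", equalize=True)
grad.save("gradient_map.png")
```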
hf_logger.py ADDED
@@ -0,0 +1,98 @@
+ import os
+ import base64
+ import json
+ import io
+ import datetime
+ from PIL import Image
+ import logging
+ from huggingface_hub import HfApi  # used for dataset repo creation/lookup
+ import numpy as np
+
+ logger = logging.getLogger(__name__)
+
+ HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"
+ LOCAL_LOG_DIR = "./hf_inference_logs"  # Local directory where log files are written
+
+ # Custom JSON encoder to handle numpy types
+ class NumpyEncoder(json.JSONEncoder):
+     def default(self, obj):
+         if isinstance(obj, np.floating):
+             return float(obj)
+         if isinstance(obj, np.integer):
+             return int(obj)
+         if isinstance(obj, np.ndarray):
+             return obj.tolist()
+         return json.JSONEncoder.default(self, obj)
+
+ def _pil_to_base64(image: Image.Image) -> str:
+     """Converts a PIL Image to a base64 string."""
+     # Explicitly check if the input is a PIL Image
+     if not isinstance(image, Image.Image):
+         raise TypeError(f"Expected a PIL Image, but received type: {type(image)}")
+
+     buffered = io.BytesIO()
+     # Ensure image is in RGB mode before saving as JPEG
+     if image.mode != 'RGB':
+         image = image.convert('RGB')
+     image.save(buffered, format="JPEG", quality=85)
+     return base64.b64encode(buffered.getvalue()).decode('utf-8')
+
+ def initialize_dataset_repo():
+     """Ensures the Hugging Face dataset repository exists, creating it if needed."""
+     api = HfApi(token=os.getenv("HF_TOKEN"))
+     try:
+         api.repo_info(repo_id=HF_DATASET_NAME, repo_type="dataset")
+         logger.info(f"Hugging Face dataset repository already exists: {HF_DATASET_NAME}")
+     except Exception:
+         logger.info(f"Creating new Hugging Face dataset repository: {HF_DATASET_NAME}")
+         api.create_repo(repo_id=HF_DATASET_NAME, repo_type="dataset", private=True)
+     return api  # Return the API object for subsequent operations
+
+ def log_inference_data(
+     original_image: Image.Image,
+     inference_params: dict,
+     model_predictions: list[dict],
+     ensemble_output: dict,
+     forensic_images: list[Image.Image],
+     agent_monitoring_data: dict,
+     human_feedback: dict = None
+ ):
+     """Logs a single inference event to a local JSON file (one file per event),
+     intended for later upload to the Hugging Face dataset repository."""
+     try:
+         api = initialize_dataset_repo()  # Ensure the target repository exists
+
+         original_image_b64 = _pil_to_base64(original_image)
+
+         forensic_images_b64 = []
+         for img_item in forensic_images:
+             if img_item is not None:
+                 if not isinstance(img_item, Image.Image):
+                     try:
+                         img_item = Image.fromarray(img_item)
+                     except Exception as e:
+                         logger.error(f"Error converting forensic image to PIL for base64 encoding: {e}")
+                         continue
+                 forensic_images_b64.append(_pil_to_base64(img_item))
+
+         new_entry = {
+             "timestamp": datetime.datetime.now().isoformat(),
+             "image": original_image_b64,
+             "inference_request": inference_params,
+             "model_predictions": model_predictions,
+             "ensemble_output": ensemble_output,
+             "forensic_outputs": forensic_images_b64,
+             "agent_monitoring_data": agent_monitoring_data,
+             "human_feedback": human_feedback if human_feedback is not None else {}
+         }
+
+         # Write each log entry to a uniquely named file in the local log directory
+         os.makedirs(LOCAL_LOG_DIR, exist_ok=True)
+         timestamp_str = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
+         log_file_path = os.path.join(LOCAL_LOG_DIR, f"log_{timestamp_str}.json")
+
+         # Serialize the new entry to JSON using the numpy-aware encoder
+         with open(log_file_path, 'w', encoding='utf-8') as f:
+             json.dump(new_entry, f, cls=NumpyEncoder, indent=2)
+
+         logger.info(f"Inference data logged successfully to local file: {log_file_path}")
+
+     except Exception as e:
+         logger.error(f"Failed to log inference data to local file: {e}")
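A minimal usage sketch (all values hypothetical; `HF_TOKEN` must be set in the environment for the repository check):

```python
from PIL import Image
from hf_logger import log_inference_data

log_inference_data(
    original_image=Image.open("photo.jpg"),        # hypothetical input
    inference_params={"model": "detector-v1"},     # hypothetical parameters
    model_predictions=[{"label": "ai", "score": 0.91}],
    ensemble_output={"label": "ai", "score": 0.88},
    forensic_images=[],
    agent_monitoring_data={},
)
```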
minmax.py ADDED
@@ -0,0 +1,82 @@
+ import numpy as np
+ import cv2 as cv
+ from PIL import Image
+
+ def norm_mat(mat):
+     return cv.normalize(mat, None, 0, 255, cv.NORM_MINMAX).astype(np.uint8)
+
+ def minmax_dev(patch, mask):
+     # Compare the central pixel against the masked neighborhood extrema.
+     c = patch[1, 1]
+     minimum, maximum, _, _ = cv.minMaxLoc(patch, mask)
+     if c < minimum:
+         return -1
+     if c > maximum:
+         return +1
+     return 0
+
+ def blk_filter(img, radius):
+     # Block-wise standard deviation, normalized to [0, 127].
+     result = np.zeros_like(img, np.float32)
+     rows, cols = result.shape
+     block = 2 * radius + 1
+     for i in range(radius, rows, block):
+         for j in range(radius, cols, block):
+             result[
+                 i - radius : i + radius + 1, j - radius : j + radius + 1
+             ] = np.std(
+                 img[i - radius : i + radius + 1, j - radius : j + radius + 1]
+             )
+     return cv.normalize(result, None, 0, 127, cv.NORM_MINMAX, cv.CV_8UC1)
+
+ def preprocess(image, channel=4, radius=2):
+     if not isinstance(image, np.ndarray):
+         image = np.array(image)  # Ensure image is a NumPy array
+     if channel == 0:
+         img = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
+     elif channel == 4:
+         b, g, r = cv.split(image.astype(np.float64))
+         img = cv.sqrt(cv.pow(b, 2) + cv.pow(g, 2) + cv.pow(r, 2))
+     else:
+         img = image[:, :, 3 - channel]
+     kernel = 3
+     border = kernel // 2
+     # View every 3x3 neighborhood as a patch without copying data.
+     shape = (img.shape[0] - kernel + 1, img.shape[1] - kernel + 1, kernel, kernel)
+     strides = 2 * img.strides
+     patches = np.lib.stride_tricks.as_strided(img, shape=shape, strides=strides)
+     patches = patches.reshape((-1, kernel, kernel))
+     mask = np.full((kernel, kernel), 255, dtype=np.uint8)
+     mask[border, border] = 0  # Exclude the central pixel from the extrema search
+     blocks = [0] * (shape[0] * shape[1])
+     for i, patch in enumerate(patches):
+         blocks[i] = minmax_dev(patch, mask)
+     # Use a dtype OpenCV supports (default int64 would make copyMakeBorder fail).
+     output = np.array(blocks, dtype=np.float32).reshape(shape[:-2])
+     output = cv.copyMakeBorder(
+         output, border, border, border, border, cv.BORDER_CONSTANT
+     )
+     low = output == -1
+     high = output == +1
+     minmax = np.zeros_like(image)
+     if radius > 0:
+         radius += 3
+         low = blk_filter(low, radius)
+         high = blk_filter(high, radius)
+         if channel <= 2:
+             minmax[:, :, 2 - channel] = low
+             minmax[:, :, 2 - channel] += high
+         else:
+             minmax = np.repeat(low[:, :, np.newaxis], 3, axis=2)
+             minmax += np.repeat(high[:, :, np.newaxis], 3, axis=2)
+         minmax = norm_mat(minmax)
+     else:
+         if channel == 0:
+             minmax[low] = [0, 0, 255]
+             minmax[high] = [0, 0, 255]
+         elif channel == 1:
+             minmax[low] = [0, 255, 0]
+             minmax[high] = [0, 255, 0]
+         elif channel == 2:
+             minmax[low] = [255, 0, 0]
+             minmax[high] = [255, 0, 0]
+         elif channel == 3:
+             minmax[low] = [255, 255, 255]
+             minmax[high] = [255, 255, 255]
+     return Image.fromarray(minmax)
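A minimal usage sketch (input path hypothetical; `channel=4` analyzes the RGB norm):

```python
from PIL import Image
from minmax import preprocess

img = Image.open("photo.jpg")  # hypothetical input
deviation_map = preprocess(img, channel=4, radius=2)
deviation_map.save("minmax_map.png")
```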
text_content.py ADDED
@@ -0,0 +1,57 @@
+ QUICK_INTRO = """
+ ### The Detection Dilemma: The Degentic Games
+
+ The cat-and-mouse game between digital forgery and detection reached a tipping point early last year, after years of escalating concern and anxiety. The most ambitious, expensive, and resource-intensive detection model to date launched with genuinely impressive results. Impressive… for an embarrassing two to three weeks.
+
+ Then came the knockout punches. New SOTA models emerged every few weeks, in every imaginable domain -- image, audio, video, music. Generated images have reached a level of realism at which an untrained eye can no longer tell real from fake. [TO-DO: Add Citation to the study]
+
+ And let's be honest: we saw this coming. When has humanity ever resisted accelerating technology that promises... *interesting* applications? As the ancients wisely tweeted: 🔞 drives innovation.
+
+ It's time for a reset. Quit crying and get ready. Didn't you hear? The long-awaited Degentic Games is starting soon.
+
+ Choose wisely.
+
+ ---
+ ### **Overview of Multi-Model Consensus Methods in ML**
+ | **Method** | **Category** | **Description** | **Key Advantages** | **Key Limitations** | **Weaknesses** | **Strengths** |
+ |--------------------------|----------------------------|--------------------------------------------------|---------------------------------------------------|--------------------------------------------------------------|----------------------------------------|--------------------------------------------------------------------------------|
+ | **Bagging (e.g., Random Forest)** | **Traditional Ensembles** | Trains multiple models on bootstrapped data subsets, aggregating predictions | Reduces overfitting (~variance reduction) | Computationally costly for large datasets; models can be correlated | Not robust to adversarial attacks | Simple to implement; robust to noisy data; handles high-dimensional data well |
+ | **Boosting (e.g., XGBoost, LightGBM)** | **Traditional Ensembles** | Iteratively corrects errors using weighted models | High accuracy on structured/tabular data | Risk of overfitting; sensitive to noisy data | Computationally intensive | Dominates in competitions (e.g., Kaggle); scalable for medium datasets |
+ | **Stacking** | **Traditional Ensembles** | Combines predictions via a meta-learner | Can outperform individual models; flexible | Increased complexity and data leakage risk | Requires careful hyperparameter tuning | Excels in combining diverse models (e.g., trees + SVMs + linear models) |
+ | **Deep Ensembles** | **Deep Learning Ensembles**| Multiple independently trained neural networks | Uncertainty estimation; robust to data shifts | High computational cost; memory-heavy | Model coordination challenges | State-of-the-art in safety-critical domains (e.g., medical imaging, autonomous vehicles) |
+ | **Snapshot Ensembles** | **Deep Learning Ensembles**| Saves models at different optimization stages | Efficient (only one training run) | Limited diversity (same architecture/init) | Requires careful checkpoint selection | Lightweight for tasks like on-device deployment |
+ | **Monte Carlo Dropout** | **Approximate Ensembles** | Applies dropout at inference to simulate many models | Free ensemble (during testing) | Approximates uncertainty poorly compared to deep ensembles | Limited diversity | Cheap and simple; useful for quick uncertainty estimates |
+ | **Mixture of Experts (MoE)** | **Scalable Ensembles** | Specialized sub-models (experts) with a gating mechanism | Efficient scaling (only activate sub-models) | Training instability; uneven expert utilization | Requires expert/gate orchestration | Dominates large-scale applications like Switch Transformers and Hyper-Cloud systems |
+ | **Bayesian Neural Networks (BNNs)** | **Probabilistic Ensembles** | Models weights as probability distributions | Built-in uncertainty quantification | Intractable to train exactly; approximations needed | Difficult optimization | Essential for risk-averse applications (robotics, finance) |
+ | **Ensemble Knowledge Distillation** | **Model Compression** | Trains a single model to mimic an ensemble | Reduces compute/memory demands | Loses some ensemble benefits (diversity, uncertainty) | Relies on a high-quality teacher ensemble | Enables deployment of ensemble-like performance in compact models (edge devices) |
+ | **Noisy Student Training** | **Semi-Supervised Ensembles** | Iterative self-training with teacher-student loops | Uses unlabeled data effectively; improves robustness| Needs large unlabeled data and computational resources | Vulnerable to error propagation | State-of-the-art in semi-supervised settings (e.g., NLP) |
+ | **Evolutionary Ensembles** | **Dynamic Ensembles** | Uses genetic algorithms to evolve model populations | Adaptive diversity generation | High time/cost for evolution; niche use cases | Hard to interpret | Useful for non-stationary environments/on datasets with drift |
+ | **Consensus Networks** | **NLP/Serverless Ensembles** | Distributes models across clients/aggregates votes | Decentralized privacy-preserving predictions | Communication overhead; non-i.i.d. data conflicts | Requires synchronized coordination | Used in federated learning systems (e.g., healthcare, finance) |
+ | **Hybrid Systems** | **Cross-Architecture Ensembles** | Combines models (e.g., CNNs, GNNs, transformers) | Captures multi-modal or heterogeneous patterns | Integration complexity; delayed inference | Model conflicts | Dominates in tasks requiring domain-specific reasoning (e.g., drug discovery) |
+ | **Self-Supervised Ensembles** | **Vision/NLP** | Uses contrastive learning with multiple models (e.g., MoCo, SimCLR) | Data-efficient; strong performance on downstream tasks | Training is resource-heavy; requires pre-training at scale | Low interpretability | Foundations for modern vision/NLP architectures (e.g., resists data scarcity) |
+ ---"""
+
+ IMPLEMENTATION = """
+ ### 1. **Shift away from the belief that more data leads to better results. Rather, focus on insight-driven, "quality over quantity" datasets in training.**
+ * **Move Away from Terabyte-Scale Datasets**: Focus on **quality over quantity** by curating a smaller, highly diverse, and **labeled dataset** emphasizing edge cases and the latest AI generations.
+ * **Active Learning**: Implement active learning techniques to iteratively select the most informative samples for human labeling, reducing dataset size while maintaining effectiveness.
+
+ ### 2. **Efficient Model Architectures**
+ * **Adopt Lightweight, State-of-the-Art Models**: Explore models designed for efficiency like MobileNet, EfficientNet, or recent advancements in vision transformers (ViTs) tailored for forensic analysis.
+ * **Transfer Learning with Fine-Tuning**: Use pre-trained models fine-tuned on your curated dataset to retain general knowledge while adapting to specific AI image detection tasks.
+
+ ### 3. **Multi-Modal and Hybrid Approaches**
+ * **Combine Image Forensics with Metadata Analysis**: Integrate insights from image processing with metadata (e.g., EXIF, XMP) for a more robust detection framework.
+ * **Incorporate Knowledge Graphs for AI Model Identification**: If feasible, build or utilize knowledge graphs mapping known AI models to their generation signatures for targeted detection.
+
+ ### 4. **Continuous Learning and Update Mechanism**
+ * **Online Learning or Incremental Training**: Implement a system that can incrementally update the model with new, strategically selected samples, adapting to new AI generation techniques.
+ * **Community-Driven Updates**: Establish a feedback loop with users/community to report undetected AI images, fueling model updates.
+
+ ### 5. **Evaluation and Validation**
+ * **Robust Validation Protocols**: Regularly test against unseen, diverse datasets including novel AI generations not present during training.
+ * **Benchmark Against State-of-the-Art**: Periodically compare performance with newly published detection models or techniques.
+
+ """
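To make the consensus table above concrete, here is a minimal soft-voting sketch (the detector scores and weights are hypothetical):

```python
import numpy as np

def soft_vote(scores, weights=None):
    """Weighted average of per-model 'AI-generated' probabilities."""
    scores = np.asarray(scores, dtype=float)
    weights = np.ones_like(scores) if weights is None else np.asarray(weights, dtype=float)
    return float(np.average(scores, weights=weights))

# Hypothetical scores from three detectors on the same image.
print(f"ensemble score: {soft_vote([0.91, 0.62, 0.78], weights=[2, 1, 1]):.2f}")
```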
utils.py ADDED
@@ -0,0 +1,27 @@
+ import numpy as np
+ import io
+ from PIL import Image, ImageFilter
+
+ def softmax(vector):
+     e = np.exp(vector - np.max(vector))  # subtract max for numerical stability
+     return e / e.sum()
+
+ def augment_image(img_pil, methods, rotate_degrees=0, noise_level=0, sharpen_strength=1):
+     for method in methods:
+         if method == "rotate":
+             img_pil = img_pil.rotate(rotate_degrees)
+         elif method == "add_noise":
+             # Work in float to avoid uint8 wraparound, then clip back to [0, 255].
+             noise = np.random.normal(0, noise_level, img_pil.size[::-1] + (3,))
+             noisy = np.clip(np.array(img_pil).astype(np.float64) + noise, 0, 255)
+             img_pil = Image.fromarray(noisy.astype(np.uint8))
+         elif method == "sharpen":
+             img_pil = img_pil.filter(ImageFilter.UnsharpMask(radius=2, percent=sharpen_strength, threshold=3))
+     return img_pil, img_pil
+
+ def convert_pil_to_bytes(image, format='JPEG'):
+     img_byte_arr = io.BytesIO()
+     image.save(img_byte_arr, format=format)
+     return img_byte_arr.getvalue()
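A minimal usage sketch (input path and augmentation settings hypothetical):

```python
from PIL import Image
from utils import augment_image, convert_pil_to_bytes

img = Image.open("photo.jpg")  # hypothetical input
augmented, _ = augment_image(img, ["rotate", "add_noise"], rotate_degrees=15, noise_level=8)
payload = convert_pil_to_bytes(augmented)  # JPEG bytes, e.g. for an HTTP request
```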
wavelet.py ADDED
@@ -0,0 +1,20 @@
+ import numpy as np
+ import pywt
+ import cv2
+ from PIL import Image
+
+ def wavelet_blocking_noise_estimation(image: Image.Image, blocksize: int = 8) -> Image.Image:
+     """Estimate local noise using wavelet blocking. Returns a PIL image of the noise map."""
+     im = np.array(image.convert('L'))
+     y = np.double(im)
+     # Single-level 2D DWT; the diagonal detail band cD carries mostly noise.
+     cA1, (cH, cV, cD) = pywt.dwt2(y, 'db8')
+     cD = cD[:cD.shape[0] // blocksize * blocksize, :cD.shape[1] // blocksize * blocksize]
+     block = np.zeros((cD.shape[0] // blocksize, cD.shape[1] // blocksize, blocksize ** 2))
+     for ii in range(0, cD.shape[0] - blocksize + 1, blocksize):
+         for jj in range(0, cD.shape[1] - blocksize + 1, blocksize):
+             block_elements = cD[ii:ii+blocksize, jj:jj+blocksize]
+             block[ii // blocksize, jj // blocksize, :] = block_elements.flatten()
+     # Robust per-block sigma estimate: median absolute coefficient / 0.6745 (MAD estimator).
+     noise_map = np.median(np.abs(block), axis=2) / 0.6745
+     noise_map_8u = cv2.normalize(noise_map, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
+     resized_noise_map = cv2.resize(noise_map_8u, (im.shape[1], im.shape[0]), interpolation=cv2.INTER_NEAREST)
+     return Image.fromarray(resized_noise_map)
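A minimal usage sketch (input path hypothetical):

```python
from PIL import Image
from wavelet import wavelet_blocking_noise_estimation

img = Image.open("photo.jpg")  # hypothetical input
noise_map = wavelet_blocking_noise_estimation(img, blocksize=8)
noise_map.save("noise_map.png")
```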