Upload 10 files

- __init__.py +0 -0
- bitplane.py +27 -0
- ela.py +64 -0
- exif.py +11 -0
- gradient.py +50 -0
- hf_logger.py +98 -0
- minmax.py +82 -0
- text_content.py +57 -0
- utils.py +27 -0
- wavelet.py +20 -0
__init__.py
ADDED
File without changes
bitplane.py
ADDED
import cv2 as cv
import numpy as np
from PIL import Image

def bit_plane_extractor(
    image: Image.Image,
    channel: str = "Luminance",
    bit: int = 0,
    filter_type: str = "Disabled",
) -> Image.Image:
    """Extract and visualize a bit plane from a selected channel of the image."""
    img = np.array(image.convert("RGB"))
    if channel == "Luminance":
        img = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
    elif channel == "RGB Norm":
        # Euclidean norm across the channels; the split order is R, G, B
        # because the array comes from an RGB-converted PIL image.
        r, g, b = cv.split(img.astype(np.float64))
        img = np.sqrt(np.power(r, 2) + np.power(g, 2) + np.power(b, 2)).astype(np.uint8)
    else:
        idx = {"Red": 0, "Green": 1, "Blue": 2}[channel]
        img = img[:, :, idx]
    # Mask out the requested bit plane, then stretch it to the full 8-bit range.
    plane = cv.bitwise_and(np.full_like(img, 2 ** bit), img)
    plane = cv.normalize(plane, None, 0, 255, cv.NORM_MINMAX).astype(np.uint8)
    if filter_type == "Median":
        plane = cv.medianBlur(plane, 3)
    elif filter_type == "Gaussian":
        plane = cv.GaussianBlur(plane, (3, 3), 0)
    return Image.fromarray(plane)
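Below the listing, a minimal usage sketch (not part of the upload; the file paths are placeholders):

# Hypothetical usage: pull the least-significant bit plane of the blue channel.
from PIL import Image
from bitplane import bit_plane_extractor

img = Image.open("sample.jpg")  # placeholder path
plane = bit_plane_extractor(img, channel="Blue", bit=0, filter_type="Median")
plane.save("blue_bit0.png")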
ela.py
ADDED
import numpy as np
import cv2 as cv
from time import time
from PIL import Image

def compress_jpg(image, quality):
    """Compress an image in memory using JPEG compression."""
    encode_param = [int(cv.IMWRITE_JPEG_QUALITY), quality]
    _, buffer = cv.imencode('.jpg', image, encode_param)
    return cv.imdecode(buffer, cv.IMREAD_COLOR)

def desaturate(image):
    """Convert an image to grayscale (assumes OpenCV's BGR channel order)."""
    return cv.cvtColor(image, cv.COLOR_BGR2GRAY)

def create_lut(contrast, brightness):
    """Create a lookup table for contrast and brightness adjustment."""
    lut = np.arange(256, dtype=np.uint8)
    return cv.convertScaleAbs(lut, None, contrast / 128, brightness)

def elapsed_time(start):
    """Format the time elapsed since `start` in seconds."""
    return f"{time() - start:.3f}s"

def genELA(img, quality=75, scale=50, contrast=20, linear=False, grayscale=False):
    """
    Perform Error Level Analysis on an image.

    Args:
        img: Input image (numpy array)
        quality: JPEG compression quality (1-100)
        scale: Output multiplicative gain (1-100)
        contrast: Output tonality compression (0-100)
        linear: Whether to use linear difference
        grayscale: Whether to output a grayscale image

    Returns:
        Processed ELA image
    """
    # Convert the image to float32 and normalize to [0, 1]
    original = img.astype(np.float32) / 255

    # Recompress the image and normalize it to the same range
    compressed = compress_jpg(img, quality)
    compressed = compressed.astype(np.float32) / 255

    # Calculate the difference based on the selected mode
    if not linear:
        difference = cv.absdiff(original, compressed)
        ela = cv.convertScaleAbs(cv.sqrt(difference) * 255, None, scale / 20)
    else:
        # Compare the two normalized images; the original code subtracted the
        # uint8 input from the float compressed image, mixing value ranges.
        difference = cv.subtract(compressed, original)
        ela = cv.convertScaleAbs(difference * 255, None, scale / 20)

    # Apply contrast adjustment through a lookup table
    contrast_value = int(contrast / 100 * 128)
    ela = cv.LUT(ela, create_lut(contrast_value, contrast_value))

    # Convert to grayscale if requested
    if grayscale:
        ela = desaturate(ela)

    return Image.fromarray(ela)
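A minimal usage sketch (paths are placeholders; OpenCV loads images in BGR order):

# Hypothetical usage: run ELA and save the visualization.
import cv2 as cv
from ela import genELA

img = cv.imread("suspect.jpg")  # placeholder path
result = genELA(img, quality=75, scale=50, contrast=20)
result.save("suspect_ela.png")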
exif.py
ADDED
import tempfile

import exiftool
from PIL import Image

def exif_full_dump(image: Image.Image) -> dict:
    """Extract all EXIF metadata from an image using exiftool."""
    # Write the image to a temporary JPEG so the external exiftool binary can read it.
    with tempfile.NamedTemporaryFile(suffix='.jpg', delete=True) as tmp:
        image.save(tmp.name)
        # Note: ExifTool().get_metadata(path) is the PyExifTool 0.4.x API;
        # newer releases expose ExifToolHelper.get_metadata([paths]) instead.
        with exiftool.ExifTool() as et:
            metadata = et.get_metadata(tmp.name)
    return metadata
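If a newer PyExifTool (0.5+) is installed, the equivalent call — a sketch under that assumption, with a placeholder path — would look like:

# Assumes PyExifTool >= 0.5, where ExifToolHelper replaces ExifTool for metadata reads.
import exiftool

with exiftool.ExifToolHelper() as et:
    metadata = et.get_metadata(["photo.jpg"])[0]  # returns one dict per file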
gradient.py
ADDED
import numpy as np
import cv2 as cv
from PIL import Image

def norm_mat(mat):
    return cv.normalize(mat, None, 0, 255, cv.NORM_MINMAX).astype(np.uint8)

def equalize_img(img):
    ycrcb = cv.cvtColor(img, cv.COLOR_BGR2YCrCb)
    ycrcb[:, :, 0] = cv.equalizeHist(ycrcb[:, :, 0])
    return cv.cvtColor(ycrcb, cv.COLOR_YCrCb2BGR)

def create_lut(intensity):
    """Build a 3-channel LUT that shifts every value by `intensity`, clipped to [0, 255]."""
    lut = np.zeros((256, 1, 3), dtype=np.uint8)
    for i in range(256):
        lut[i, 0, :] = min(255, max(0, i + intensity))
    return lut

def gradient_processing(image, intensity=90, blue_mode="Abs", invert=False, equalize=False):
    image = np.array(image)
    # PIL arrays are RGB, so convert with an RGB flag (the original used BGR2GRAY).
    dx, dy = cv.spatialGradient(cv.cvtColor(image, cv.COLOR_RGB2GRAY))
    intensity = int(intensity / 100 * 127)
    dx = dx.astype(np.float32)
    dy = dy.astype(np.float32)
    if invert:
        dx, dy = -dx, -dy
    dx_abs = np.abs(dx)
    dy_abs = np.abs(dy)
    # Map the gradients to [0, 255] around a mid-gray of 127,
    # guarding against division by zero on flat images.
    red = ((dx / max(np.max(dx_abs), 1) * 127) + 127).astype(np.uint8)
    green = ((dy / max(np.max(dy_abs), 1) * 127) + 127).astype(np.uint8)
    if blue_mode == "None":
        blue = np.zeros_like(red)
    elif blue_mode == "Flat":
        blue = np.full_like(red, 255)
    elif blue_mode == "Abs":
        blue = norm_mat(dx_abs + dy_abs)
    elif blue_mode == "Norm":
        blue = norm_mat(np.linalg.norm(cv.merge((red, green)), axis=2))
    else:
        raise ValueError(f"Unknown blue_mode: {blue_mode}")
    gradient = cv.merge([blue, green, red])
    if equalize:
        gradient = equalize_img(gradient)
    elif intensity > 0:
        gradient = cv.LUT(gradient, create_lut(intensity))
    # The merged result is in OpenCV's BGR order; convert to RGB for PIL output.
    return Image.fromarray(cv.cvtColor(gradient, cv.COLOR_BGR2RGB))
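A minimal usage sketch (the path is a placeholder):

# Hypothetical usage: render the gradient map of an image.
from PIL import Image
from gradient import gradient_processing

img = Image.open("photo.jpg")  # placeholder path
grad = gradient_processing(img, intensity=90, blue_mode="Abs")
grad.save("photo_gradient.png")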
hf_logger.py
ADDED
import os
import base64
import json
import io
import datetime
from PIL import Image
import logging
from huggingface_hub import HfApi  # used only for dataset repo creation/verification
import numpy as np

logger = logging.getLogger(__name__)

HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"
LOCAL_LOG_DIR = "./hf_inference_logs"  # local directory where logs are stored

# Custom JSON encoder to handle numpy types
class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)

def _pil_to_base64(image: Image.Image) -> str:
    """Converts a PIL Image to a base64 string."""
    # Explicitly check that the input is a PIL Image
    if not isinstance(image, Image.Image):
        raise TypeError(f"Expected a PIL Image, but received type: {type(image)}")

    buffered = io.BytesIO()
    # Ensure the image is in RGB mode before saving as JPEG
    if image.mode != 'RGB':
        image = image.convert('RGB')
    image.save(buffered, format="JPEG", quality=85)
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

# Logs are written locally as individual JSON files rather than appended
# to an in-memory datasets.Dataset object.
def initialize_dataset_repo():
    """Initializes or verifies the Hugging Face dataset repository."""
    api = HfApi(token=os.getenv("HF_TOKEN"))
    try:
        api.repo_info(repo_id=HF_DATASET_NAME, repo_type="dataset")
        logger.info(f"Hugging Face dataset repository already exists: {HF_DATASET_NAME}")
    except Exception:
        logger.info(f"Creating new Hugging Face dataset repository: {HF_DATASET_NAME}")
        api.create_repo(repo_id=HF_DATASET_NAME, repo_type="dataset", private=True)
    return api  # returned for subsequent operations

def log_inference_data(
    original_image: Image.Image,
    inference_params: dict,
    model_predictions: list[dict],
    ensemble_output: dict,
    forensic_images: list[Image.Image],
    agent_monitoring_data: dict,
    human_feedback: dict = None
):
    """Logs a single inference event to a local JSON file; the dataset
    repository is created/verified so the logs can be uploaded later."""
    try:
        initialize_dataset_repo()  # ensure the repository exists (client not yet used)

        original_image_b64 = _pil_to_base64(original_image)

        forensic_images_b64 = []
        for img_item in forensic_images:
            if img_item is not None:
                if not isinstance(img_item, Image.Image):
                    try:
                        img_item = Image.fromarray(img_item)
                    except Exception as e:
                        logger.error(f"Error converting forensic image to PIL for base64 encoding: {e}")
                        continue
                forensic_images_b64.append(_pil_to_base64(img_item))

        new_entry = {
            "timestamp": datetime.datetime.now().isoformat(),
            "image": original_image_b64,
            "inference_request": inference_params,
            "model_predictions": model_predictions,
            "ensemble_output": ensemble_output,
            "forensic_outputs": forensic_images_b64,
            "agent_monitoring_data": agent_monitoring_data,
            "human_feedback": human_feedback if human_feedback is not None else {}
        }

        # Build a unique path for the new log file in the local directory
        os.makedirs(LOCAL_LOG_DIR, exist_ok=True)
        timestamp_str = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
        log_file_path = os.path.join(LOCAL_LOG_DIR, f"log_{timestamp_str}.json")

        # Serialize the entry to JSON using the numpy-aware encoder
        with open(log_file_path, 'w', encoding='utf-8') as f:
            json.dump(new_entry, f, cls=NumpyEncoder, indent=2)

        logger.info(f"Inference data logged successfully to local file: {log_file_path}")

    except Exception as e:
        logger.error(f"Failed to log inference data to local file: {e}")
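A minimal invocation sketch (all argument values are illustrative; HF_TOKEN must be set for the repository check to succeed):

# Hypothetical call with placeholder data.
from PIL import Image
from hf_logger import log_inference_data

log_inference_data(
    original_image=Image.open("query.jpg"),  # placeholder path
    inference_params={"model_weights": "v1", "confidence_threshold": 0.75},
    model_predictions=[{"model": "detector-a", "score": 0.91}],
    ensemble_output={"label": "ai-generated", "score": 0.88},
    forensic_images=[],
    agent_monitoring_data={},
)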
minmax.py
ADDED
import numpy as np
import cv2 as cv
from PIL import Image

def norm_mat(mat):
    return cv.normalize(mat, None, 0, 255, cv.NORM_MINMAX).astype(np.uint8)

def minmax_dev(patch, mask):
    """Classify the center pixel as a local minimum (-1), maximum (+1), or neither (0)."""
    c = patch[1, 1]
    minimum, maximum, _, _ = cv.minMaxLoc(patch, mask)
    if c < minimum:
        return -1
    if c > maximum:
        return +1
    return 0

def blk_filter(img, radius):
    """Block-wise standard deviation, normalized to [0, 127]."""
    result = np.zeros_like(img, np.float32)
    rows, cols = result.shape
    block = 2 * radius + 1
    for i in range(radius, rows, block):
        for j in range(radius, cols, block):
            result[
                i - radius : i + radius + 1, j - radius : j + radius + 1
            ] = np.std(
                img[i - radius : i + radius + 1, j - radius : j + radius + 1]
            )
    return cv.normalize(result, None, 0, 127, cv.NORM_MINMAX, cv.CV_8UC1)

def preprocess(image, channel=4, radius=2):
    """Min/max deviation map. `channel`: 0 = luminance, 1-3 = single channel, 4 = channel norm."""
    if not isinstance(image, np.ndarray):
        image = np.array(image)  # ensure the image is a numpy array (RGB if from PIL)
    if channel == 0:
        img = cv.cvtColor(image, cv.COLOR_RGB2GRAY)  # PIL arrays are RGB
    elif channel == 4:
        # Euclidean norm across the three channels (order is irrelevant here).
        b, g, r = cv.split(image.astype(np.float64))
        img = cv.sqrt(cv.pow(b, 2) + cv.pow(g, 2) + cv.pow(r, 2))
    else:
        # Channels 1-3 index the last axis in reverse: 1 -> 2, 2 -> 1, 3 -> 0.
        img = image[:, :, 3 - channel]
    # Slide a 3x3 window over the image using stride tricks.
    kernel = 3
    border = kernel // 2
    shape = (img.shape[0] - kernel + 1, img.shape[1] - kernel + 1, kernel, kernel)
    strides = 2 * img.strides
    patches = np.lib.stride_tricks.as_strided(img, shape=shape, strides=strides)
    patches = patches.reshape((-1, kernel, kernel))
    # Mask out the center pixel so it is compared against its 8 neighbors only.
    mask = np.full((kernel, kernel), 255, dtype=np.uint8)
    mask[border, border] = 0
    blocks = [0] * (shape[0] * shape[1])
    for i, patch in enumerate(patches):
        blocks[i] = minmax_dev(patch, mask)
    output = np.array(blocks).reshape(shape[:-2])
    output = cv.copyMakeBorder(
        output, border, border, border, border, cv.BORDER_CONSTANT
    )
    low = output == -1
    high = output == +1
    minmax = np.zeros_like(image)
    if radius > 0:
        radius += 3
        low = blk_filter(low, radius)
        high = blk_filter(high, radius)
        if channel <= 2:
            minmax[:, :, 2 - channel] = low
            minmax[:, :, 2 - channel] += high
        else:
            minmax = np.repeat(low[:, :, np.newaxis], 3, axis=2)
            minmax += np.repeat(high[:, :, np.newaxis], 3, axis=2)
            minmax = norm_mat(minmax)
    else:
        if channel == 0:
            minmax[low] = [0, 0, 255]
            minmax[high] = [0, 0, 255]
        elif channel == 1:
            minmax[low] = [0, 255, 0]
            minmax[high] = [0, 255, 0]
        elif channel == 2:
            minmax[low] = [255, 0, 0]
            minmax[high] = [255, 0, 0]
        elif channel == 3:
            minmax[low] = [255, 255, 255]
            minmax[high] = [255, 255, 255]
    return Image.fromarray(minmax)
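A minimal usage sketch (the path is a placeholder):

# Hypothetical usage: build a min/max deviation map from the channel norm.
from PIL import Image
from minmax import preprocess

img = Image.open("photo.png")  # placeholder path
devmap = preprocess(img, channel=4, radius=2)
devmap.save("photo_minmax.png")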
text_content.py
ADDED
QUICK_INTRO = """
### The Detection Dilemma: The Degentic Games

The cat-and-mouse game between digital forgery and detection reached a tipping point early last year, after years of escalating concern and anxiety. The most ambitious, expensive, and resource-intensive detection model yet launched with genuinely impressive results. Impressive… for an embarrassing two to three weeks.

Then came the knockout punches. New SOTA models emerged every few weeks, in every imaginable domain -- image, audio, video, music. Generated images have reached a level of realism where an untrained eye can no longer tell real from fake. [TO-DO: Add Citation to the study]

And let's be honest: we saw this coming. When has humanity ever resisted accelerating technology that promises... *interesting* applications? As the ancients wisely tweeted: 🔞 drives innovation.

It's time for a reset. Quit crying and get ready. Didn't you hear? The long-awaited Degentic Games are starting soon.


Choose wisely.

---
### **Overview of Multi-Model Consensus Methods in ML**
| **Method** | **Category** | **Description** | **Key Advantages** | **Key Limitations** | **Weaknesses** | **Strengths** |
|---|---|---|---|---|---|---|
| **Bagging (e.g., Random Forest)** | **Traditional Ensembles** | Trains multiple models on bootstrapped data subsets, aggregating predictions | Reduces overfitting (~variance reduction) | Computationally costly for large datasets; models can be correlated | Not robust to adversarial attacks | Simple to implement; robust to noisy data; handles high-dimensional data well |
| **Boosting (e.g., XGBoost, LightGBM)** | **Traditional Ensembles** | Iteratively corrects errors using weighted models | High accuracy on structured/tabular data | Risk of overfitting; sensitive to noisy data | Computationally intensive | Dominates in competitions (e.g., Kaggle); scalable for medium datasets |
| **Stacking** | **Traditional Ensembles** | Combines predictions via a meta-learner | Can outperform individual models; flexible | Increased complexity and data leakage risk | Requires careful hyperparameter tuning | Excels in combining diverse models (e.g., trees + SVMs + linear models) |
| **Deep Ensembles** | **Deep Learning Ensembles** | Multiple independently trained neural networks | Uncertainty estimation; robust to data shifts | High computational cost; memory-heavy | Model coordination challenges | State-of-the-art in safety-critical domains (e.g., medical imaging, autonomous vehicles) |
| **Snapshot Ensembles** | **Deep Learning Ensembles** | Saves models at different optimization stages | Efficient (only one training run) | Limited diversity (same architecture/init) | Requires careful checkpoint selection | Lightweight for tasks like on-device deployment |
| **Monte Carlo Dropout** | **Approximate Ensembles** | Applies dropout at inference to simulate many models | Free ensemble (during testing) | Approximates uncertainty poorly compared to deep ensembles | Limited diversity | Cheap and simple; useful for quick uncertainty estimates |
| **Mixture of Experts (MoE)** | **Scalable Ensembles** | Specialized sub-models (experts) with a gating mechanism | Efficient scaling (only activate sub-models) | Training instability; uneven expert utilization | Requires expert/gate orchestration | Dominates large-scale applications like Switch Transformers and Hyper-Cloud systems |
| **Bayesian Neural Networks (BNNs)** | **Probabilistic Ensembles** | Models weights as probability distributions | Built-in uncertainty quantification | Intractable to train exactly; approximations needed | Difficult optimization | Essential for risk-averse applications (robotics, finance) |
| **Ensemble Knowledge Distillation** | **Model Compression** | Trains a single model to mimic an ensemble | Reduces compute/memory demands | Loses some ensemble benefits (diversity, uncertainty) | Relies on a high-quality teacher ensemble | Enables deployment of ensemble-like performance in compact models (edge devices) |
| **Noisy Student Training** | **Semi-Supervised Ensembles** | Iterative self-training with teacher-student loops | Uses unlabeled data effectively; improves robustness | Needs large unlabeled data and computational resources | Vulnerable to error propagation | State-of-the-art in semi-supervised settings (e.g., NLP) |
| **Evolutionary Ensembles** | **Dynamic Ensembles** | Uses genetic algorithms to evolve model populations | Adaptive diversity generation | High time/cost for evolution; niche use cases | Hard to interpret | Useful for non-stationary environments/on datasets with drift |
| **Consensus Networks** | **NLP/Serverless Ensembles** | Distributes models across clients/aggregates votes | Decentralized privacy-preserving predictions | Communication overhead; non-i.i.d. data conflicts | Requires synchronized coordination | Feeds into federated learning systems (e.g., healthcare, finance) |
| **Hybrid Systems** | **Cross-Architecture Ensembles** | Combines models (e.g., CNNs, GNNs, transformers) | Captures multi-modal or heterogeneous patterns | Integration complexity; delayed inference | Model conflicts | Dominates in tasks requiring domain-specific reasoning (e.g., drug discovery) |
| **Self-Supervised Ensembles** | **Vision/NLP** | Uses contrastive learning with multiple models (e.g., MoCo, SimCLR) | Data-efficient; strong performance on downstream tasks | Training is resource-heavy; requires pre-training at scale | Low interpretability | Foundations for modern vision/NLP architectures (e.g., resists data scarcity) |
---"""

IMPLEMENTATION = """
### 1. **Shift away from the belief that more data leads to better results; focus instead on insight-driven, "quality over quantity" datasets in training.**
* **Move Away from Terabyte-Scale Datasets**: Focus on **quality over quantity** by curating a smaller, highly diverse, **labeled dataset** emphasizing edge cases and the latest AI generations.
* **Active Learning**: Implement active learning techniques to iteratively select the most informative samples for human labeling, reducing dataset size while maintaining effectiveness.

### 2. **Efficient Model Architectures**
* **Adopt Lightweight, State-of-the-Art Models**: Explore models designed for efficiency, such as MobileNet, EfficientNet, or recent vision transformers (ViTs) tailored for forensic analysis.
* **Transfer Learning with Fine-Tuning**: Fine-tune pre-trained models on your curated dataset to retain general knowledge while adapting to specific AI image detection tasks.

### 3. **Multi-Modal and Hybrid Approaches**
* **Combine Image Forensics with Metadata Analysis**: Integrate insights from image processing with metadata (e.g., EXIF, XMP) for a more robust detection framework.
* **Incorporate Knowledge Graphs for AI Model Identification**: If feasible, build or utilize knowledge graphs mapping known AI models to their generation signatures for targeted detection.

### 4. **Continuous Learning and Update Mechanism**
* **Online Learning or Incremental Training**: Implement a system that can incrementally update the model with new, strategically selected samples, adapting to new AI generation techniques.
* **Community-Driven Updates**: Establish a feedback loop with users/community to report undetected AI images, fueling model updates.

### 5. **Evaluation and Validation**
* **Robust Validation Protocols**: Regularly test against unseen, diverse datasets, including novel AI generations not present during training.
* **Benchmark Against State-of-the-Art**: Periodically compare performance with newly published detection models and techniques.
"""
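The consensus table above is descriptive only; as a concrete anchor, here is a minimal soft-voting sketch over per-model scores (all names, scores, and weights are hypothetical):

# Hypothetical soft-voting consensus over per-model "AI-generated" probabilities.
import numpy as np

def soft_vote(scores, weights=None):
    """Weighted average of per-model probabilities; returns the consensus score."""
    scores = np.asarray(scores, dtype=np.float64)
    weights = np.ones_like(scores) if weights is None else np.asarray(weights, dtype=np.float64)
    return float(np.sum(scores * weights) / np.sum(weights))

consensus = soft_vote([0.91, 0.72, 0.64], weights=[2.0, 1.0, 1.0])  # -> 0.795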
utils.py
ADDED
import numpy as np
import io
from PIL import Image, ImageFilter

def softmax(vector):
    e = np.exp(vector - np.max(vector))  # subtract the max for numerical stability
    return e / e.sum()

def augment_image(img_pil, methods, rotate_degrees=0, noise_level=0, sharpen_strength=1):
    for method in methods:
        if method == "rotate":
            img_pil = img_pil.rotate(rotate_degrees)
        elif method == "add_noise":
            # Add Gaussian noise in float space, then clip back to [0, 255];
            # adding uint8 arrays directly would wrap around instead of saturating.
            noise = np.random.normal(0, noise_level, img_pil.size[::-1] + (3,))
            noisy = np.clip(np.array(img_pil).astype(np.float64) + noise, 0, 255)
            img_pil = Image.fromarray(noisy.astype(np.uint8))
        elif method == "sharpen":
            img_pil = img_pil.filter(ImageFilter.UnsharpMask(radius=2, percent=sharpen_strength, threshold=3))
    # The image is returned twice; call sites appear to expect a (processed, display) pair.
    return img_pil, img_pil

def convert_pil_to_bytes(image, format='JPEG'):
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format=format)
    return img_byte_arr.getvalue()
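A minimal usage sketch (the path is a placeholder):

# Hypothetical usage: rotate and sharpen an image before inference.
from PIL import Image
from utils import augment_image

img = Image.open("input.jpg")  # placeholder path
augmented, preview = augment_image(img, ["rotate", "sharpen"], rotate_degrees=15, sharpen_strength=150)
augmented.save("input_aug.jpg")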
wavelet.py
ADDED
import numpy as np
import pywt
import cv2
from PIL import Image

def wavelet_blocking_noise_estimation(image: Image.Image, blocksize: int = 8) -> Image.Image:
    """Estimate local noise using wavelet blocking. Returns a PIL image of the noise map."""
    im = np.array(image.convert('L'))
    y = np.double(im)
    # Single-level 2D DWT; the diagonal detail band cD carries mostly noise.
    cA1, (cH, cV, cD) = pywt.dwt2(y, 'db8')
    # Trim cD so it tiles evenly into blocksize x blocksize blocks.
    cD = cD[:cD.shape[0] // blocksize * blocksize, :cD.shape[1] // blocksize * blocksize]
    block = np.zeros((cD.shape[0] // blocksize, cD.shape[1] // blocksize, blocksize ** 2))
    for ii in range(0, cD.shape[0] - blocksize + 1, blocksize):
        for jj in range(0, cD.shape[1] - blocksize + 1, blocksize):
            block_elements = cD[ii:ii+blocksize, jj:jj+blocksize]
            block[ii // blocksize, jj // blocksize, :] = block_elements.flatten()
    # Robust per-block sigma estimate: MAD / 0.6745 (the Gaussian MAD-to-sigma constant).
    noise_map = np.median(np.abs(block), axis=2) / 0.6745
    noise_map_8u = cv2.normalize(noise_map, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    resized_noise_map = cv2.resize(noise_map_8u, (im.shape[1], im.shape[0]), interpolation=cv2.INTER_NEAREST)
    return Image.fromarray(resized_noise_map)
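A minimal usage sketch (the path is a placeholder):

# Hypothetical usage: visualize the estimated local noise of an image.
from PIL import Image
from wavelet import wavelet_blocking_noise_estimation

img = Image.open("scan.jpg")  # placeholder path
noise_map = wavelet_blocking_noise_estimation(img, blocksize=8)
noise_map.save("scan_noise.png")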