# NOTE: page banner captured alongside this source ("Spaces: Runtime error");
# it is not part of the program.
import os | |
from typing import Literal | |
import spaces | |
import gradio as gr | |
import modelscope_studio.components.antd as antd | |
import modelscope_studio.components.antdx as antdx | |
import modelscope_studio.components.base as ms | |
from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification | |
from torchvision import transforms | |
import torch | |
from PIL import Image | |
import numpy as np | |
import io | |
import logging | |
from utils.utils import softmax, augment_image, convert_pil_to_bytes | |
from utils.gradient import gradient_processing | |
from utils.minmax import preprocess as minmax_preprocess | |
from utils.ela import genELA as ELA | |
from forensics.registry import register_model, MODEL_REGISTRY, ModelEntry | |
# Send INFO-and-above records through the root logger; this module logs under
# its own name.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Run on CUDA when torch can see a GPU, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# Inline style dictionaries (camelCase CSS property names) for the page layout
# sections: header, content, sider, footer and the outer layout container.
header_style = dict(
    textAlign='center',
    color='#fff',
    height=64,
    paddingInline=48,
    lineHeight='64px',
    backgroundColor='#4096ff',
)

content_style = dict(
    textAlign='center',
    minHeight=120,
    lineHeight='120px',
    color='#fff',
    backgroundColor='#0958d9',
)

sider_style = dict(
    textAlign='center',
    lineHeight='120px',
    color='#fff',
    backgroundColor='#1677ff',
)

footer_style = dict(
    textAlign='center',
    color='#fff',
    backgroundColor='#4096ff',
)

layout_style = dict(
    borderRadius=8,
    overflow='hidden',
    width='calc(100% - 8px)',
    maxWidth='calc(100% - 8px)',
)
# Hugging Face Hub repository ids, keyed by the model id used in the registry.
MODEL_PATHS = {
    "model_1": "haywoodsloan/ai-image-detector-deploy",
    "model_2": "Heem2/AI-vs-Real-Image-Detection",
    "model_3": "Organika/sdxl-detector",
    "model_4": "cmckinle/sdxl-flux-detector_v1.1",
    "model_5": "prithivMLmods/Deep-Fake-Detector-v2-Model",
    "model_5b": "prithivMLmods/Deepfake-Detection-Exp-02-22",
    "model_6": "ideepankarsharma2003/AI_ImageClassification_MidjourneyV6_SDXL",
    "model_7": "date3k2/vit-real-fake-classification-v4",
}

# Label names per model, keyed like MODEL_PATHS.
#
# Ordering convention: `infer` reads index 0 as the AI/fake score and index 1
# as the real score, so the AI/fake label must always come first. model_5 and
# model_5b previously listed the real label first, which swapped their AI and
# Real scores (and therefore the final label).
#
# For entries post-processed with `postprocess_logits` the order is also used
# to index the model's logits, so it must match the model's output order.
CLASS_NAMES = {
    "model_1": ['artificial', 'real'],
    "model_2": ['AI Image', 'Real Image'],
    "model_3": ['AI', 'Real'],
    "model_4": ['AI', 'Real'],
    "model_5": ['Deepfake', 'Realism'],
    "model_5b": ['Deepfake', 'Real'],
    "model_6": ['ai_gen', 'human'],
    "model_7": ['Fake', 'Real'],
}
def preprocess_resize_256(image):
    """Return *image* in RGB mode, resized to 256x256.

    Images that are not already RGB are converted first; the resize itself is
    done by torchvision's ``transforms.Resize``.
    """
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
    resize = transforms.Resize((256, 256))
    return resize(rgb_image)
def preprocess_resize_224(image):
    """Return *image* in RGB mode, resized to 224x224.

    Images that are not already RGB are converted first; the resize itself is
    done by torchvision's ``transforms.Resize``.
    """
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
    resize = transforms.Resize((224, 224))
    return resize(rgb_image)
def postprocess_pipeline(prediction, class_names):
    """Map each predicted label to its score.

    *prediction* is iterated as a sequence of mappings with ``'label'`` and
    ``'score'`` keys (the shape produced by a HuggingFace pipeline).
    *class_names* is accepted for signature parity with the other
    post-processors and is not used.
    """
    scores = {}
    for item in prediction:
        scores[item['label']] = item['score']
    return scores
def postprocess_logits(outputs, class_names):
    """Turn the first row of ``outputs.logits`` into a ``{class_name: probability}`` dict.

    The logits are moved to the CPU, converted to NumPy and passed through
    ``softmax``; ``class_names[i]`` is paired with the i-th probability.
    """
    first_row = outputs.logits.cpu().numpy()[0]
    probs = softmax(first_row)
    return {name: probs[idx] for idx, name in enumerate(class_names)}
# ModelEntry is built from its four core fields only; the display metadata is
# attached afterwards as plain attributes.
# NOTE(review): the original note assumes registry.py may be updated so that
# ModelEntry accepts display_name / contributor / model_path directly — confirm.
def register_model_with_metadata(model_id, model, preprocess, postprocess, class_names, display_name, contributor, model_path):
    """Create a ModelEntry, attach display metadata and store it in MODEL_REGISTRY.

    Args:
        model_id: Key under which the entry is stored in ``MODEL_REGISTRY``.
        model: Callable invoked on the preprocessed image.
        preprocess: Callable applied to the input image before inference.
        postprocess: Callable turning the model output into a score mapping.
        class_names: Label names associated with the model.
        display_name: Human-readable model name.
        contributor: Name of the model's contributor.
        model_path: Identifier/path of the model.
    """
    entry = ModelEntry(model, preprocess, postprocess, class_names)
    metadata = (
        ("display_name", display_name),
        ("contributor", contributor),
        ("model_path", model_path),
    )
    for attribute, value in metadata:
        setattr(entry, attribute, value)
    MODEL_REGISTRY[model_id] = entry
# Load and register models.

# model_1: explicit SwinV2 weights and image processor wrapped in an
# image-classification pipeline.
image_processor_1 = AutoImageProcessor.from_pretrained(
    MODEL_PATHS["model_1"],
    use_fast=True,
)
model_1 = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"]).to(device)
clf_1 = pipeline(
    task="image-classification",
    model=model_1,
    image_processor=image_processor_1,
    device=device,
)
register_model_with_metadata(
    model_id="model_1",
    model=clf_1,
    preprocess=preprocess_resize_256,
    postprocess=postprocess_pipeline,
    class_names=CLASS_NAMES["model_1"],
    display_name="SwinV2 Based",
    contributor="haywoodsloan",
    model_path=MODEL_PATHS["model_1"],
)

# model_2: pipeline built straight from the model path.
clf_2 = pipeline(
    task="image-classification",
    model=MODEL_PATHS["model_2"],
    device=device,
)
register_model_with_metadata(
    model_id="model_2",
    model=clf_2,
    preprocess=preprocess_resize_224,
    postprocess=postprocess_pipeline,
    class_names=CLASS_NAMES["model_2"],
    display_name="ViT Based",
    contributor="Heem2",
    model_path=MODEL_PATHS["model_2"],
)

# Register remaining models.
# model_3 is run manually (feature extractor + model) rather than via a pipeline.
feature_extractor_3 = AutoFeatureExtractor.from_pretrained(
    MODEL_PATHS["model_3"],
    device=device,
)
model_3 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_3"]).to(device)
def preprocess_256(image):
    """Return *image* in RGB mode, resized to 256x256.

    This used to be a line-for-line copy of ``preprocess_resize_256``; it now
    delegates to that helper so the two cannot drift apart. The name is kept
    because the model registrations refer to it.
    """
    return preprocess_resize_256(image)
def postprocess_logits_model3(outputs, class_names):
    """Convert model_3 outputs into a ``{class_name: probability}`` dict.

    This used to duplicate ``postprocess_logits`` verbatim; it now delegates to
    that shared helper. The name is kept because the model_3 registration
    refers to it.
    """
    return postprocess_logits(outputs, class_names)
def model3_infer(image):
    """Run model_3 on *image* and return the raw model outputs.

    The image is encoded with ``feature_extractor_3`` as PyTorch tensors, moved
    to ``device``, and fed to ``model_3`` with gradient tracking disabled.
    """
    encoded = feature_extractor_3(image, return_tensors="pt").to(device)
    with torch.no_grad():
        return model_3(**encoded)
# model_3: registered with its manual inference function instead of a pipeline.
register_model_with_metadata(
    model_id="model_3",
    model=model3_infer,
    preprocess=preprocess_256,
    postprocess=postprocess_logits_model3,
    class_names=CLASS_NAMES["model_3"],
    display_name="SDXL Dataset",
    contributor="Organika",
    model_path=MODEL_PATHS["model_3"],
)

# model_4: same manual feature-extractor + model setup as model_3.
feature_extractor_4 = AutoFeatureExtractor.from_pretrained(
    MODEL_PATHS["model_4"],
    device=device,
)
model_4 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_4"]).to(device)
def model4_infer(image):
    """Run model_4 on *image* and return the raw model outputs.

    The image is encoded with ``feature_extractor_4`` as PyTorch tensors, moved
    to ``device``, and fed to ``model_4`` with gradient tracking disabled.
    """
    encoded = feature_extractor_4(image, return_tensors="pt").to(device)
    with torch.no_grad():
        return model_4(**encoded)
def postprocess_logits_model4(outputs, class_names):
    """Convert model_4 outputs into a ``{class_name: probability}`` dict.

    This used to duplicate ``postprocess_logits`` verbatim; it now delegates to
    that shared helper. The name is kept because the model_4 registration
    refers to it.
    """
    return postprocess_logits(outputs, class_names)
# model_4: registered with its manual inference function.
register_model_with_metadata(
    model_id="model_4",
    model=model4_infer,
    preprocess=preprocess_256,
    postprocess=postprocess_logits_model4,
    class_names=CLASS_NAMES["model_4"],
    display_name="SDXL + FLUX",
    contributor="cmckinle",
    model_path=MODEL_PATHS["model_4"],
)

# model_5 / model_5b: pipelines built straight from the model path.
clf_5 = pipeline(
    task="image-classification",
    model=MODEL_PATHS["model_5"],
    device=device,
)
register_model_with_metadata(
    model_id="model_5",
    model=clf_5,
    preprocess=preprocess_resize_224,
    postprocess=postprocess_pipeline,
    class_names=CLASS_NAMES["model_5"],
    display_name="Vit Based",
    contributor="prithivMLmods",
    model_path=MODEL_PATHS["model_5"],
)

clf_5b = pipeline(
    task="image-classification",
    model=MODEL_PATHS["model_5b"],
    device=device,
)
register_model_with_metadata(
    model_id="model_5b",
    model=clf_5b,
    preprocess=preprocess_resize_224,
    postprocess=postprocess_pipeline,
    class_names=CLASS_NAMES["model_5b"],
    display_name="Vit Based, Newer Dataset",
    contributor="prithivMLmods",
    model_path=MODEL_PATHS["model_5b"],
)

# model_6: explicit Swin weights and image processor wrapped in a pipeline.
image_processor_6 = AutoImageProcessor.from_pretrained(
    MODEL_PATHS["model_6"],
    use_fast=True,
)
model_6 = SwinForImageClassification.from_pretrained(MODEL_PATHS["model_6"]).to(device)
clf_6 = pipeline(
    task="image-classification",
    model=model_6,
    image_processor=image_processor_6,
    device=device,
)
register_model_with_metadata(
    model_id="model_6",
    model=clf_6,
    preprocess=preprocess_resize_224,
    postprocess=postprocess_pipeline,
    class_names=CLASS_NAMES["model_6"],
    display_name="Swin, Midj + SDXL",
    contributor="ideepankarsharma2003",
    model_path=MODEL_PATHS["model_6"],
)

# model_7: explicit weights and image processor wrapped in a pipeline.
image_processor_7 = AutoImageProcessor.from_pretrained(
    MODEL_PATHS["model_7"],
    use_fast=True,
)
model_7 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_7"]).to(device)
clf_7 = pipeline(
    task="image-classification",
    model=model_7,
    image_processor=image_processor_7,
    device=device,
)
register_model_with_metadata(
    model_id="model_7",
    model=clf_7,
    preprocess=preprocess_resize_224,
    postprocess=postprocess_pipeline,
    class_names=CLASS_NAMES["model_7"],
    display_name="ViT",
    contributor="temp",
    model_path=MODEL_PATHS["model_7"],
)
# Generic inference function
def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75) -> dict:
    """Run one registered model on *image* and return a flat result row.

    Args:
        image: Input image handed to the entry's ``preprocess`` callable.
        model_id: Key of the model in ``MODEL_REGISTRY``.
        confidence_threshold: Minimum score a class needs to be chosen as the label.

    Returns:
        A dict with the keys ``"Model"``, ``"Contributor"``, ``"HF Model Path"``,
        ``"AI Score"``, ``"Real Score"`` and ``"Label"`` (in that order).
        ``entry.class_names[0]`` is read as the AI class and
        ``entry.class_names[1]`` as the real class. The label is ``"AI"``,
        ``"REAL"`` or ``"UNCERTAIN"``; if anything fails, both scores are
        ``None`` and the label is ``"Error: <message>"``.

    Raises:
        KeyError: If *model_id* is not present in ``MODEL_REGISTRY``.
    """
    entry = MODEL_REGISTRY[model_id]
    row = {
        "Model": entry.display_name,
        "Contributor": entry.contributor,
        "HF Model Path": entry.model_path,
        "AI Score": None,
        "Real Score": None,
    }
    try:
        # Preprocessing is inside the try block so that a failure in one model's
        # preprocessing yields an error row instead of aborting the whole batch.
        img = entry.preprocess(image)
        result = entry.model(img)
        scores = entry.postprocess(result, entry.class_names)
        # Cast to built-in floats: post-processors may return NumPy scalars.
        ai_score = float(scores.get(entry.class_names[0], 0.0))
        real_score = float(scores.get(entry.class_names[1], 0.0))
    except Exception as e:
        # Best-effort per model: report the failure in the row, but keep the
        # traceback in the logs instead of dropping it silently.
        logger.exception("Inference failed for model %s", model_id)
        row["Label"] = f"Error: {str(e)}"
        return row

    if ai_score >= confidence_threshold:
        label = "AI"
    elif real_score >= confidence_threshold:
        label = "REAL"
    else:
        label = "UNCERTAIN"

    # Flatten output for Dataframe: include metadata and both class scores
    row["AI Score"] = ai_score
    row["Real Score"] = real_score
    row["Label"] = label
    return row
# Run all registered models, in a fixed order
def predict_image(img, confidence_threshold):
    """Run every listed model on *img* and collect the per-model result rows.

    Returns:
        ``(img, results)`` where *img* is the input passed through unchanged
        and *results* is a list with one ``infer`` result per model id, in the
        order the ids are listed below.
    """
    results = []
    for model_id in (
        "model_1", "model_2", "model_3", "model_4",
        "model_5", "model_5b", "model_6", "model_7",
    ):
        results.append(infer(img, model_id, confidence_threshold))
    return img, results
# Gradio handler: predictions for the results table plus forensic visualisations
def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_degrees, noise_level, sharpen_strength):
    """Optionally augment *img*, run all models, and build the forensic image set.

    Args:
        img: Uploaded image (the UI supplies a PIL image).
        confidence_threshold: Threshold forwarded to ``predict_image``.
        augment_methods: Selected augmentation names; falsy means no augmentation.
        rotate_degrees: Forwarded to ``augment_image``.
        noise_level: Forwarded to ``augment_image``.
        sharpen_strength: Forwarded to ``augment_image``.

    Returns:
        ``(img_pil, forensics_images, table_rows)``:
        the (possibly augmented) image, the list
        ``[img_pil, ela1, ela2, ela3, gradient_image, minmax_image]``, and one
        row per model as a list ordered
        Model / Contributor / HF Model Path / AI Score / Real Score / Label.

    Raises:
        gr.Error: If no image was provided.
    """
    if img is None:
        # Without this guard the click fails deep inside preprocessing with an
        # unhelpful error; gr.Error surfaces a readable message in the UI.
        raise gr.Error("Please upload an image before running a prediction.")

    if augment_methods:
        img_pil, _ = augment_image(img, augment_methods, rotate_degrees, noise_level, sharpen_strength)
    else:
        img_pil = img
    img_pil, results = predict_image(img_pil, confidence_threshold)

    img_np = np.array(img_pil)  # Processed (possibly augmented) image as a NumPy array
    img_np_og = np.array(img)  # Original upload as a NumPy array
    gradient_image = gradient_processing(img_np)  # Gradient processing
    minmax_image = minmax_preprocess(img_np)  # MinMax processing

    # ELA runs on the original upload, not on the augmented image.
    # First pass - standard analysis
    ela1 = ELA(img_np_og, quality=75, scale=50, contrast=20, linear=False, grayscale=True)
    # Second pass - enhanced visibility
    ela2 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=True)
    ela3 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=False)
    forensics_images = [img_pil, ela1, ela2, ela3, gradient_image, minmax_image]

    # The results table is a gr.Dataframe, which takes rows (list of lists),
    # not a list of dicts. Emit rows in the same order as the table headers.
    columns = ("Model", "Contributor", "HF Model Path", "AI Score", "Real Score", "Label")
    table_rows = [[row.get(column) for column in columns] for row in results]
    return img_pil, forensics_images, table_rows
# Gradio UI: a welcome banner followed by three tabs (model playground,
# community forensics preview, leaderboard placeholder).
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped — confirm the layout against the running Space.
# NOTE(review): the leading "π" glyphs in the tab titles look like mis-decoded
# emoji; the originals cannot be recovered from this copy, so they are left as-is.
with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ overflow-y: hidden !important;} .ms-gr-ant-welcome-icon{ height:unset !important;} .tabs{margin-top:10px;}") as iface:
    with ms.Application() as app:
        with antd.ConfigProvider():
            antdx.Welcome(
                icon="https://cdn-avatars.huggingface.co/v1/production/uploads/639daf827270667011153fbc/WpeSFhuB81DY-1TjNUmV_.png",
                title="Welcome to Project OpenSight",
                description="The OpenSight aims to be an open-source SOTA generated image detection model. This HF Space is not only an introduction but a educational playground for the public to evaluate and challenge current open source models. **Space will be upgraded shortly; inference on all 6 models should take about 1.2~ seconds.** "
            )
        with gr.Tab("π Detection Models Eval / Playground"):
            gr.Markdown("# Open Source Detection Models Found on the Hub\n\n - **Space will be upgraded shortly;** inference on all 6 models should take about 1.2~ seconds once we're back on CUDA.\n - The **Community Forensics** mother of all detection models is now available for inference, head to the middle tab above this.\n - Lots of exciting things coming up, stay tuned!")
            with gr.Row():
                with gr.Column(scale=1):
                    image_input = gr.Image(label="Upload Image to Analyze", sources=['upload', 'webcam'], type='pil')
                    with gr.Accordion("Settings (Optional)", open=False, elem_id="settings_accordion"):
                        augment_checkboxgroup = gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], label="Augmentation Methods")
                        # The per-method sliders start hidden and are revealed
                        # by the change handlers registered further down.
                        rotate_slider = gr.Slider(0, 45, value=2, step=1, label="Rotate Degrees", visible=False)
                        noise_slider = gr.Slider(0, 50, value=4, step=1, label="Noise Level", visible=False)
                        sharpen_slider = gr.Slider(0, 50, value=11, step=1, label="Sharpen Strength", visible=False)
                        confidence_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Confidence Threshold")
                    # Order matches the parameters of predict_image_with_json.
                    inputs = [image_input, confidence_slider, augment_checkboxgroup, rotate_slider, noise_slider, sharpen_slider]
                    predict_button = gr.Button("Predict")
                    augment_button = gr.Button("Augment & Predict")
                    image_output = gr.Image(label="Processed Image", visible=False)
                with gr.Column(scale=2):
                    # Use Gradio-native Dataframe to display results with headers
                    results_table = gr.Dataframe(
                        label="Model Predictions",
                        headers=["Model", "Contributor", "HF Model Path", "AI Score", "Real Score", "Label"],
                        datatype=["str", "str", "str", "number", "number", "str"]
                    )
                    forensics_gallery = gr.Gallery(label="Post Processed Images", visible=True, columns=[4], rows=[2], container=False, height="auto", object_fit="contain", elem_id="post-gallery")
                    # Order matches the tuple returned by predict_image_with_json.
                    outputs = [image_output, forensics_gallery, results_table]

            # Show/hide each slider based on the selected augmentation methods
            augment_checkboxgroup.change(lambda methods: gr.update(visible="rotate" in methods), inputs=[augment_checkboxgroup], outputs=[rotate_slider])
            augment_checkboxgroup.change(lambda methods: gr.update(visible="add_noise" in methods), inputs=[augment_checkboxgroup], outputs=[noise_slider])
            augment_checkboxgroup.change(lambda methods: gr.update(visible="sharpen" in methods), inputs=[augment_checkboxgroup], outputs=[sharpen_slider])

            predict_button.click(
                fn=predict_image_with_json,
                inputs=inputs,
                outputs=outputs
            )
            # "Augment & Predict" ignores the visible checkbox selection and
            # always applies all three augmentations via a hidden checkbox group.
            augment_button.click(
                fn=predict_image_with_json,
                inputs=[
                    image_input,
                    confidence_slider,
                    gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], value=["rotate", "add_noise", "sharpen"], visible=False),  # Default values
                    rotate_slider,
                    noise_slider,
                    sharpen_slider
                ],
                outputs=outputs
            )
        with gr.Tab("π Community Forensics Preview"):
            # Embeds another Space; the original author was unsure this works.
            temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")
        with gr.Tab("π₯ Leaderboard"):
            # "Soon™" was mis-decoded as "Soonβ’" (UTF-8 bytes read as ISO-8859-7).
            gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")

# Launch the interface
iface.launch()