aznasut committed
Commit 507cd9a · 1 Parent(s): b2dfed5

Add application file

Files changed (5)
  1. .gitignore +160 -0
  2. Dockerfile +25 -0
  3. main.py +224 -0
  4. models.py +49 -0
  5. requirements.txt +10 -0
.gitignore ADDED
@@ -0,0 +1,160 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
Dockerfile ADDED
@@ -0,0 +1,25 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.12-slim
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     gcc \
+     build-essential \
+     pkg-config \
+     libhdf5-dev \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy the current directory contents into the container at /app
+ COPY . /app
+
+ # Install any needed packages specified in requirements.txt
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Make port 8000 available to the world outside this container
+ EXPOSE 8000
+
+ # Run the Uvicorn server; bind to 0.0.0.0 so the exposed port is reachable
+ # from outside the container (binding to localhost would not be)
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
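
As a usage sketch (the image tag below is only an example, not a name defined anywhere in this commit): the container can be built with docker build -t ai-image-detector . and started with docker run -p 8000:8000 ai-image-detector, which publishes the exposed port 8000 on the host.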
main.py ADDED
@@ -0,0 +1,224 @@
+ """Module providing an API for detecting AI-generated (deepfake) images."""
+
+ import io
+ import hashlib
+ import logging
+ import aiohttp
+ from fastapi import FastAPI, File, UploadFile, HTTPException
+ from fastapi.responses import JSONResponse
+ from transformers.pipelines import PipelineException
+ from transformers import AutoImageProcessor, ViTForImageClassification
+ from PIL import Image
+ from cachetools import Cache
+ import tensorflow as tf
+ import torch
+ import torch.nn.functional as F
+ from models import (
+     FileImageDetectionResponse,
+     UrlImageDetectionResponse,
+     ImageUrlsRequest,
+ )
+
+
+ app = FastAPI()
+
+ logging.basicConfig(
+     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+ )
+
+ # Initialize cache with no TTL
+ cache = Cache(maxsize=1000)
+
+ # Detect the device used by TensorFlow
+ DEVICE = "GPU" if tf.config.list_physical_devices("GPU") else "CPU"
+ logging.info("TensorFlow version: %s", tf.__version__)
+ logging.info("Model is using: %s", DEVICE)
+
+ if DEVICE == "GPU":
+     logging.info("GPUs available: %d", len(tf.config.list_physical_devices("GPU")))
+
+
+ async def download_image(image_url: str) -> bytes:
+     """Download an image from a URL."""
+     async with aiohttp.ClientSession() as session:
+         async with session.get(image_url) as response:
+             if response.status != 200:
+                 raise HTTPException(
+                     status_code=response.status, detail="Image could not be retrieved."
+                 )
+             return await response.read()
+
+
+ def hash_data(data):
+     """Return the SHA-256 hex digest of the given image bytes."""
+     return hashlib.sha256(data).hexdigest()
+
+
+ @app.post("/v1/detect", response_model=FileImageDetectionResponse)
+ async def classify_image(file: UploadFile = File(None)):
+     """Classify an uploaded image file."""
+     if file is None:
+         raise HTTPException(
+             status_code=400,
+             detail="An image file must be provided.",
+         )
+
+     try:
+         logging.info("Processing %s", file.filename)
+
+         # Read the image file and hash it for the cache lookup
+         image_data = await file.read()
+         image_hash = hash_data(image_data)
+
+         if image_hash in cache:
+             # Return cached entry
+             logging.info("Returning cached entry for %s", file.filename)
+
+             cached_response = cache[image_hash]
+             response_data = {**cached_response, "file_name": file.filename}
+
+             return FileImageDetectionResponse(**response_data)
+
+         image = Image.open(io.BytesIO(image_data))
+
+         # Load the classifier and run inference
+         image_processor = AutoImageProcessor.from_pretrained(
+             "dima806/ai_vs_real_image_detection"
+         )
+         model = ViTForImageClassification.from_pretrained(
+             "dima806/ai_vs_real_image_detection"
+         )
+
+         inputs = image_processor(image, return_tensors="pt")
+
+         with torch.no_grad():
+             logits = model(**inputs).logits
+
+         # The model predicts one of its configured labels (real vs. AI-generated)
+         probs = F.softmax(logits, dim=-1)
+         predicted_label_id = probs.argmax(-1).item()
+         predicted_label = model.config.id2label[predicted_label_id]
+         logging.info("Predicted label: %s", predicted_label)
+
+         # Confidence score, rounded to the nearest tenth and expressed as a percentage
+         confidence_percentage = round(probs.max().item() * 100, 1)
+
+         # Prepare the custom response data
+         response_data = {
+             "prediction": predicted_label,
+             "confidence_percentage": confidence_percentage,
+         }
+
+         # Populate the cache
+         cache[image_hash] = response_data.copy()
+
+         # Add file_name to the API response
+         response_data["file_name"] = file.filename
+
+         return FileImageDetectionResponse(**response_data)
+
+     except PipelineException as e:
+         logging.error("Error processing image: %s", str(e))
+         raise HTTPException(
+             status_code=500, detail=f"Error processing image: {str(e)}"
+         ) from e
+
+
+ @app.post("/v1/detect/urls", response_model=list[UrlImageDetectionResponse])
+ async def classify_images(request: ImageUrlsRequest):
+     """Classify images downloaded from a list of URLs."""
+     response_data = []
+
+     for image_url in request.urls:
+         try:
+             logging.info("Downloading image from URL: %s", image_url)
+             image_data = await download_image(image_url)
+             image_hash = hash_data(image_data)
+
+             if image_hash in cache:
+                 # Return cached entry
+                 logging.info("Returning cached entry for %s", image_url)
+
+                 cached_response = cache[image_hash]
+                 response = {**cached_response, "url": image_url}
+
+                 response_data.append(response)
+                 continue
+
+             image = Image.open(io.BytesIO(image_data))
+
+             # Load the deepfake-detection classifier and run inference
+             image_processor = AutoImageProcessor.from_pretrained(
+                 "Wvolf/ViT_Deepfake_Detection"
+             )
+             model = ViTForImageClassification.from_pretrained(
+                 "Wvolf/ViT_Deepfake_Detection"
+             )
+             inputs = image_processor(image, return_tensors="pt")
+
+             with torch.no_grad():
+                 logits = model(**inputs).logits
+                 probs = F.softmax(logits, dim=-1)
+                 predicted_label_id = probs.argmax(-1).item()
+                 predicted_label = model.config.id2label[predicted_label_id]
+                 confidence = probs.max().item()
+
+             # Prepare the custom response data; confidence is rounded to the
+             # nearest tenth and expressed as a percentage
+             detection_result = {
+                 "prediction": predicted_label,
+                 "confidence_percentage": round(confidence * 100, 1),
+             }
+
+             # Populate the cache
+             cache[image_hash] = detection_result.copy()
+
+             # Add url to the API response
+             detection_result["url"] = image_url
+
+             response_data.append(detection_result)
+
+         except PipelineException as e:
+             logging.error("Error processing image from %s: %s", image_url, str(e))
+             raise HTTPException(
+                 status_code=500,
+                 detail=f"Error processing image from {image_url}: {str(e)}",
+             ) from e
+
+     return JSONResponse(status_code=200, content=response_data)
+
+
+ @app.get("/hello")
+ async def hello_world():
+     """Simple health-check endpoint."""
+     return {"message": "hello_world"}
+
+
+ if __name__ == "__main__":
+     import uvicorn
+
+     uvicorn.run(app, host="127.0.0.1", port=8000)
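
For reference, a minimal client sketch for the two endpoints above, assuming the server is running locally on port 8000. It uses the requests package, which is not part of this commit's requirements and is shown only for illustration; the file name and URL are placeholders.

# Hypothetical client for the /v1/detect and /v1/detect/urls endpoints.
import requests

# Single-file detection: upload an image as multipart form data.
with open("example.jpg", "rb") as f:  # placeholder local image
    resp = requests.post(
        "http://127.0.0.1:8000/v1/detect",
        files={"file": ("example.jpg", f, "image/jpeg")},
    )
print(resp.json())  # {"prediction": ..., "confidence_percentage": ..., "file_name": ...}

# Batch detection from URLs: post a JSON body matching ImageUrlsRequest.
resp = requests.post(
    "http://127.0.0.1:8000/v1/detect/urls",
    json={"urls": ["https://example.com/image.jpg"]},  # placeholder URL
)
print(resp.json())  # list of {"prediction": ..., "confidence_percentage": ..., "url": ...}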
models.py ADDED
@@ -0,0 +1,49 @@
+ """Module providing base models."""
+
+ from pydantic import BaseModel
+
+
+ class ImageUrlsRequest(BaseModel):
+     """
+     Model representing the request body for the /v1/detect/urls endpoint.
+
+     Attributes:
+         urls (list[str]): List of image URLs to be processed.
+     """
+
+     urls: list[str]
+
+
+ class ImageDetectionResponse(BaseModel):
+     """
+     Base model representing the response body for image detection.
+
+     Attributes:
+         prediction (str): Label predicted by the classifier.
+         confidence_percentage (float): Confidence of the prediction, as a percentage.
+     """
+
+     prediction: str
+     confidence_percentage: float
+
+
+ class FileImageDetectionResponse(ImageDetectionResponse):
+     """
+     Model extending ImageDetectionResponse with a file attribute.
+
+     Attributes:
+         file_name (str): The name of the file that was processed.
+     """
+
+     file_name: str
+
+
+ class UrlImageDetectionResponse(ImageDetectionResponse):
+     """
+     Model extending ImageDetectionResponse with a URL attribute.
+
+     Attributes:
+         url (str): The URL of the image that was processed.
+     """
+
+     url: str
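
A small sketch of how these response models are populated by main.py. The label strings and numbers below are illustrative only; the actual labels come from each classifier's id2label mapping.

# Hypothetical values to show how the Pydantic models validate and serialize.
from models import FileImageDetectionResponse, UrlImageDetectionResponse

file_resp = FileImageDetectionResponse(
    prediction="REAL",            # example label; depends on the model's id2label
    confidence_percentage=97.3,   # rounded percentage
    file_name="example.jpg",
)
print(file_resp.model_dump())

url_resp = UrlImageDetectionResponse(
    prediction="FAKE",
    confidence_percentage=88.1,
    url="https://example.com/image.jpg",
)
print(url_resp.model_dump())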
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ fastapi==0.110.2
+ uvicorn[standard]==0.29.0
+ transformers==4.40.0
+ aiohttp==3.9.5
+ pillow==10.3.0
+ python-multipart==0.0.9
+ tensorflow==2.16.1
+ tf-keras==2.16.0
+ cachetools==5.3.3
+ pydantic==2.7.2
+ # torch is imported by main.py (ViTForImageClassification); version left unpinned
+ torch