Spaces:

noumanjavaid
/

centurion

Running

App Files Files Community

noumanjavaid committed on Nov 18, 2024

Commit

3052559

verified ·

1 Parent(s): 100ee48

Create app.py

Browse files

Files changed (1) hide show

app.py +681 -0

app.py ADDED Viewed

	@@ -0,0 +1,681 @@

+import streamlit as st
+from PIL import Image, ImageDraw, ImageFont, ExifTags
+import cv2
+import numpy as np
+from skimage.metrics import structural_similarity as ssim
+import pandas as pd
+import fitz  # PyMuPDF
+import docx
+from difflib import HtmlDiff, SequenceMatcher
+import os
+import uuid
+import logging
+import requests
+import zipfile
+from typing import Union, Dict, Any
+import time
+import base64
+import io
+from io import BytesIO
# ---------------------------------------------------------------------------
# Module-level setup. Streamlit re-executes this file on every interaction,
# so everything here must be cheap and must not raise.
# ---------------------------------------------------------------------------

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Remote icon used both as the browser-tab icon and next to the page title.
icon_url = "https://raw.githubusercontent.com/noumanjavaid96/ai-as-an-api/refs/heads/master/image%20(39).png"
try:
    # A timeout keeps a slow or unreachable host from hanging app start-up.
    response = requests.get(icon_url, timeout=10)
    response.raise_for_status()
    icon_image = Image.open(BytesIO(response.content))
except (requests.RequestException, OSError) as exc:
    # The icon is cosmetic: fall back to Streamlit's default instead of crashing.
    logger.warning("Could not load page icon from %s: %s", icon_url, exc)
    icon_image = None

# Page configuration (must be the first Streamlit command executed)
st.set_page_config(
    page_title="Centurion Analysis Tool",
    page_icon=icon_image,
    layout="wide",
    initial_sidebar_state="expanded"
)
# Custom CSS
st.html(
    """
    <style>
    .title-container {
        display: flex;
        align-items: center;
        margin-bottom: 20px; /* Add margin for spacing */
    }
    .title-icon {
        width: 50px;
        height: 50px;
        margin-right: 10px; /* Add margin between icon and title */
    }
    .title-text {
        font-size: 36px; /* Adjust font size as needed */
        font-weight: bold;
    }
    </style>
    """,
)
st.markdown(
    f"""
    <div class="title-container">
        <img class="title-icon" src="{icon_url}" alt="Icon">
        <div class="title-text">Centurion Analysis Tool</div>
    </div>
    """,
    unsafe_allow_html=True
)
st.write("Welcome to the Centurion Analysis Tool! Use the tabs above to navigate.")

# Constants
UPLOAD_DIR = "uploaded_files"
# SECURITY: the API key must never be committed to source control. It is read
# from the environment (e.g. a Space/host secret named NVIDIA_API_KEY).
NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY", "")
if not NVIDIA_API_KEY:
    logger.warning("NVIDIA_API_KEY is not set; NVIDIA API requests will be rejected.")

# Create upload directory if it doesn't exist (exist_ok avoids a check-then-create race)
os.makedirs(UPLOAD_DIR, exist_ok=True)
def main():
    """Render the CENTURION banner followed by one tab per analysis tool.

    NOTE(review): another ``def main()`` appears later in this file. Because it
    is defined afterwards, that later function is the one bound to the name
    ``main`` when the script's entry guard runs.
    """
    # Banner is raw HTML so the icon and title share one flex row.
    banner_html = """
        <div class="title-container">
            <img class="title-icon" src="https://raw.githubusercontent.com/noumanjavaid96/ai-as-an-api/refs/heads/master/image%20(39).png">
            <span class="title-text">CENTURION</span>
        </div>
        """
    st.markdown(banner_html, unsafe_allow_html=True)

    # (tab label, function that renders the tab body), in display order.
    tab_specs = (
        ("Image Comparison", image_comparison),
        ("Image Comparison with Watermarking", image_comparison_and_watermarking),
        ("Document Comparison Tool", document_comparison_tool),
    )
    containers = st.tabs([label for label, _ in tab_specs])
    for container, (_, render_tab) in zip(containers, tab_specs):
        with container:
            render_tab()
def image_comparison():
    """Streamlit tab: compare two uploaded images and highlight where they differ.

    Both uploads are normalised to RGB, the second is resized to the first's
    dimensions if needed, and a structural-similarity (SSIM) map is computed on
    the grayscale versions. Regions picked out by an Otsu threshold of that map
    are boxed in red on both images; the SSIM map and its thresholded mask are
    shown as well.
    """
    st.header("Image Comparison")
    st.write("""
    Upload two images to compare them and find differences.
    """)
    # Upload images
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Original Image")
        uploaded_file1 = st.file_uploader("Choose the original image", type=["png", "jpg", "jpeg"], key="comp1")
    with col2:
        st.subheader("Image to Compare")
        uploaded_file2 = st.file_uploader("Choose the image to compare", type=["png", "jpg", "jpeg"], key="comp2")

    if uploaded_file1 is None or uploaded_file2 is None:
        st.info("Please upload both images.")
        return

    # Force 3-channel RGB: PNGs may be RGBA, palette or grayscale, and the
    # RGB->BGR conversion below only accepts 3-channel input.
    image1 = Image.open(uploaded_file1).convert("RGB")
    image2 = Image.open(uploaded_file2).convert("RGB")
    # Convert images to OpenCV format
    img1 = cv2.cvtColor(np.array(image1), cv2.COLOR_RGB2BGR)
    img2 = cv2.cvtColor(np.array(image2), cv2.COLOR_RGB2BGR)
    # Resize images to the same size if necessary
    if img1.shape != img2.shape:
        st.warning("Images are not the same size. Resizing the second image to match the first.")
        img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0]))
    # Convert to grayscale
    gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
    # Compute SSIM between two images
    score, diff = ssim(gray1, gray2, full=True)
    st.write(f"**Structural Similarity Index (SSIM): {score:.4f}**")
    # Local SSIM values can be negative; clip before scaling so the uint8 cast
    # cannot wrap strongly dissimilar pixels around to bright ("similar") values.
    diff = (np.clip(diff, 0.0, 1.0) * 255).astype("uint8")
    # Threshold the difference image (inverted: low similarity becomes white)
    thresh = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    # Find contours of the differences
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Create copies of the images to draw on
    img1_diff = img1.copy()
    img2_diff = img2.copy()
    # Draw rectangles around differences (red in BGR)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        cv2.rectangle(img1_diff, (x, y), (x + w, y + h), (0, 0, 255), 2)
        cv2.rectangle(img2_diff, (x, y), (x + w, y + h), (0, 0, 255), 2)
    # Convert images back to RGB for displaying with Streamlit
    img1_display = cv2.cvtColor(img1_diff, cv2.COLOR_BGR2RGB)
    img2_display = cv2.cvtColor(img2_diff, cv2.COLOR_BGR2RGB)
    diff_display = cv2.cvtColor(diff, cv2.COLOR_GRAY2RGB)
    thresh_display = cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB)
    # Display images
    st.write("## Results")
    st.write("Differences are highlighted in red boxes.")
    st.image([img1_display, img2_display], caption=["Original Image with Differences", "Compared Image with Differences"], width=300)
    st.write("## Difference Image")
    st.image(diff_display, caption="Difference Image", width=300)
    st.write("## Thresholded Difference Image")
    st.image(thresh_display, caption="Thresholded Difference Image", width=300)
def image_comparison_and_watermarking():
    """Streamlit tab: optional watermarking, visual diff and EXIF comparison.

    If watermark text is entered it is stamped onto the original image, and the
    stamped version is what gets compared against the second image. The visual
    comparison boxes regions selected by an Otsu threshold of the SSIM map.
    Finally the EXIF metadata of the two uploads is compared field by field.
    """
    st.header("Image Comparison and Watermarking")
    st.write("""
    Upload two images to compare them, find differences, add a watermark, and compare metadata.
    """)
    # Upload images
    st.subheader("Upload Images")
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Original Image")
        uploaded_file1 = st.file_uploader("Choose the original image", type=["png", "jpg", "jpeg"], key="wm1")
    with col2:
        st.subheader("Image to Compare")
        uploaded_file2 = st.file_uploader("Choose the image to compare", type=["png", "jpg", "jpeg"], key="wm2")
    watermark_text = st.text_input("Enter watermark text (optional):", value="")

    if uploaded_file1 is None or uploaded_file2 is None:
        st.info("Please upload both images.")
        return

    # Read images
    image1 = Image.open(uploaded_file1).convert("RGB")
    image2 = Image.open(uploaded_file2).convert("RGB")
    # Display original images
    st.write("### Uploaded Images")
    st.image([image1, image2], caption=["Original Image", "Image to Compare"], width=300)
    # Add watermark if text is provided
    if watermark_text:
        st.write("### Watermarked Original Image")
        image1_watermarked = add_watermark(image1, watermark_text)
        st.image(image1_watermarked, caption="Original Image with Watermark", width=300)
    else:
        image1_watermarked = image1.copy()
    # Convert images to OpenCV format
    img1 = cv2.cvtColor(np.array(image1_watermarked), cv2.COLOR_RGB2BGR)
    img2 = cv2.cvtColor(np.array(image2), cv2.COLOR_RGB2BGR)
    # Resize images to the same size if necessary
    if img1.shape != img2.shape:
        st.warning("Images are not the same size. Resizing the second image to match the first.")
        img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0]))
    # Convert to grayscale
    gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
    # Compute SSIM between two images
    score, diff = ssim(gray1, gray2, full=True)
    st.write(f"**Structural Similarity Index (SSIM): {score:.4f}**")
    # Local SSIM values can be negative; clip before scaling so the uint8 cast
    # cannot wrap strongly dissimilar pixels around to bright ("similar") values.
    diff = (np.clip(diff, 0.0, 1.0) * 255).astype("uint8")
    # Threshold the difference image (inverted: low similarity becomes white)
    thresh = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    # Find contours of the differences
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Create copies of the images to draw on
    img1_diff = img1.copy()
    img2_diff = img2.copy()
    # Draw rectangles around differences (red in BGR)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        cv2.rectangle(img1_diff, (x, y), (x + w, y + h), (0, 0, 255), 2)
        cv2.rectangle(img2_diff, (x, y), (x + w, y + h), (0, 0, 255), 2)
    # Convert images back to RGB for displaying with Streamlit
    img1_display = cv2.cvtColor(img1_diff, cv2.COLOR_BGR2RGB)
    img2_display = cv2.cvtColor(img2_diff, cv2.COLOR_BGR2RGB)
    diff_display = cv2.cvtColor(diff, cv2.COLOR_GRAY2RGB)
    thresh_display = cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB)
    # Display images with differences highlighted
    st.write("## Results")
    st.write("Differences are highlighted in red boxes.")
    st.image([img1_display, img2_display], caption=["Original Image with Differences", "Compared Image with Differences"], width=300)
    st.write("## Difference Image")
    st.image(diff_display, caption="Difference Image", width=300)
    st.write("## Thresholded Difference Image")
    st.image(thresh_display, caption="Thresholded Difference Image", width=300)
    # Metadata comparison
    st.write("## Metadata Comparison")
    metadata1 = get_metadata(image1)
    metadata2 = get_metadata(image2)
    if metadata1 and metadata2:
        metadata_df = compare_metadata(metadata1, metadata2)
        if metadata_df is not None:
            st.write("### Metadata Differences")
            st.dataframe(metadata_df)
        else:
            st.write("No differences in metadata.")
    else:
        st.write("Metadata not available for one or both images.")
def add_watermark(image, text):
    """Return an RGB copy of *image* with *text* stamped in the bottom-right corner.

    The text is drawn in semi-transparent white on a transparent overlay that is
    then alpha-composited over the image, so the input image is not modified.

    Args:
        image: PIL image to watermark.
        text: Watermark text.

    Returns:
        A new PIL image in RGB mode.
    """
    margin = 10
    # Create a blank image for the text with transparent background
    overlay = Image.new('RGBA', image.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(overlay)
    # Scale the font with the image width, but never below 20 px
    font_size = max(20, image.size[0] // 20)
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # Arial is typically absent on Linux hosts. Newer Pillow releases can
        # scale the built-in font; older ones reject the ``size`` argument.
        try:
            font = ImageFont.load_default(size=font_size)
        except (TypeError, ImportError):
            font = ImageFont.load_default()
    # Bounding box of the rendered text relative to the drawing origin
    _, _, right, bottom = font.getbbox(text)
    # Anchor the box's right/bottom edges `margin` px inside the image, and
    # clamp so text wider/taller than the image still starts on-canvas.
    x = max(0, image.size[0] - right - margin)
    y = max(0, image.size[1] - bottom - margin)
    # Draw text with semi-transparent fill
    draw.text((x, y), text, font=font, fill=(255, 255, 255, 128))
    # Combine the original image with the text overlay
    watermarked = Image.alpha_composite(image.convert('RGBA'), overlay)
    return watermarked.convert('RGB')
def get_metadata(image):
    """Return the image's EXIF entries as a dict keyed by tag name.

    Tag ids with an entry in ``ExifTags.TAGS`` are replaced by that name; ids
    without one are kept as-is. An image with no EXIF data yields ``{}``.
    """
    raw_exif = image.getexif()
    if not raw_exif:
        return {}
    return {
        ExifTags.TAGS.get(tag_id, tag_id): tag_value
        for tag_id, tag_value in raw_exif.items()
    }
def compare_metadata(meta1, meta2):
    """Tabulate the metadata fields whose values differ between two images.

    Args:
        meta1: Metadata mapping of the original image.
        meta2: Metadata mapping of the compared image.

    Returns:
        A DataFrame with columns ``Metadata Field``, ``Original Image`` and
        ``Compared Image`` (one row per differing field, ordered by field
        name), or ``None`` when no field differs. A field missing on one side
        is reported with the value ``"Not Available"``.
    """
    missing = "Not Available"
    # Keys may mix tag names (str) with undecoded tag ids (int), so order by
    # their string form; this keeps the row order stable between reruns
    # instead of depending on set iteration order.
    all_keys = sorted(set(meta1) | set(meta2), key=str)
    rows = []
    for key in all_keys:
        value1 = meta1.get(key, missing)
        value2 = meta2.get(key, missing)
        if value1 != value2:
            rows.append({"Metadata Field": key, "Original Image": value1, "Compared Image": value2})
    return pd.DataFrame(rows) if rows else None
def _extract_document_text(uploaded_file, ocr_handler, ocr_dir_name):
    """Save one upload to disk and return its text (OCR for images, parsers otherwise)."""
    file_path = save_uploaded_file(uploaded_file)
    extension = os.path.splitext(uploaded_file.name)[1].lower()
    if extension in ('.jpg', '.jpeg', '.png'):
        ocr_dir = f"{UPLOAD_DIR}/{ocr_dir_name}"
        ocr_handler.process_image(file_path, ocr_dir)
        # The OCR result archive is expected to contain text.txt.
        # NOTE(review): assumes the service writes UTF-8 text; confirm.
        with open(f"{ocr_dir}/text.txt", "r", encoding="utf-8") as f:
            return f.read()
    if extension == '.pdf':
        return extract_text_pdf(file_path)
    return extract_text_word(file_path)


def document_comparison_tool():
    """Streamlit tab: compare the text of two documents.

    Accepts PDF, DOCX and image uploads (images go through the NVIDIA OCR
    handler), then shows a similarity score, an optional side-by-side HTML
    diff, and a downloadable HTML report. Any processing error is shown in the
    UI and logged rather than propagated.
    """
    st.header("📄 Advanced Document Comparison Tool")
    st.markdown("### Compare documents and detect changes with AI-powered OCR")
    # Sidebar settings
    with st.sidebar:
        st.header("ℹ️ About")
        st.markdown("""
        This tool allows you to:
        - Compare PDF and Word documents
        - Process images using NVIDIA's OCR
        - Detect and highlight changes
        - Generate similarity metrics
        """)
        st.header("🛠️ Settings")
        # NOTE: the "Show Metadata" value is not consumed anywhere in this function yet.
        show_metadata = st.checkbox("Show Metadata", value=True, key='doc_show_metadata')
        show_detailed_diff = st.checkbox("Show Detailed Differences", value=True, key='doc_show_detailed_diff')
    # Main content
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("### Original Document")
        original_file = st.file_uploader(
            "Upload original document",
            type=["pdf", "docx", "jpg", "jpeg", "png"],
            key='doc_original_file',
            help="Supported formats: PDF, DOCX, JPG, PNG"
        )
    with col2:
        st.markdown("### Modified Document")
        modified_file = st.file_uploader(
            "Upload modified document",
            type=["pdf", "docx", "jpg", "jpeg", "png"],
            key='doc_modified_file',
            help="Supported formats: PDF, DOCX, JPG, PNG"
        )

    if not (original_file and modified_file):
        st.info("👆 Please upload both documents to begin comparison")
        return

    try:
        with st.spinner("Processing documents..."):
            # Initialize OCR handler
            ocr_handler = NVIDIAOCRHandler()
            # Each document is saved and read before the next one is saved, so
            # two uploads sharing a filename cannot overwrite each other before
            # their text has been extracted.
            original_text = _extract_document_text(original_file, ocr_handler, "original_ocr")
            modified_text = _extract_document_text(modified_file, ocr_handler, "modified_ocr")
            # Calculate similarity
            similarity_score = calculate_similarity(original_text, modified_text)
            # Built unconditionally: the report download needs it even when the
            # on-page detailed view is switched off.
            diff_html = compare_texts(original_text, modified_text)
            # Display results
            st.markdown("### 📊 Analysis Results")
            metrics_col1, metrics_col2, metrics_col3 = st.columns(3)
            with metrics_col1:
                st.metric("Similarity Score", f"{similarity_score:.2%}")
            with metrics_col2:
                st.metric("Changes Detected", "Yes" if similarity_score < 1 else "No")
            with metrics_col3:
                st.metric("Processing Status", "Complete ✅")
            if show_detailed_diff:
                st.markdown("### 🔍 Detailed Comparison")
                st.components.v1.html(diff_html, height=600, scrolling=True)
            # Download results
            st.markdown("### 💾 Download Results")
            if st.button("Generate Report"):
                st.success("Report generated successfully!")
                st.download_button(
                    label="Download Report",
                    data=diff_html,
                    file_name="comparison_report.html",
                    mime="text/html"
                )
    except Exception as e:
        # Top-level UI boundary: report the failure to the user and keep the app alive.
        st.error(f"An error occurred: {str(e)}")
        logger.exception("Error processing documents: %s", e)
class NVIDIAOCRHandler:
    """Client for NVIDIA's hosted OCDRNet OCR endpoint.

    Workflow: register an asset and upload the image bytes to the returned
    pre-signed URL (``upload_asset``), then invoke the OCR function on that
    asset and unpack the zip archive it returns (``process_image``).
    """

    # Seconds to wait for the short JSON calls and for the longer transfers.
    API_TIMEOUT = 30
    TRANSFER_TIMEOUT = 300

    def __init__(self):
        self.api_key = NVIDIA_API_KEY
        self.nvai_url = "https://ai.api.nvidia.com/v1/cv/nvidia/ocdrnet"
        self.assets_url = "https://api.nvcf.nvidia.com/v2/nvcf/assets"
        self.header_auth = f"Bearer {self.api_key}"

    def upload_asset(self, input_data: bytes, description: str) -> uuid.UUID:
        """Upload raw image bytes as an asset and return its id.

        Args:
            input_data: Image file contents.
            description: Free-text description stored with the asset.

        Returns:
            The asset id reported by the assets API.

        Raises:
            Exception: Any request/response error is shown in the UI and re-raised.
        """
        try:
            with st.spinner("Uploading document to NVIDIA OCR service..."):
                headers = {
                    "Authorization": self.header_auth,
                    "Content-Type": "application/json",
                    "accept": "application/json",
                }
                s3_headers = {
                    "x-amz-meta-nvcf-asset-description": description,
                    "content-type": "image/jpeg",
                }
                payload = {"contentType": "image/jpeg", "description": description}
                # Step 1: ask for an asset slot (returns assetId + uploadUrl).
                response = requests.post(
                    self.assets_url, headers=headers, json=payload, timeout=self.API_TIMEOUT
                )
                response.raise_for_status()
                upload_data = response.json()
                # Step 2: send the bytes to the pre-signed upload URL.
                response = requests.put(
                    upload_data["uploadUrl"],
                    data=input_data,
                    headers=s3_headers,
                    timeout=self.TRANSFER_TIMEOUT,
                )
                response.raise_for_status()
                return uuid.UUID(upload_data["assetId"])
        except Exception as e:
            st.error(f"Error uploading asset: {str(e)}")
            raise

    def process_image(self, image_path: str, output_dir: str) -> Dict[str, Any]:
        """Run OCR on the image at *image_path* and extract the results into *output_dir*.

        Args:
            image_path: Path of the image file to send.
            output_dir: Directory that receives the contents of the result archive.

        Returns:
            A dict with ``status``, ``output_directory`` and ``files`` (the
            directory listing of *output_dir* after extraction).

        Raises:
            Exception: Any upload, request or archive error is shown in the UI
                and re-raised.
        """
        try:
            with st.spinner("Processing document with OCR..."):
                with open(image_path, "rb") as f:
                    asset_id = self.upload_asset(f.read(), "Input Image")
                inputs = {"image": f"{asset_id}", "render_label": False}
                asset_list = f"{asset_id}"
                headers = {
                    "Content-Type": "application/json",
                    "NVCF-INPUT-ASSET-REFERENCES": asset_list,
                    "NVCF-FUNCTION-ASSET-IDS": asset_list,
                    "Authorization": self.header_auth,
                }
                # A timeout keeps a stalled inference call from hanging the app.
                response = requests.post(
                    self.nvai_url, headers=headers, json=inputs, timeout=self.TRANSFER_TIMEOUT
                )
                response.raise_for_status()
                # Unpack straight from memory: no temporary .zip is left behind
                # if extraction fails part-way.
                with zipfile.ZipFile(io.BytesIO(response.content), "r") as z:
                    z.extractall(output_dir)
                return {
                    "status": "success",
                    "output_directory": output_dir,
                    "files": os.listdir(output_dir)
                }
        except Exception as e:
            st.error(f"Error processing image: {str(e)}")
            raise
def save_uploaded_file(uploaded_file):
    """Write an uploaded file into ``UPLOAD_DIR`` and return the path it was saved to.

    The stored name is the upload's base name prefixed with a random token, so
    two uploads that share a filename (e.g. an original and a modified
    ``report.pdf``) never overwrite each other, and any directory components in
    the client-supplied name are discarded.

    Args:
        uploaded_file: Object exposing ``name`` and ``getbuffer()``.

    Returns:
        Path of the saved file; the original extension is preserved.
    """
    # Normalise Windows separators first so basename() strips them on any OS.
    base_name = os.path.basename(uploaded_file.name.replace("\\", "/")) or "upload"
    file_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}_{base_name}")
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
def extract_text_pdf(file_path):
    """Return the text of every page of the PDF at *file_path*, concatenated in page order."""
    # The context manager closes the document (and its file handle) even if a
    # page fails to parse; join avoids repeated string concatenation.
    with fitz.open(file_path) as doc:
        return "".join(page.get_text() for page in doc)
def extract_text_word(file_path):
    """Return the paragraph texts of the Word document at *file_path*, one per line."""
    document = docx.Document(file_path)
    paragraph_texts = []
    for paragraph in document.paragraphs:
        paragraph_texts.append(paragraph.text)
    return "\n".join(paragraph_texts)
def compare_texts(text1, text2):
    """Build a standalone HTML page showing a side-by-side line diff of two texts.

    The columns are labelled "Original" and "Modified"; only changed regions
    are shown, each with two lines of surrounding context.
    """
    original_lines = text1.splitlines()
    modified_lines = text2.splitlines()
    html_differ = HtmlDiff()
    report = html_differ.make_file(
        original_lines,
        modified_lines,
        fromdesc="Original",
        todesc="Modified",
        context=True,
        numlines=2,
    )
    return report
def draw_bounding_box(image, vertices, confidence, is_deepfake):
    """Draw one labelled detection box on *image* and return the result as an RGB array.

    Args:
        image: PIL image or array-like RGB image.
        vertices: Two corner points, each a mapping with ``'x'`` and ``'y'``.
        confidence: Face-detection confidence as a fraction (scaled to percent
            for the label).
        is_deepfake: Deepfake score as a fraction (scaled to percent for the
            label).

    Returns:
        An RGB ``numpy`` array with the box and label drawn. The box is red
        when the deepfake score exceeds 70 %, green otherwise.
    """
    # PIL images may be RGBA, palette or grayscale; the RGB->BGR conversion
    # below needs exactly three channels. Arrays are assumed to be RGB already.
    if hasattr(image, "convert"):
        image = image.convert("RGB")
    img = np.array(image)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    # Extract coordinates
    x1, y1 = int(vertices[0]['x']), int(vertices[0]['y'])
    x2, y2 = int(vertices[1]['x']), int(vertices[1]['y'])
    # Calculate confidence percentages
    deepfake_conf = is_deepfake * 100
    bbox_conf = confidence * 100
    # Choose color based on deepfake confidence (red for high confidence)
    color = (0, 0, 255) if deepfake_conf > 70 else (0, 255, 0)
    # Draw bounding box
    cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
    # Add text with confidence scores, anchored at the box's top-left corner
    label = f"Deepfake ({deepfake_conf:.1f}%), Face ({bbox_conf:.1f}%)"
    left, top = min(x1, x2), min(y1, y2)
    # Put the label above the box, or just inside it when the box touches the
    # top of the image (otherwise the text would be drawn off-canvas).
    label_y = top - 10 if top - 10 >= 10 else top + 15
    cv2.putText(img, label, (left, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    # Convert back to RGB for Streamlit
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
def process_image(image_bytes):
    """Process image through NVIDIA's deepfake detection API.

    Args:
        image_bytes: Raw bytes of the image file.

    Returns:
        The decoded JSON response, or ``None`` if the request failed or the
        response was not valid JSON (the error is shown in the UI).
    """
    image_b64 = base64.b64encode(image_bytes).decode()
    headers = {
        "Authorization": f"Bearer {NVIDIA_API_KEY}",
        "Content-Type": "application/json",
        "Accept": "application/json"
    }
    payload = {
        "input": [f"data:image/png;base64,{image_b64}"]
    }
    try:
        response = requests.post(
            "https://ai.api.nvidia.com/v1/cv/hive/deepfake-image-detection",
            headers=headers,
            json=payload,
            # Without a timeout a stalled connection would hang the app forever.
            timeout=60,
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        st.error(f"Error processing image: {str(e)}")
        return None
def _render_deepfake_detection():
    """Render the deepfake-detection tool: upload, analyse, annotate and report."""
    st.title("Deepfake Detection")
    st.write("Upload an image to detect potential deepfakes")
    uploaded_file = st.file_uploader(
        "Choose an image...", type=["jpg", "jpeg", "png"], key="deepfake_upload"
    )
    if uploaded_file is None:
        return

    # Display original image
    image_bytes = uploaded_file.getvalue()
    image = Image.open(io.BytesIO(image_bytes))
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Original Image")
        st.image(image, use_container_width=True)
    # Process image
    with st.spinner("Analyzing image..."):
        result = process_image(image_bytes)
    # Guard against a missing *or empty* 'data' list before indexing into it.
    if not isinstance(result, dict) or not result.get('data'):
        st.error("Failed to process image")
        return
    boxes = result['data'][0].get('bounding_boxes')
    if not boxes:
        st.warning("No faces detected in the image")
        return

    # Draw every box onto the same picture so all detected faces stay visible.
    annotated_image = image.convert("RGB")
    for box in boxes:
        annotated_image = draw_bounding_box(
            annotated_image,
            box['vertices'],
            box['bbox_confidence'],
            box['is_deepfake']
        )
    with col2:
        st.subheader("Analysis Result")
        st.image(annotated_image, use_container_width=True)

    # Display confidence metrics, one section per detected face
    for face_number, box in enumerate(boxes, start=1):
        deepfake_conf = box['is_deepfake'] * 100
        bbox_conf = box['bbox_confidence'] * 100
        st.write("### Detection Confidence")
        if len(boxes) > 1:
            st.caption(f"Face {face_number} of {len(boxes)}")
        col3, col4 = st.columns(2)
        with col3:
            st.metric("Deepfake Confidence", f"{deepfake_conf:.1f}%")
            # st.progress only accepts values within [0, 1]
            st.progress(min(max(deepfake_conf / 100, 0.0), 1.0))
        with col4:
            st.metric("Face Detection Confidence", f"{bbox_conf:.1f}%")
            st.progress(min(max(bbox_conf / 100, 0.0), 1.0))
        if deepfake_conf > 90:
            st.error("⚠️ High probability of deepfake detected!")
        elif deepfake_conf > 70:
            st.warning("⚠️ Moderate probability of deepfake detected!")
        else:
            st.success("✅ Low probability of deepfake")
    # Display raw JSON data in expander (once, not once per face)
    with st.expander("View Raw JSON Response"):
        st.json(result)


def main():
    """Application entry point: show every tool in its own tab.

    This definition is the one bound to ``main`` when the entry guard runs,
    because it is the last ``main`` defined in the file. It therefore exposes
    the image and document comparison tools alongside deepfake detection, so
    none of them is left unreachable.
    """
    tab_specs = (
        ("Image Comparison", image_comparison),
        ("Image Comparison with Watermarking", image_comparison_and_watermarking),
        ("Document Comparison Tool", document_comparison_tool),
        ("Deepfake Detection", _render_deepfake_detection),
    )
    tabs = st.tabs([label for label, _ in tab_specs])
    for tab, (_, render) in zip(tabs, tab_specs):
        with tab:
            render()
def calculate_similarity(text1, text2):
    """Return the character-level similarity of two texts as a ratio in [0, 1].

    ``1.0`` means the texts are identical; ``0.0`` means they share nothing.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        ``SequenceMatcher.ratio()`` of the two strings.
    """
    if text1 == text2:
        # Fast path: skips the matcher entirely for identical documents.
        return 1.0
    # autojunk must be off: for sequences of 200+ items the default heuristic
    # treats frequently occurring characters as junk, which can make nearly
    # identical documents score close to zero.
    matcher = SequenceMatcher(None, text1, text2, autojunk=False)
    return matcher.ratio()
# Entry guard: call main() only when this file is executed as the main module.
if __name__ == "__main__":
    main()