noumanjavaid committed
Commit 3052559 · verified
1 Parent(s): 100ee48

Create app.py

Files changed (1)
  1. app.py +681 -0
app.py ADDED
@@ -0,0 +1,681 @@
+import streamlit as st
+import streamlit.components.v1 as components
+from PIL import Image, ImageDraw, ImageFont, ExifTags
+import cv2
+import numpy as np
+from skimage.metrics import structural_similarity as ssim
+import pandas as pd
+import fitz  # PyMuPDF
+import docx
+from difflib import HtmlDiff, SequenceMatcher
+import os
+import uuid
+import logging
+import requests
+import zipfile
+from typing import Union, Dict, Any
+import time
+import base64
+import io
+from io import BytesIO
+
+icon_url = "https://raw.githubusercontent.com/noumanjavaid96/ai-as-an-api/refs/heads/master/image%20(39).png"
+
+response = requests.get(icon_url)
+icon_image = Image.open(BytesIO(response.content))
+
+
+# Page configuration
+st.set_page_config(
+    page_title="Centurion Analysis Tool",
+    page_icon=icon_image,
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+
+# Custom CSS
+st.html(
+    """
+    <style>
+    .title-container {
+        display: flex;
+        align-items: center;
+        margin-bottom: 20px; /* Add margin for spacing */
+    }
+    .title-icon {
+        width: 50px;
+        height: 50px;
+        margin-right: 10px; /* Add margin between icon and title */
+    }
+    .title-text {
+        font-size: 36px; /* Adjust font size as needed */
+        font-weight: bold;
+    }
+    </style>
+    """
+)
+st.markdown(
+    f"""
+    <div class="title-container">
+        <img class="title-icon" src="{icon_url}" alt="Icon">
+        <div class="title-text">Centurion Analysis Tool</div>
+    </div>
+    """,
+    unsafe_allow_html=True
+)
+
+st.write("Welcome to the Centurion Analysis Tool! Use the tabs above to navigate.")
+
+# Constants
+UPLOAD_DIR = "uploaded_files"
+NVIDIA_API_KEY = "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"
+
+# Create upload directory if it doesn't exist
+if not os.path.exists(UPLOAD_DIR):
+    os.makedirs(UPLOAD_DIR)
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
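Not part of the commit, but worth noting next to the constants above: a minimal sketch of reading the key from the environment or from Streamlit's secrets store instead of hard-coding it, assuming the same NVIDIA_API_KEY name is used in both places.

import os
import streamlit as st

# Sketch only: prefer an environment variable, then fall back to
# .streamlit/secrets.toml; the "NVIDIA_API_KEY" name is an assumption.
NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY", "")
if not NVIDIA_API_KEY:
    try:
        NVIDIA_API_KEY = st.secrets["NVIDIA_API_KEY"]
    except (KeyError, FileNotFoundError):
        st.warning("NVIDIA_API_KEY is not configured.")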
+def main():
+    # Title and icon using HTML for better control
+    st.markdown(
+        """
+        <div class="title-container">
+            <img class="title-icon" src="https://raw.githubusercontent.com/noumanjavaid96/ai-as-an-api/refs/heads/master/image%20(39).png">
+            <span class="title-text">CENTURION</span>
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+
+    # Create tabs for different functionalities
+    tabs = st.tabs(["Image Comparison", "Image Comparison with Watermarking", "Document Comparison Tool", "Deepfake Detection"])
+
+    with tabs[0]:
+        image_comparison()
+
+    with tabs[1]:
+        image_comparison_and_watermarking()
+
+    with tabs[2]:
+        document_comparison_tool()
+
+    with tabs[3]:
+        deepfake_detection()
+
+
+def image_comparison():
+    st.header("Image Comparison")
+    st.write("""
+    Upload two images to compare them and find differences.
+    """)
+
+    # Upload images
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.subheader("Original Image")
+        uploaded_file1 = st.file_uploader("Choose the original image", type=["png", "jpg", "jpeg"], key="comp1")
+
+    with col2:
+        st.subheader("Image to Compare")
+        uploaded_file2 = st.file_uploader("Choose the image to compare", type=["png", "jpg", "jpeg"], key="comp2")
+
+    if uploaded_file1 is not None and uploaded_file2 is not None:
+        # Read images
+        image1 = Image.open(uploaded_file1)
+        image2 = Image.open(uploaded_file2)
+
+        # Convert images to OpenCV format
+        img1 = cv2.cvtColor(np.array(image1), cv2.COLOR_RGB2BGR)
+        img2 = cv2.cvtColor(np.array(image2), cv2.COLOR_RGB2BGR)
+
+        # Resize images to the same size if necessary
+        if img1.shape != img2.shape:
+            st.warning("Images are not the same size. Resizing the second image to match the first.")
+            img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0]))
+
+        # Convert to grayscale
+        gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
+        gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
+
+        # Compute SSIM between two images
+        score, diff = ssim(gray1, gray2, full=True)
+        st.write(f"**Structural Similarity Index (SSIM): {score:.4f}**")
+        diff = (diff * 255).astype("uint8")
+
+        # Threshold the difference image
+        thresh = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
+
+        # Find contours of the differences
+        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+        # Create copies of the images to draw on
+        img1_diff = img1.copy()
+        img2_diff = img2.copy()
+
+        # Draw rectangles around differences
+        for cnt in contours:
+            x, y, w, h = cv2.boundingRect(cnt)
+            cv2.rectangle(img1_diff, (x, y), (x + w, y + h), (0, 0, 255), 2)
+            cv2.rectangle(img2_diff, (x, y), (x + w, y + h), (0, 0, 255), 2)
+
+        # Convert images back to RGB for displaying with Streamlit
+        img1_display = cv2.cvtColor(img1_diff, cv2.COLOR_BGR2RGB)
+        img2_display = cv2.cvtColor(img2_diff, cv2.COLOR_BGR2RGB)
+        diff_display = cv2.cvtColor(diff, cv2.COLOR_GRAY2RGB)
+        thresh_display = cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB)
+
+        # Display images
+        st.write("## Results")
+        st.write("Differences are highlighted in red boxes.")
+
+        st.image([img1_display, img2_display], caption=["Original Image with Differences", "Compared Image with Differences"], width=300)
+
+        st.write("## Difference Image")
+        st.image(diff_display, caption="Difference Image", width=300)
+
+        st.write("## Thresholded Difference Image")
+        st.image(thresh_display, caption="Thresholded Difference Image", width=300)
+
+    else:
+        st.info("Please upload both images.")
+
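The SSIM-plus-contours block above reappears almost verbatim in the watermarking tab that follows; a self-contained sketch of how it could be factored into one reusable helper (the function name and signature are illustrative, not from the commit):

import cv2
import numpy as np
from skimage.metrics import structural_similarity as ssim

def highlight_differences(img1_bgr: np.ndarray, img2_bgr: np.ndarray):
    # Returns (ssim_score, annotated_img1, annotated_img2) with differences boxed in red.
    if img1_bgr.shape != img2_bgr.shape:
        img2_bgr = cv2.resize(img2_bgr, (img1_bgr.shape[1], img1_bgr.shape[0]))
    gray1 = cv2.cvtColor(img1_bgr, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(img2_bgr, cv2.COLOR_BGR2GRAY)
    score, diff = ssim(gray1, gray2, full=True)
    diff = (diff * 255).astype("uint8")
    thresh = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    out1, out2 = img1_bgr.copy(), img2_bgr.copy()
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        cv2.rectangle(out1, (x, y), (x + w, y + h), (0, 0, 255), 2)
        cv2.rectangle(out2, (x, y), (x + w, y + h), (0, 0, 255), 2)
    return score, out1, out2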
+def image_comparison_and_watermarking():
+    st.header("Image Comparison and Watermarking")
+    st.write("""
+    Upload two images to compare them, find differences, add a watermark, and compare metadata.
+    """)
+
+    # Upload images
+    st.subheader("Upload Images")
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.subheader("Original Image")
+        uploaded_file1 = st.file_uploader("Choose the original image", type=["png", "jpg", "jpeg"], key="wm1")
+
+    with col2:
+        st.subheader("Image to Compare")
+        uploaded_file2 = st.file_uploader("Choose the image to compare", type=["png", "jpg", "jpeg"], key="wm2")
+
+    watermark_text = st.text_input("Enter watermark text (optional):", value="")
+
+    if uploaded_file1 is not None and uploaded_file2 is not None:
+        # Read images
+        image1 = Image.open(uploaded_file1).convert("RGB")
+        image2 = Image.open(uploaded_file2).convert("RGB")
+
+        # Display original images
+        st.write("### Uploaded Images")
+        st.image([image1, image2], caption=["Original Image", "Image to Compare"], width=300)
+
+        # Add watermark if text is provided
+        if watermark_text:
+            st.write("### Watermarked Original Image")
+            image1_watermarked = add_watermark(image1, watermark_text)
+            st.image(image1_watermarked, caption="Original Image with Watermark", width=300)
+        else:
+            image1_watermarked = image1.copy()
+
+        # Convert images to OpenCV format
+        img1 = cv2.cvtColor(np.array(image1_watermarked), cv2.COLOR_RGB2BGR)
+        img2 = cv2.cvtColor(np.array(image2), cv2.COLOR_RGB2BGR)
+
+        # Resize images to the same size if necessary
+        if img1.shape != img2.shape:
+            st.warning("Images are not the same size. Resizing the second image to match the first.")
+            img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0]))
+
+        # Convert to grayscale
+        gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
+        gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
+
+        # Compute SSIM between two images
+        score, diff = ssim(gray1, gray2, full=True)
+        st.write(f"**Structural Similarity Index (SSIM): {score:.4f}**")
+        diff = (diff * 255).astype("uint8")
+
+        # Threshold the difference image
+        thresh = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
+
+        # Find contours of the differences
+        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+        # Create copies of the images to draw on
+        img1_diff = img1.copy()
+        img2_diff = img2.copy()
+
+        # Draw rectangles around differences
+        for cnt in contours:
+            x, y, w, h = cv2.boundingRect(cnt)
+            cv2.rectangle(img1_diff, (x, y), (x + w, y + h), (0, 0, 255), 2)
+            cv2.rectangle(img2_diff, (x, y), (x + w, y + h), (0, 0, 255), 2)
+
+        # Convert images back to RGB for displaying with Streamlit
+        img1_display = cv2.cvtColor(img1_diff, cv2.COLOR_BGR2RGB)
+        img2_display = cv2.cvtColor(img2_diff, cv2.COLOR_BGR2RGB)
+        diff_display = cv2.cvtColor(diff, cv2.COLOR_GRAY2RGB)
+        thresh_display = cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB)
+
+        # Display images with differences highlighted
+        st.write("## Results")
+        st.write("Differences are highlighted in red boxes.")
+
+        st.image([img1_display, img2_display], caption=["Original Image with Differences", "Compared Image with Differences"], width=300)
+
+        st.write("## Difference Image")
+        st.image(diff_display, caption="Difference Image", width=300)
+
+        st.write("## Thresholded Difference Image")
+        st.image(thresh_display, caption="Thresholded Difference Image", width=300)
+
+        # Metadata comparison
+        st.write("## Metadata Comparison")
+        metadata1 = get_metadata(image1)
+        metadata2 = get_metadata(image2)
+
+        if metadata1 and metadata2:
+            metadata_df = compare_metadata(metadata1, metadata2)
+            if metadata_df is not None:
+                st.write("### Metadata Differences")
+                st.dataframe(metadata_df)
+            else:
+                st.write("No differences in metadata.")
+        else:
+            st.write("Metadata not available for one or both images.")
+
+    else:
+        st.info("Please upload both images.")
+
+def add_watermark(image, text):
+    # Create a blank image for the text with transparent background
+    txt = Image.new('RGBA', image.size, (255, 255, 255, 0))
+    draw = ImageDraw.Draw(txt)
+
+    # Choose a font and size
+    font_size = max(20, image.size[0] // 20)
+    try:
+        font = ImageFont.truetype("arial.ttf", font_size)
+    except IOError:
+        font = ImageFont.load_default()
+
+    # Calculate text bounding box
+    bbox = font.getbbox(text)
+    textwidth = bbox[2] - bbox[0]
+    textheight = bbox[3] - bbox[1]
+
+    # Position the text at the bottom right
+    x = image.size[0] - textwidth - 10
+    y = image.size[1] - textheight - 10
+
+    # Draw text with semi-transparent fill
+    draw.text((x, y), text, font=font, fill=(255, 255, 255, 128))
+
+    # Combine the original image with the text overlay
+    watermarked = Image.alpha_composite(image.convert('RGBA'), txt)
+
+    return watermarked.convert('RGB')
+
+def get_metadata(image):
+    exif_data = {}
+    info = image.getexif()
+    if info:
+        for tag, value in info.items():
+            decoded = ExifTags.TAGS.get(tag, tag)
+            exif_data[decoded] = value
+    return exif_data
+
+def compare_metadata(meta1, meta2):
+    keys = set(meta1.keys()).union(set(meta2.keys()))
+    data = []
+    for key in keys:
+        value1 = meta1.get(key, "Not Available")
+        value2 = meta2.get(key, "Not Available")
+        if value1 != value2:
+            data.append({"Metadata Field": key, "Original Image": value1, "Compared Image": value2})
+    if data:
+        df = pd.DataFrame(data)
+        return df
+    else:
+        return None
+
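For reference, the EXIF lookup these two helpers rely on, as a standalone snippet with a placeholder path:

from PIL import Image, ExifTags

img = Image.open("original.jpg")  # placeholder path
exif = {ExifTags.TAGS.get(tag, tag): value for tag, value in img.getexif().items()}
print(exif or "No EXIF metadata found")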
+def document_comparison_tool():
+    st.header("📄 Advanced Document Comparison Tool")
+    st.markdown("### Compare documents and detect changes with AI-powered OCR")
+
+    # Sidebar settings
+    with st.sidebar:
+        st.header("ℹ️ About")
+        st.markdown("""
+        This tool allows you to:
+        - Compare PDF and Word documents
+        - Process images using NVIDIA's OCR
+        - Detect and highlight changes
+        - Generate similarity metrics
+        """)
+
+        st.header("🛠️ Settings")
+        show_metadata = st.checkbox("Show Metadata", value=True, key='doc_show_metadata')
+        show_detailed_diff = st.checkbox("Show Detailed Differences", value=True, key='doc_show_detailed_diff')
+
+    # Main content
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.markdown("### Original Document")
+        original_file = st.file_uploader(
+            "Upload original document",
+            type=["pdf", "docx", "jpg", "jpeg", "png"],
+            key='doc_original_file',
+            help="Supported formats: PDF, DOCX, JPG, PNG"
+        )
+
+    with col2:
+        st.markdown("### Modified Document")
+        modified_file = st.file_uploader(
+            "Upload modified document",
+            type=["pdf", "docx", "jpg", "jpeg", "png"],
+            key='doc_modified_file',
+            help="Supported formats: PDF, DOCX, JPG, PNG"
+        )
+
+    if original_file and modified_file:
+        try:
+            with st.spinner("Processing documents..."):
+                # Initialize OCR handler
+                ocr_handler = NVIDIAOCRHandler()
+
+                # Process files
+                original_file_path = save_uploaded_file(original_file)
+                modified_file_path = save_uploaded_file(modified_file)
+
+                # Extract text based on file type
+                original_ext = os.path.splitext(original_file.name)[1].lower()
+                modified_ext = os.path.splitext(modified_file.name)[1].lower()
+
+                # Process original document
+                if original_ext in ['.jpg', '.jpeg', '.png']:
+                    original_result = ocr_handler.process_image(original_file_path, f"{UPLOAD_DIR}/original_ocr")
+                    with open(f"{UPLOAD_DIR}/original_ocr/text.txt", "r") as f:
+                        original_text = f.read()
+                elif original_ext == '.pdf':
+                    original_text = extract_text_pdf(original_file_path)
+                else:
+                    original_text = extract_text_word(original_file_path)
+
+                # Process modified document
+                if modified_ext in ['.jpg', '.jpeg', '.png']:
+                    modified_result = ocr_handler.process_image(modified_file_path, f"{UPLOAD_DIR}/modified_ocr")
+                    with open(f"{UPLOAD_DIR}/modified_ocr/text.txt", "r") as f:
+                        modified_text = f.read()
+                elif modified_ext == '.pdf':
+                    modified_text = extract_text_pdf(modified_file_path)
+                else:
+                    modified_text = extract_text_word(modified_file_path)
+
+                # Calculate similarity
+                similarity_score = calculate_similarity(original_text, modified_text)
+
+                # Display results
+                st.markdown("### 📊 Analysis Results")
+
+                metrics_col1, metrics_col2, metrics_col3 = st.columns(3)
+                with metrics_col1:
+                    st.metric("Similarity Score", f"{similarity_score:.2%}")
+                with metrics_col2:
+                    st.metric("Changes Detected", "Yes" if similarity_score < 1 else "No")
+                with metrics_col3:
+                    st.metric("Processing Status", "Complete ✅")
+
+                # Build the HTML diff up front so the report download below can reuse it
+                diff_html = compare_texts(original_text, modified_text)
+
+                if show_detailed_diff:
+                    st.markdown("### 🔍 Detailed Comparison")
+                    components.html(diff_html, height=600, scrolling=True)
+
+                # Download results
+                st.markdown("### 💾 Download Results")
+                if st.button("Generate Report"):
+                    with st.spinner("Generating report..."):
+                        # Simulate report generation
+                        time.sleep(2)
+                        st.success("Report generated successfully!")
+                        st.download_button(
+                            label="Download Report",
+                            data=diff_html,
+                            file_name="comparison_report.html",
+                            mime="text/html"
+                        )
+
+        except Exception as e:
+            st.error(f"An error occurred: {str(e)}")
+            logger.error(f"Error processing documents: {str(e)}")
+    else:
+        st.info("👆 Please upload both documents to begin comparison")
+
+class NVIDIAOCRHandler:
+    def __init__(self):
+        self.api_key = NVIDIA_API_KEY
+        self.nvai_url = "https://ai.api.nvidia.com/v1/cv/nvidia/ocdrnet"
+        self.assets_url = "https://api.nvcf.nvidia.com/v2/nvcf/assets"
+        self.header_auth = f"Bearer {self.api_key}"
+
+    def upload_asset(self, input_data: bytes, description: str) -> uuid.UUID:
+        try:
+            with st.spinner("Uploading document to NVIDIA OCR service..."):
+                headers = {
+                    "Authorization": self.header_auth,
+                    "Content-Type": "application/json",
+                    "accept": "application/json",
+                }
+                s3_headers = {
+                    "x-amz-meta-nvcf-asset-description": description,
+                    "content-type": "image/jpeg",
+                }
+                payload = {"contentType": "image/jpeg", "description": description}
+
+                response = requests.post(self.assets_url, headers=headers, json=payload, timeout=30)
+                response.raise_for_status()
+
+                upload_data = response.json()
+                response = requests.put(
+                    upload_data["uploadUrl"],
+                    data=input_data,
+                    headers=s3_headers,
+                    timeout=300,
+                )
+                response.raise_for_status()
+                return uuid.UUID(upload_data["assetId"])
+        except Exception as e:
+            st.error(f"Error uploading asset: {str(e)}")
+            raise
+
+    def process_image(self, image_path: str, output_dir: str) -> Dict[str, Any]:
+        try:
+            with st.spinner("Processing document with OCR..."):
+                with open(image_path, "rb") as f:
+                    asset_id = self.upload_asset(f.read(), "Input Image")
+
+                inputs = {"image": f"{asset_id}", "render_label": False}
+                asset_list = f"{asset_id}"
+                headers = {
+                    "Content-Type": "application/json",
+                    "NVCF-INPUT-ASSET-REFERENCES": asset_list,
+                    "NVCF-FUNCTION-ASSET-IDS": asset_list,
+                    "Authorization": self.header_auth,
+                }
+
+                response = requests.post(self.nvai_url, headers=headers, json=inputs)
+                response.raise_for_status()
+
+                zip_path = f"{output_dir}.zip"
+                with open(zip_path, "wb") as out:
+                    out.write(response.content)
+
+                with zipfile.ZipFile(zip_path, "r") as z:
+                    z.extractall(output_dir)
+
+                os.remove(zip_path)
+                return {
+                    "status": "success",
+                    "output_directory": output_dir,
+                    "files": os.listdir(output_dir)
+                }
+        except Exception as e:
+            st.error(f"Error processing image: {str(e)}")
+            raise
+
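process_image above, together with the open() calls in document_comparison_tool, assumes the OCR response archive always contains a file named text.txt. If that layout is not guaranteed, a defensive sketch that reads whichever .txt file was extracted could look like this (names are illustrative, not part of the commit):

import glob
import os

def read_ocr_text(output_dir: str) -> str:
    # Sketch: return the contents of the first .txt file the OCR zip produced.
    candidates = sorted(glob.glob(os.path.join(output_dir, "*.txt")))
    if not candidates:
        raise FileNotFoundError(f"No .txt output found in {output_dir}")
    with open(candidates[0], "r", encoding="utf-8") as f:
        return f.read()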
+def save_uploaded_file(uploaded_file):
+    file_path = os.path.join(UPLOAD_DIR, uploaded_file.name)
+    with open(file_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+    return file_path
+
+def extract_text_pdf(file_path):
+    doc = fitz.open(file_path)
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text
+
+def extract_text_word(file_path):
+    doc = docx.Document(file_path)
+    text = "\n".join([para.text for para in doc.paragraphs])
+    return text
+
+def compare_texts(text1, text2):
+    differ = HtmlDiff()
+    return differ.make_file(
+        text1.splitlines(),
+        text2.splitlines(),
+        fromdesc="Original",
+        todesc="Modified",
+        context=True,
+        numlines=2
+    )
+
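compare_texts and the calculate_similarity helper defined near the end of the file are thin wrappers over difflib; a quick standalone check of both (the sample strings are made up):

from difflib import HtmlDiff, SequenceMatcher

a = "Payment is due within 30 days."
b = "Payment is due within 45 days."

print(SequenceMatcher(None, a, b).ratio())  # similarity ratio in [0, 1]
with open("diff.html", "w", encoding="utf-8") as f:
    f.write(HtmlDiff().make_file(a.splitlines(), b.splitlines(), fromdesc="Original", todesc="Modified"))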
+def draw_bounding_box(image, vertices, confidence, is_deepfake):
+    img = np.array(image)
+    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+
+    # Extract coordinates
+    x1, y1 = int(vertices[0]['x']), int(vertices[0]['y'])
+    x2, y2 = int(vertices[1]['x']), int(vertices[1]['y'])
+
+    # Calculate confidence percentages
+    deepfake_conf = is_deepfake * 100
+    bbox_conf = confidence * 100
+
+    # Choose color based on deepfake confidence (red for high confidence)
+    color = (0, 0, 255) if deepfake_conf > 70 else (0, 255, 0)
+
+    # Draw bounding box
+    cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
+
+    # Add text with confidence scores
+    label = f"Deepfake ({deepfake_conf:.1f}%), Face ({bbox_conf:.1f}%)"
+    cv2.putText(img, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
+
+    # Convert back to RGB for Streamlit
+    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+def process_image(image_bytes):
+    """Process image through NVIDIA's deepfake detection API"""
+    image_b64 = base64.b64encode(image_bytes).decode()
+
+    headers = {
+        "Authorization": f"Bearer {NVIDIA_API_KEY}",
+        "Content-Type": "application/json",
+        "Accept": "application/json"
+    }
+
+    payload = {
+        "input": [f"data:image/png;base64,{image_b64}"]
+    }
+
+    try:
+        response = requests.post(
+            "https://ai.api.nvidia.com/v1/cv/hive/deepfake-image-detection",
+            headers=headers,
+            json=payload
+        )
+        response.raise_for_status()
+        return response.json()
+    except Exception as e:
+        st.error(f"Error processing image: {str(e)}")
+        return None
+
+def deepfake_detection():
+    st.title("Deepfake Detection")
+    st.write("Upload an image to detect potential deepfakes")
+
+    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+
+    if uploaded_file is not None:
+        # Display original image
+        image_bytes = uploaded_file.getvalue()
+        image = Image.open(io.BytesIO(image_bytes))
+        col1, col2 = st.columns(2)
+
+        with col1:
+            st.subheader("Original Image")
+            st.image(image, use_container_width=True)
+
+        # Process image
+        with st.spinner("Analyzing image..."):
+            result = process_image(image_bytes)
+
+        if result and 'data' in result:
+            data = result['data'][0]
+
+            # Display results
+            if 'bounding_boxes' in data:
+                for box in data['bounding_boxes']:
+                    # Draw bounding box on image
+                    annotated_image = draw_bounding_box(
+                        image,
+                        box['vertices'],
+                        box['bbox_confidence'],
+                        box['is_deepfake']
+                    )
+
+                    with col2:
+                        st.subheader("Analysis Result")
+                        st.image(annotated_image, use_container_width=True)
+
+                    # Display confidence metrics
+                    deepfake_conf = box['is_deepfake'] * 100
+                    bbox_conf = box['bbox_confidence'] * 100
+
+                    st.write("### Detection Confidence")
+                    col3, col4 = st.columns(2)
+
+                    with col3:
+                        st.metric("Deepfake Confidence", f"{deepfake_conf:.1f}%")
+                        st.progress(deepfake_conf / 100)
+
+                    with col4:
+                        st.metric("Face Detection Confidence", f"{bbox_conf:.1f}%")
+                        st.progress(bbox_conf / 100)
+
+                    if deepfake_conf > 90:
+                        st.error("⚠️ High probability of deepfake detected!")
+                    elif deepfake_conf > 70:
+                        st.warning("⚠️ Moderate probability of deepfake detected!")
+                    else:
+                        st.success("✅ Low probability of deepfake")
+
+                # Display raw JSON data in expander
+                with st.expander("View Raw JSON Response"):
+                    st.json(result)
+            else:
+                st.warning("No faces detected in the image")
+        else:
+            st.error("Failed to process image")
+
+def calculate_similarity(text1, text2):
+    matcher = SequenceMatcher(None, text1, text2)
+    return matcher.ratio()
+
+if __name__ == "__main__":
+    main()
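For anyone running this commit locally: the imports map to the PyPI packages streamlit, pillow, opencv-python (or opencv-python-headless on servers), numpy, scikit-image, pandas, PyMuPDF (imported as fitz), python-docx (imported as docx), and requests; everything else is standard library. The app is launched with "streamlit run app.py".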