Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,6 +13,7 @@ import speech_recognition as sr
|
|
| 13 |
from gtts import gTTS
|
| 14 |
import tempfile
|
| 15 |
import torch.nn.utils.prune as prune
|
|
|
|
| 16 |
|
| 17 |
# Configure logging
|
| 18 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
|
@@ -38,28 +39,28 @@ class ImageSearchSystem:
|
|
| 38 |
self.initialized = False
|
| 39 |
|
| 40 |
def initialize_dataset(self) -> None:
|
| 41 |
-
"""Automatically download and process the dataset."""
|
| 42 |
try:
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
# Download dataset if not already present
|
| 47 |
-
if not os.path.exists(dataset_path):
|
| 48 |
-
logger.info("Downloading dataset from Kaggle...")
|
| 49 |
-
dataset_path = kagglehub.dataset_download("alessandrasala79/ai-vs-human-generated-dataset")
|
| 50 |
|
|
|
|
|
|
|
| 51 |
# Validate dataset
|
| 52 |
if not os.path.exists(image_folder):
|
| 53 |
raise FileNotFoundError(f"Expected dataset folder not found: {image_folder}")
|
| 54 |
|
| 55 |
# Load images dynamically
|
| 56 |
-
|
| 57 |
|
| 58 |
-
if not
|
| 59 |
raise ValueError("No images found in the dataset!")
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
| 63 |
# Create image index
|
| 64 |
self._create_image_index()
|
| 65 |
self.initialized = True
|
|
|
|
| 13 |
from gtts import gTTS
|
| 14 |
import tempfile
|
| 15 |
import torch.nn.utils.prune as prune
|
| 16 |
+
import random
|
| 17 |
|
| 18 |
# Configure logging
|
| 19 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
|
|
|
| 39 |
self.initialized = False
|
| 40 |
|
| 41 |
def initialize_dataset(self) -> None:
|
| 42 |
+
"""Automatically download and process the dataset with a 500-sample limit."""
|
| 43 |
try:
|
| 44 |
+
logger.info("Downloading dataset from KaggleHub...")
|
| 45 |
+
dataset_path = kagglehub.dataset_download("alessandrasala79/ai-vs-human-generated-dataset")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
+
image_folder = os.path.join(dataset_path, 'test_data_v2') # Adjust if needed
|
| 48 |
+
|
| 49 |
# Validate dataset
|
| 50 |
if not os.path.exists(image_folder):
|
| 51 |
raise FileNotFoundError(f"Expected dataset folder not found: {image_folder}")
|
| 52 |
|
| 53 |
# Load images dynamically
|
| 54 |
+
all_images = [f for f in Path(image_folder).glob("**/*") if f.suffix.lower() in ['.jpg', '.jpeg', '.png']]
|
| 55 |
|
| 56 |
+
if not all_images:
|
| 57 |
raise ValueError("No images found in the dataset!")
|
| 58 |
|
| 59 |
+
# Limit dataset to 500 randomly selected samples
|
| 60 |
+
self.image_paths = random.sample(all_images, min(500, len(all_images)))
|
| 61 |
+
|
| 62 |
+
logger.info(f"Loaded {len(self.image_paths)} images (limited to 500 samples).")
|
| 63 |
+
|
| 64 |
# Create image index
|
| 65 |
self._create_image_index()
|
| 66 |
self.initialized = True
|