Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -13,6 +13,7 @@ import speech_recognition as sr
|
|
13 |
from gtts import gTTS
|
14 |
import tempfile
|
15 |
import torch.nn.utils.prune as prune
|
|
|
16 |
|
17 |
# Configure logging
|
18 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
@@ -38,28 +39,28 @@ class ImageSearchSystem:
|
|
38 |
self.initialized = False
|
39 |
|
40 |
def initialize_dataset(self) -> None:
|
41 |
-
"""Automatically download and process the dataset."""
|
42 |
try:
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
# Download dataset if not already present
|
47 |
-
if not os.path.exists(dataset_path):
|
48 |
-
logger.info("Downloading dataset from Kaggle...")
|
49 |
-
dataset_path = kagglehub.dataset_download("alessandrasala79/ai-vs-human-generated-dataset")
|
50 |
|
|
|
|
|
51 |
# Validate dataset
|
52 |
if not os.path.exists(image_folder):
|
53 |
raise FileNotFoundError(f"Expected dataset folder not found: {image_folder}")
|
54 |
|
55 |
# Load images dynamically
|
56 |
-
|
57 |
|
58 |
-
if not
|
59 |
raise ValueError("No images found in the dataset!")
|
60 |
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
63 |
# Create image index
|
64 |
self._create_image_index()
|
65 |
self.initialized = True
|
|
|
13 |
from gtts import gTTS
|
14 |
import tempfile
|
15 |
import torch.nn.utils.prune as prune
|
16 |
+
import random
|
17 |
|
18 |
# Configure logging
|
19 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
|
|
39 |
self.initialized = False
|
40 |
|
41 |
def initialize_dataset(self) -> None:
|
42 |
+
"""Automatically download and process the dataset with a 500-sample limit."""
|
43 |
try:
|
44 |
+
logger.info("Downloading dataset from KaggleHub...")
|
45 |
+
dataset_path = kagglehub.dataset_download("alessandrasala79/ai-vs-human-generated-dataset")
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
+
image_folder = os.path.join(dataset_path, 'test_data_v2') # Adjust if needed
|
48 |
+
|
49 |
# Validate dataset
|
50 |
if not os.path.exists(image_folder):
|
51 |
raise FileNotFoundError(f"Expected dataset folder not found: {image_folder}")
|
52 |
|
53 |
# Load images dynamically
|
54 |
+
all_images = [f for f in Path(image_folder).glob("**/*") if f.suffix.lower() in ['.jpg', '.jpeg', '.png']]
|
55 |
|
56 |
+
if not all_images:
|
57 |
raise ValueError("No images found in the dataset!")
|
58 |
|
59 |
+
# Limit dataset to 500 randomly selected samples
|
60 |
+
self.image_paths = random.sample(all_images, min(500, len(all_images)))
|
61 |
+
|
62 |
+
logger.info(f"Loaded {len(self.image_paths)} images (limited to 500 samples).")
|
63 |
+
|
64 |
# Create image index
|
65 |
self._create_image_index()
|
66 |
self.initialized = True
|