joy1515 committed on
Commit
9ee7c56
·
verified ·
1 Parent(s): 38d364b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -12
app.py CHANGED
@@ -13,6 +13,7 @@ import speech_recognition as sr
13
  from gtts import gTTS
14
  import tempfile
15
  import torch.nn.utils.prune as prune
 
16
 
17
  # Configure logging
18
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -38,28 +39,28 @@ class ImageSearchSystem:
38
  self.initialized = False
39
 
40
  def initialize_dataset(self) -> None:
41
- """Automatically download and process the dataset."""
42
  try:
43
- dataset_path = os.path.expanduser("~/.kagglehub/datasets/alessandrasala79/ai-vs-human-generated-dataset")
44
- image_folder = os.path.join(dataset_path, 'test_data_v2')
45
-
46
- # Download dataset if not already present
47
- if not os.path.exists(dataset_path):
48
- logger.info("Downloading dataset from Kaggle...")
49
- dataset_path = kagglehub.dataset_download("alessandrasala79/ai-vs-human-generated-dataset")
50
 
 
 
51
  # Validate dataset
52
  if not os.path.exists(image_folder):
53
  raise FileNotFoundError(f"Expected dataset folder not found: {image_folder}")
54
 
55
  # Load images dynamically
56
- self.image_paths = [f for f in Path(image_folder).glob("**/*") if f.suffix.lower() in ['.jpg', '.jpeg', '.png']]
57
 
58
- if not self.image_paths:
59
  raise ValueError("No images found in the dataset!")
60
 
61
- logger.info(f"Successfully loaded {len(self.image_paths)} images.")
62
-
 
 
 
63
  # Create image index
64
  self._create_image_index()
65
  self.initialized = True
 
13
  from gtts import gTTS
14
  import tempfile
15
  import torch.nn.utils.prune as prune
16
+ import random
17
 
18
  # Configure logging
19
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 
39
  self.initialized = False
40
 
41
  def initialize_dataset(self) -> None:
42
+ """Automatically download and process the dataset with a 500-sample limit."""
43
  try:
44
+ logger.info("Downloading dataset from KaggleHub...")
45
+ dataset_path = kagglehub.dataset_download("alessandrasala79/ai-vs-human-generated-dataset")
 
 
 
 
 
46
 
47
+ image_folder = os.path.join(dataset_path, 'test_data_v2') # Adjust if needed
48
+
49
  # Validate dataset
50
  if not os.path.exists(image_folder):
51
  raise FileNotFoundError(f"Expected dataset folder not found: {image_folder}")
52
 
53
  # Load images dynamically
54
+ all_images = [f for f in Path(image_folder).glob("**/*") if f.suffix.lower() in ['.jpg', '.jpeg', '.png']]
55
 
56
+ if not all_images:
57
  raise ValueError("No images found in the dataset!")
58
 
59
+ # Limit dataset to 500 randomly selected samples
60
+ self.image_paths = random.sample(all_images, min(500, len(all_images)))
61
+
62
+ logger.info(f"Loaded {len(self.image_paths)} images (limited to 500 samples).")
63
+
64
  # Create image index
65
  self._create_image_index()
66
  self.initialized = True