Spaces:

joy1515
/

retrieval-ai

Sleeping

App Files Community

joy1515 committed on Feb 10

Commit

38d364b

verified ·

1 Parent(s): 3c3d449

adjusted code

Browse files

Files changed (1) hide show

app.py +27 -24

app.py CHANGED Viewed

@@ -27,16 +27,10 @@ class ImageSearchSystem:
         self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
         self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16").to(self.device)
-        # Prune the model (access vision module correctly)
-        parameters_to_prune = (
-            (self.model.vision_model.encoder.layers[0].attention.self.query.weight, 'attention.self.query.weight'),
-            (self.model.vision_model.encoder.layers[0].attention.self.key.weight, 'attention.self.key.weight'),
-            (self.model.vision_model.encoder.layers[0].attention.self.value.weight, 'attention.self.value.weight')
-        )
-        # Prune the weights
-        for param, name in parameters_to_prune:
-            prune.l1_unstructured(param, amount=0.2)
         # Initialize dataset
         self.image_paths = []
@@ -44,30 +38,38 @@ class ImageSearchSystem:
         self.initialized = False
     def initialize_dataset(self) -> None:
-        """Download and process dataset"""
         try:
-            path = kagglehub.dataset_download("alessandrasala79/ai-vs-human-generated-dataset")
-            image_folder = os.path.join(path, 'test_data_v2')
-            self.image_paths = [
-                f for f in Path(image_folder).glob("**/*")
-                if f.suffix.lower() in ['.jpg', '.jpeg', '.png']
-            ]
             if not self.image_paths:
-                raise ValueError(f"No images found in {image_folder}")
-            logger.info(f"Found {len(self.image_paths)} images")
             self._create_image_index()
             self.initialized = True
         except Exception as e:
             logger.error(f"Dataset initialization failed: {str(e)}")
             raise
     def _create_image_index(self, batch_size: int = 32) -> None:
-        """Create FAISS index"""
         try:
             all_features = []
@@ -95,7 +97,7 @@ class ImageSearchSystem:
             raise
     def search(self, query: str, audio_path: str = None, k: int = 5):
-        """Search for images using text or speech"""
         try:
             if not self.initialized:
                 raise RuntimeError("System not initialized. Call initialize_dataset() first.")
@@ -134,7 +136,7 @@ class ImageSearchSystem:
             return [], "Error during search.", None
 def create_demo_interface() -> gr.Interface:
-    """Create Gradio interface with dark mode & speech support"""
     system = ImageSearchSystem()
     try:
@@ -177,3 +179,4 @@ if __name__ == "__main__":
     except Exception as e:
         logger.error(f"Failed to launch app: {str(e)}")
         raise

         self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
         self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16").to(self.device)
+        # Prune the model: mask the smallest 20% (by L1 magnitude) of each Linear layer's weights; masking alone does not reduce memory
+        for name, module in self.model.named_modules():
+            if isinstance(module, torch.nn.Linear):
+                prune.l1_unstructured(module, name='weight', amount=0.2)
         # Initialize dataset
         self.image_paths = []
         self.initialized = False
     def initialize_dataset(self) -> None:
+        """Automatically download and process the dataset."""
         try:
+            dataset_path = os.path.expanduser("~/.kagglehub/datasets/alessandrasala79/ai-vs-human-generated-dataset")
+            image_folder = os.path.join(dataset_path, 'test_data_v2')
+            # Download dataset if not already present
+            if not os.path.exists(dataset_path):
+                logger.info("Downloading dataset from Kaggle...")
+                dataset_path = kagglehub.dataset_download("alessandrasala79/ai-vs-human-generated-dataset")
+            # Validate dataset
+            if not os.path.exists(image_folder):
+                raise FileNotFoundError(f"Expected dataset folder not found: {image_folder}")
+            # Load images dynamically
+            self.image_paths = [f for f in Path(image_folder).glob("**/*") if f.suffix.lower() in ['.jpg', '.jpeg', '.png']]
             if not self.image_paths:
+                raise ValueError("No images found in the dataset!")
+            logger.info(f"Successfully loaded {len(self.image_paths)} images.")
+            # Create image index
             self._create_image_index()
             self.initialized = True
         except Exception as e:
             logger.error(f"Dataset initialization failed: {str(e)}")
             raise
     def _create_image_index(self, batch_size: int = 32) -> None:
+        """Create FAISS index for fast image retrieval."""
         try:
             all_features = []
             raise
     def search(self, query: str, audio_path: str = None, k: int = 5):
+        """Search for images using text or speech."""
         try:
             if not self.initialized:
                 raise RuntimeError("System not initialized. Call initialize_dataset() first.")
             return [], "Error during search.", None
 def create_demo_interface() -> gr.Interface:
+    """Create Gradio interface with dark mode & speech support."""
     system = ImageSearchSystem()
     try:
     except Exception as e:
         logger.error(f"Failed to launch app: {str(e)}")
         raise