youssef committed
Commit be20973 · 1 Parent(s): 36900cf

install at runtime
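In short: this commit removes the pinned flash-attn wheel from requirements.txt (the wheel is tagged cu12 / torch2.2 / cp310, while requirements.txt pins torch==2.1.2, so the prebuilt binary would likely fail to load against the installed torch) and instead has VideoAnalyzer attempt a best-effort pip install of flash-attn at runtime, falling back to PyTorch's SDPA attention if the install fails.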

requirements.txt CHANGED
@@ -1,7 +1,6 @@
 torch==2.1.2
 torchvision==0.16.2
 transformers @ git+https://github.com/huggingface/[email protected]
-flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.2cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
 num2words==0.5.13
 gradio==4.19.2
 av==10.0.0
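A plausible reason for dropping the wheel (an inference, not stated in the commit message): the tags baked into a flash-attn wheel filename (cu12, torch2.2, cp310) must match the installed CUDA, torch, and Python versions, and this environment pins torch==2.1.2. A quick sanity check of those values, for anyone re-pinning a wheel:

import sys
import torch

# Each value must match a tag in the flash-attn wheel filename:
print(torch.__version__)       # e.g. "2.1.2+cu121" -> needs a torch2.1 build
print(torch.version.cuda)      # e.g. "12.1"        -> needs a cu12 build
print(sys.version_info[:2])    # e.g. (3, 10)       -> needs a cp310 build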
src/video_processor/processor.py CHANGED
@@ -3,6 +3,8 @@ from transformers import AutoProcessor, AutoModelForImageTextToText
 from typing import List, Dict
 import numpy as np
 import logging
+import subprocess
+import os
 
 logger = logging.getLogger(__name__)
 
@@ -15,6 +17,25 @@ class VideoAnalyzer:
             raise RuntimeError("CUDA is required but not available!")
 
         logger.info("Initializing VideoAnalyzer")
+
+        # Try to install flash-attention at runtime
+        logger.info("Attempting to install flash-attention...")
+        try:
+            env = os.environ.copy()
+            env['FLASH_ATTENTION_SKIP_CUDA_BUILD'] = "TRUE"
+            subprocess.run(
+                'pip install flash-attn --no-build-isolation',
+                env=env,
+                shell=True,
+                check=True,
+                capture_output=True
+            )
+            logger.info("Successfully installed flash-attention")
+            use_flash_attn = True
+        except subprocess.CalledProcessError as e:
+            logger.warning(f"Failed to install flash-attention: {e.stderr.decode()}")
+            use_flash_attn = False
+
         self.model_path = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
         logger.info(f"Loading model from {self.model_path}")
 
@@ -27,9 +48,9 @@ class VideoAnalyzer:
         self.model = AutoModelForImageTextToText.from_pretrained(
             self.model_path,
             torch_dtype=torch.bfloat16,
-            _attn_implementation="flash_attention_2"
+            _attn_implementation="flash_attention_2" if use_flash_attn else "sdpa"
         ).to(DEVICE)
-        logger.info(f"Model loaded on device: {self.model.device}")
+        logger.info(f"Model loaded on device: {self.model.device} using attention implementation: {'flash_attention_2' if use_flash_attn else 'sdpa'}")
 
     def process_video(self, video_path: str, frame_interval: int = 30) -> List[Dict]:
         logger.info(f"Processing video: {video_path} with frame_interval={frame_interval}")