youssef committed
Commit be20973 · 1 Parent(s): 36900cf

install at runtime
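In short: this commit removes the pinned flash-attn wheel from requirements.txt (the wheel is tagged cu12 / torch2.2 / cp310, while requirements.txt pins torch==2.1.2, so the prebuilt binary would likely fail to load against the installed torch) and instead has VideoAnalyzer attempt a best-effort pip install of flash-attn at runtime, falling back to PyTorch's SDPA attention if the install fails.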

requirements.txt CHANGED
@@ -1,7 +1,6 @@
 torch==2.1.2
 torchvision==0.16.2
 transformers @ git+https://github.com/huggingface/[email protected]
-flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.2cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
 num2words==0.5.13
 gradio==4.19.2
 av==10.0.0
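A plausible reason for dropping the wheel (an inference, not stated in the commit message): the tags baked into a flash-attn wheel filename (cu12, torch2.2, cp310) must match the installed CUDA, torch, and Python versions, and this environment pins torch==2.1.2. A quick sanity check of those values, for anyone re-pinning a wheel:

import sys
import torch

# Each value must match a tag in the flash-attn wheel filename:
print(torch.__version__)       # e.g. "2.1.2+cu121" -> needs a torch2.1 build
print(torch.version.cuda)      # e.g. "12.1"        -> needs a cu12 build
print(sys.version_info[:2])    # e.g. (3, 10)       -> needs a cp310 build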
src/video_processor/processor.py CHANGED
@@ -3,6 +3,8 @@ from transformers import AutoProcessor, AutoModelForImageTextToText
 from typing import List, Dict
 import numpy as np
 import logging
+import subprocess
+import os
 
 logger = logging.getLogger(__name__)
 
@@ -15,6 +17,25 @@ class VideoAnalyzer:
             raise RuntimeError("CUDA is required but not available!")
 
         logger.info("Initializing VideoAnalyzer")
+
+        # Try to install flash-attention at runtime
+        logger.info("Attempting to install flash-attention...")
+        try:
+            env = os.environ.copy()
+            env['FLASH_ATTENTION_SKIP_CUDA_BUILD'] = "TRUE"
+            subprocess.run(
+                'pip install flash-attn --no-build-isolation',
+                env=env,
+                shell=True,
+                check=True,
+                capture_output=True
+            )
+            logger.info("Successfully installed flash-attention")
+            use_flash_attn = True
+        except subprocess.CalledProcessError as e:
+            logger.warning(f"Failed to install flash-attention: {e.stderr.decode()}")
+            use_flash_attn = False
+
         self.model_path = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
         logger.info(f"Loading model from {self.model_path}")
 
@@ -27,9 +48,9 @@ class VideoAnalyzer:
         self.model = AutoModelForImageTextToText.from_pretrained(
             self.model_path,
             torch_dtype=torch.bfloat16,
-            _attn_implementation="flash_attention_2"
+            _attn_implementation="flash_attention_2" if use_flash_attn else "sdpa"
         ).to(DEVICE)
-        logger.info(f"Model loaded on device: {self.model.device}")
+        logger.info(f"Model loaded on device: {self.model.device} using attention implementation: {'flash_attention_2' if use_flash_attn else 'sdpa'}")
 
     def process_video(self, video_path: str, frame_interval: int = 30) -> List[Dict]:
         logger.info(f"Processing video: {video_path} with frame_interval={frame_interval}")