youssef committed
Commit d200533
Parent(s): bd727fa
remove flash attn
src/video_processor/processor.py
CHANGED
@@ -34,7 +34,7 @@ class VideoAnalyzer:
         self.model = AutoModelForImageTextToText.from_pretrained(
             self.model_path,
             torch_dtype=torch.bfloat16,
-            _attn_implementation="flash_attention_2"
+            # _attn_implementation="flash_attention_2"
         ).to(DEVICE)
         logger.info(f"Model loaded on device: {self.model.device} using attention implementation: flash_attention_2")

@@ -70,6 +70,11 @@ class VideoAnalyzer:
             return_tensors="pt"
         ).to(self.model.device)

+        # Convert inputs to bfloat16 before moving to GPU
+        #for key in inputs:
+        #    if torch.is_tensor(inputs[key]):
+        #        inputs[key] = inputs[key].to(dtype=torch.bfloat16, device=self.model.device)
+
         # Generate description with increased token limit
         generated_ids = self.model.generate(
             **inputs,
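
For readers applying this change locally, a common follow-up (not part of this commit) is to pick the attention implementation at load time based on whether the optional flash-attn package is installed, falling back to PyTorch's SDPA kernels otherwise. A minimal sketch, assuming a recent transformers version; DEVICE stands in for the constant used in processor.py and load_model is a hypothetical helper, not code from this repository:

import importlib.util

import torch
from transformers import AutoModelForImageTextToText

# Assumption: mirrors the DEVICE constant defined elsewhere in processor.py.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def load_model(model_path: str):
    # Use FlashAttention-2 only when the optional flash-attn package is importable;
    # otherwise fall back to PyTorch's built-in SDPA attention.
    attn_impl = "flash_attention_2" if importlib.util.find_spec("flash_attn") else "sdpa"
    model = AutoModelForImageTextToText.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        attn_implementation=attn_impl,
    ).to(DEVICE)
    return model, attn_impl

Returning the chosen implementation also lets the logger.info call report the value actually in use, instead of the hard-coded "flash_attention_2" string that remains in the diff above.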
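The second hunk also leaves behind a commented-out cast of the processor outputs to bfloat16. If that path is ever re-enabled, a safer variant is to cast only floating-point tensors (e.g. pixel_values), since integer tensors such as input_ids must keep their dtype for generate() to work. A sketch under that assumption; cast_float_inputs is a hypothetical helper, not part of this commit:

import torch

def cast_float_inputs(inputs: dict, device, dtype=torch.bfloat16) -> dict:
    # Move every tensor to the target device, but change dtype only for
    # floating-point tensors; integer inputs (input_ids, attention masks) keep theirs.
    out = {}
    for key, value in inputs.items():
        if torch.is_tensor(value) and value.is_floating_point():
            out[key] = value.to(device=device, dtype=dtype)
        elif torch.is_tensor(value):
            out[key] = value.to(device=device)
        else:
            out[key] = value
    return out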