JacobLinCool committed
Commit 8488f1e · 1 Parent(s): 487ed33

fix: flash attention

Files changed (2):
  1. model.py +14 -1
  2. requirements.txt +0 -1
model.py CHANGED

@@ -15,10 +15,23 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
+try:
+    import subprocess
+
+    subprocess.run(
+        "pip install flash-attn --no-build-isolation",
+        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+        shell=True,
+    )
+    logger.info("Flash Attention installed successfully.")
+    USE_FA = True
+except:
+    USE_FA = False
+    logger.warning("Flash Attention not available. Using standard attention instead.")
+
 # Model constants
 MODEL_ID = "JacobLinCool/whisper-large-v3-turbo-common_voice_19_0-zh-TW"
 PHI_MODEL_ID = "JacobLinCool/Phi-4-multimodal-instruct-commonvoice-zh-tw"
-USE_FA = torch.cuda.is_available() # Use Flash Attention if CUDA is available
 
 # Model instances (initialized lazily)
 pipe: Optional[Pipeline] = None
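The hunk above replaces the static `USE_FA = torch.cuda.is_available()` check with a runtime install of flash-attn: pip runs with `--no-build-isolation` and `FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE` (an environment variable flash-attn's build recognizes for skipping compilation of the CUDA extension at install time), and `USE_FA` records whether that step succeeded. For context, here is a minimal sketch of how such a flag is typically consumed when the ASR pipeline is built. It is an illustration under assumptions, not the actual loading code in this Space, although `attn_implementation`, `model_kwargs`, and `torch_dtype` are real `transformers` parameters.

```python
# Hypothetical sketch of how USE_FA could select the attention backend when
# the Whisper pipeline is created. Not the actual code of this repository.
import torch
from transformers import pipeline

MODEL_ID = "JacobLinCool/whisper-large-v3-turbo-common_voice_19_0-zh-TW"
USE_FA = True  # in model.py this is set by the try/except shown in the diff above


def load_pipe():
    # "flash_attention_2" requires the flash-attn package installed above;
    # "sdpa" falls back to PyTorch's built-in scaled-dot-product attention.
    attn_impl = "flash_attention_2" if USE_FA else "sdpa"
    return pipeline(
        "automatic-speech-recognition",
        model=MODEL_ID,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device="cuda" if torch.cuda.is_available() else "cpu",
        model_kwargs={"attn_implementation": attn_impl},
    )
```

One detail worth keeping in mind: `subprocess.run` without `check=True` does not raise on a non-zero pip exit status, so the `except` branch primarily covers failures to launch the process; an import check (as sketched after the requirements.txt hunk below) is another way to confirm the package is actually usable.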
requirements.txt CHANGED

@@ -4,4 +4,3 @@ transformers
 accelerate
 spaces
 librosa
-flash-attn
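Dropping `flash-attn` from `requirements.txt` and installing it at runtime avoids a common failure mode: building flash-attn from source needs an importable `torch` (and a CUDA toolchain) at install time, which is also why the runtime command passes `--no-build-isolation`. As a point of comparison only, the sketch below shows one defensive variation of that runtime step; the `ensure_flash_attn` helper is hypothetical and not part of this Space. It preserves the parent environment when setting `FLASH_ATTENTION_SKIP_CUDA_BUILD` and verifies availability with an import check instead of relying on the pip call raising an exception.

```python
# Hypothetical helper (not code from this Space): install flash-attn at runtime,
# keeping the parent environment, and report availability via an import check.
import importlib.util
import logging
import os
import subprocess

logger = logging.getLogger(__name__)


def ensure_flash_attn() -> bool:
    if importlib.util.find_spec("flash_attn") is None:
        subprocess.run(
            "pip install flash-attn --no-build-isolation",
            env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
            shell=True,
            check=False,  # a failed install simply means "no flash attention"
        )
        importlib.invalidate_caches()  # let find_spec see a freshly installed package
    available = importlib.util.find_spec("flash_attn") is not None
    if not available:
        logger.warning("flash-attn unavailable; using standard attention instead.")
    return available


USE_FA = ensure_flash_attn()
```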