Spaces: Running on Zero

Commit 8488f1e · Parent(s): 487ed33

fix: flash attention

Files changed:
- model.py +14 -1
- requirements.txt +0 -1
model.py
CHANGED
@@ -15,10 +15,23 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
+try:
+    import subprocess
+
+    subprocess.run(
+        "pip install flash-attn --no-build-isolation",
+        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+        shell=True,
+    )
+    logger.info("Flash Attention installed successfully.")
+    USE_FA = True
+except:
+    USE_FA = False
+    logger.warning("Flash Attention not available. Using standard attention instead.")
+
 # Model constants
 MODEL_ID = "JacobLinCool/whisper-large-v3-turbo-common_voice_19_0-zh-TW"
 PHI_MODEL_ID = "JacobLinCool/Phi-4-multimodal-instruct-commonvoice-zh-tw"
-USE_FA = torch.cuda.is_available()  # Use Flash Attention if CUDA is available
 
 # Model instances (initialized lazily)
 pipe: Optional[Pipeline] = None
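Note on the change: instead of declaring flash-attn as a build-time dependency (removed from requirements.txt below) and gating USE_FA on torch.cuda.is_available(), the Space now tries to install the prebuilt flash-attn wheel at import time (FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE skips compiling the CUDA kernels) and sets USE_FA based on whether that install succeeds. The hunk does not show where USE_FA is consumed later in model.py; the snippet below is only a sketch of the common pattern, assuming the flag selects the Transformers attn_implementation when the ASR pipeline is created. The load_pipe helper and its arguments are illustrative, not code from this commit.

# Illustrative sketch (not from model.py): how a USE_FA flag is typically
# used when constructing a Transformers ASR pipeline.
import torch
from transformers import pipeline

MODEL_ID = "JacobLinCool/whisper-large-v3-turbo-common_voice_19_0-zh-TW"  # constant from model.py

def load_pipe(use_fa: bool):
    # "flash_attention_2" requires the flash-attn wheel; "sdpa" falls back to
    # PyTorch scaled-dot-product attention when the wheel is unavailable.
    attn = "flash_attention_2" if use_fa else "sdpa"
    return pipeline(
        "automatic-speech-recognition",
        model=MODEL_ID,
        torch_dtype=torch.float16,
        device_map="auto",
        model_kwargs={"attn_implementation": attn},  # forwarded to from_pretrained
    )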
requirements.txt
CHANGED
@@ -4,4 +4,3 @@ transformers
 accelerate
 spaces
 librosa
-flash-attn