KingNish committed on
Commit
8d01bbb
·
verified ·
1 Parent(s): e7a6563

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -7,13 +7,19 @@ import uuid
7
  import scipy.io.wavfile
8
  import time
9
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, WhisperTokenizer, pipeline
 
 
 
 
 
 
10
 
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
  torch_dtype = torch.float16
13
  MODEL_NAME = "openai/whisper-large-v3-turbo"
14
 
15
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
16
- MODEL_NAME, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
17
  )
18
  model.to(device)
19
 
 
7
  import scipy.io.wavfile
8
  import time
9
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, WhisperTokenizer, pipeline
10
+ import subprocess
11
+ subprocess.run(
12
+ "pip install flash-attn --no-build-isolation",
13
+ env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
14
+ shell=True,
15
+ )
16
 
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
18
  torch_dtype = torch.float16
19
  MODEL_NAME = "openai/whisper-large-v3-turbo"
20
 
21
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
22
+ MODEL_NAME, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation="flash_attention_2"
23
  )
24
  model.to(device)
25