Spaces: Running on T4
Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ import torch
|
|
6 |
import gradio as gr
|
7 |
import pytube as pt
|
8 |
import spaces
|
9 |
-
from transformers import pipeline
|
10 |
from huggingface_hub import model_info
|
11 |
try:
|
12 |
import flash_attn
|
@@ -25,9 +25,14 @@ print(f"Using device: {device}")
|
|
25 |
|
26 |
@spaces.GPU(duration=60)
|
27 |
def pipe(file, return_timestamps=False):
|
|
|
|
|
|
|
|
|
|
|
28 |
asr = pipeline(
|
29 |
task="automatic-speech-recognition",
|
30 |
-
model=
|
31 |
chunk_length_s=30,
|
32 |
device=device,
|
33 |
token=auth_token,
|
|
|
6 |
import gradio as gr
|
7 |
import pytube as pt
|
8 |
import spaces
|
9 |
+
from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
|
10 |
from huggingface_hub import model_info
|
11 |
try:
|
12 |
import flash_attn
|
|
|
25 |
|
26 |
@spaces.GPU(duration=60)
|
27 |
def pipe(file, return_timestamps=False):
|
28 |
+
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, low_cpu_mem_usage=True)
|
29 |
+
model.to(device)
|
30 |
+
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
|
31 |
+
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
|
32 |
+
model.generation_config.cache_implementation = "static"
|
33 |
asr = pipeline(
|
34 |
task="automatic-speech-recognition",
|
35 |
+
model=model,
|
36 |
chunk_length_s=30,
|
37 |
device=device,
|
38 |
token=auth_token,
|