Spaces:
Runtime error
Runtime error
Add large-v1 and large-v2 to CLIs
Browse files
app.py
CHANGED
|
@@ -57,6 +57,8 @@ LANGUAGES = [
|
|
| 57 |
"Hausa", "Bashkir", "Javanese", "Sundanese"
|
| 58 |
]
|
| 59 |
|
|
|
|
|
|
|
| 60 |
class WhisperTranscriber:
|
| 61 |
def __init__(self, input_audio_max_duration: float = DEFAULT_INPUT_AUDIO_MAX_DURATION, vad_process_timeout: float = None, vad_cpu_cores: int = 1, delete_uploaded_files: bool = DELETE_UPLOADED_FILES):
|
| 62 |
self.model_cache = ModelCache()
|
|
@@ -349,7 +351,7 @@ def create_ui(input_audio_max_duration, share=False, server_name: str = None, se
|
|
| 349 |
ui_article = "Read the [documentation here](https://huggingface.co/spaces/aadnk/whisper-webui/blob/main/docs/options.md)"
|
| 350 |
|
| 351 |
demo = gr.Interface(fn=ui.transcribe_webui, description=ui_description, article=ui_article, inputs=[
|
| 352 |
-
gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value=default_model_name, label="Model"),
|
| 353 |
gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
|
| 354 |
gr.Text(label="URL (YouTube, etc.)"),
|
| 355 |
gr.File(label="Upload Files", file_count="multiple"),
|
|
@@ -377,7 +379,7 @@ if __name__ == '__main__':
|
|
| 377 |
parser.add_argument("--share", type=bool, default=False, help="True to share the app on HuggingFace.")
|
| 378 |
parser.add_argument("--server_name", type=str, default=None, help="The host or IP to bind to. If None, bind to localhost.")
|
| 379 |
parser.add_argument("--server_port", type=int, default=7860, help="The port to bind to.")
|
| 380 |
-
parser.add_argument("--default_model_name", type=str, default="medium", help="The default model name.")
|
| 381 |
parser.add_argument("--default_vad", type=str, default="silero-vad", help="The default VAD.")
|
| 382 |
parser.add_argument("--vad_parallel_devices", type=str, default="", help="A commma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
|
| 383 |
parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
|
|
|
|
| 57 |
"Hausa", "Bashkir", "Javanese", "Sundanese"
|
| 58 |
]
|
| 59 |
|
| 60 |
+
WHISPER_MODELS = ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2"]
|
| 61 |
+
|
| 62 |
class WhisperTranscriber:
|
| 63 |
def __init__(self, input_audio_max_duration: float = DEFAULT_INPUT_AUDIO_MAX_DURATION, vad_process_timeout: float = None, vad_cpu_cores: int = 1, delete_uploaded_files: bool = DELETE_UPLOADED_FILES):
|
| 64 |
self.model_cache = ModelCache()
|
|
|
|
| 351 |
ui_article = "Read the [documentation here](https://huggingface.co/spaces/aadnk/whisper-webui/blob/main/docs/options.md)"
|
| 352 |
|
| 353 |
demo = gr.Interface(fn=ui.transcribe_webui, description=ui_description, article=ui_article, inputs=[
|
| 354 |
+
gr.Dropdown(choices=WHISPER_MODELS, value=default_model_name, label="Model"),
|
| 355 |
gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
|
| 356 |
gr.Text(label="URL (YouTube, etc.)"),
|
| 357 |
gr.File(label="Upload Files", file_count="multiple"),
|
|
|
|
| 379 |
parser.add_argument("--share", type=bool, default=False, help="True to share the app on HuggingFace.")
|
| 380 |
parser.add_argument("--server_name", type=str, default=None, help="The host or IP to bind to. If None, bind to localhost.")
|
| 381 |
parser.add_argument("--server_port", type=int, default=7860, help="The port to bind to.")
|
| 382 |
+
parser.add_argument("--default_model_name", type=str, choices=WHISPER_MODELS, default="medium", help="The default model name.")
|
| 383 |
parser.add_argument("--default_vad", type=str, default="silero-vad", help="The default VAD.")
|
| 384 |
parser.add_argument("--vad_parallel_devices", type=str, default="", help="A commma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
|
| 385 |
parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
|
cli.py
CHANGED
|
@@ -6,7 +6,7 @@ import warnings
|
|
| 6 |
import numpy as np
|
| 7 |
|
| 8 |
import torch
|
| 9 |
-
from app import LANGUAGES, WhisperTranscriber
|
| 10 |
from src.download import download_url
|
| 11 |
|
| 12 |
from src.utils import optional_float, optional_int, str2bool
|
|
@@ -15,7 +15,7 @@ from src.whisperContainer import WhisperContainer
|
|
| 15 |
def cli():
|
| 16 |
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
| 17 |
parser.add_argument("audio", nargs="+", type=str, help="audio file(s) to transcribe")
|
| 18 |
-
parser.add_argument("--model", default="small", choices=["tiny", "base", "small", "medium", "large"], help="name of the Whisper model to use")
|
| 19 |
parser.add_argument("--model_dir", type=str, default=None, help="the path to save model files; uses ~/.cache/whisper by default")
|
| 20 |
parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu", help="device to use for PyTorch inference")
|
| 21 |
parser.add_argument("--output_dir", "-o", type=str, default=".", help="directory to save the outputs")
|
|
|
|
| 6 |
import numpy as np
|
| 7 |
|
| 8 |
import torch
|
| 9 |
+
from app import LANGUAGES, WHISPER_MODELS, WhisperTranscriber
|
| 10 |
from src.download import download_url
|
| 11 |
|
| 12 |
from src.utils import optional_float, optional_int, str2bool
|
|
|
|
| 15 |
def cli():
|
| 16 |
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
| 17 |
parser.add_argument("audio", nargs="+", type=str, help="audio file(s) to transcribe")
|
| 18 |
+
parser.add_argument("--model", default="small", choices=WHISPER_MODELS, help="name of the Whisper model to use")
|
| 19 |
parser.add_argument("--model_dir", type=str, default=None, help="the path to save model files; uses ~/.cache/whisper by default")
|
| 20 |
parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu", help="device to use for PyTorch inference")
|
| 21 |
parser.add_argument("--output_dir", "-o", type=str, default=".", help="directory to save the outputs")
|