Shuwei Hou
committed on
Commit
·
652e321
1
Parent(s):
04360c2
update_model_address
Browse files
transcription/transcription.py
CHANGED
|
@@ -15,7 +15,7 @@ from .whisperx.alignment import load_align_model, align
|
|
| 15 |
|
| 16 |
|
| 17 |
class MazeWhisperModel:
|
| 18 |
-
def __init__(self, model_name: str = "
|
| 19 |
self.device = device
|
| 20 |
self.model_name = model_name
|
| 21 |
|
|
@@ -50,7 +50,7 @@ class MazeWhisperModel:
|
|
| 50 |
|
| 51 |
|
| 52 |
class WhisperXPipeline:
|
| 53 |
-
def __init__(self, model_name: str = "
|
| 54 |
vad_method: str = "pyannote", chunk_size: int = 30,
|
| 55 |
enable_alignment: bool = True, align_language: str = "en"):
|
| 56 |
self.device = device
|
|
@@ -288,7 +288,7 @@ def translate_audio_file(model: str = "mazeWhisper", audio_path: str = "", devic
|
|
| 288 |
|
| 289 |
try:
|
| 290 |
pipeline = WhisperXPipeline(
|
| 291 |
-
model_name="
|
| 292 |
device=device,
|
| 293 |
vad_method="pyannote",
|
| 294 |
chunk_size=10,
|
|
@@ -356,7 +356,7 @@ def translate_audio_file(model: str = "mazeWhisper", audio_path: str = "", devic
|
|
| 356 |
result_data = {
|
| 357 |
"session_id": session_id,
|
| 358 |
"audio_path": audio_path,
|
| 359 |
-
"model": "
|
| 360 |
"device": device,
|
| 361 |
"alignment_enabled": enable_alignment,
|
| 362 |
"has_word_timestamps": has_word_timestamps,
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
class MazeWhisperModel:
|
| 18 |
+
def __init__(self, model_name: str = "sven33/maze-whisper-3000", device: str = "cuda"):
|
| 19 |
self.device = device
|
| 20 |
self.model_name = model_name
|
| 21 |
|
|
|
|
| 50 |
|
| 51 |
|
| 52 |
class WhisperXPipeline:
|
| 53 |
+
def __init__(self, model_name: str = "sven33/maze-whisper-3000", device: str = "cuda",
|
| 54 |
vad_method: str = "pyannote", chunk_size: int = 30,
|
| 55 |
enable_alignment: bool = True, align_language: str = "en"):
|
| 56 |
self.device = device
|
|
|
|
| 288 |
|
| 289 |
try:
|
| 290 |
pipeline = WhisperXPipeline(
|
| 291 |
+
model_name="sven33/maze-whisper-3000",
|
| 292 |
device=device,
|
| 293 |
vad_method="pyannote",
|
| 294 |
chunk_size=10,
|
|
|
|
| 356 |
result_data = {
|
| 357 |
"session_id": session_id,
|
| 358 |
"audio_path": audio_path,
|
| 359 |
+
"model": "sven33/maze-whisper-3000",
|
| 360 |
"device": device,
|
| 361 |
"alignment_enabled": enable_alignment,
|
| 362 |
"has_word_timestamps": has_word_timestamps,
|