Spaces:
Running
Running
jhj0517
committed on
Commit
·
e3a6426
1
Parent(s):
3e8d967
add parameters
Browse files- app.py +15 -3
- modules/faster_whisper_inference.py +2 -0
- modules/whisper_Inference.py +2 -0
- modules/whisper_data_class.py +8 -0
app.py
CHANGED
|
@@ -63,6 +63,8 @@ class App:
|
|
| 63 |
nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
|
| 64 |
nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
|
| 65 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
|
|
|
|
|
|
| 66 |
with gr.Row():
|
| 67 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 68 |
with gr.Row():
|
|
@@ -77,7 +79,9 @@ class App:
|
|
| 77 |
beam_size=nb_beam_size,
|
| 78 |
log_prob_threshold=nb_log_prob_threshold,
|
| 79 |
no_speech_threshold=nb_no_speech_threshold,
|
| 80 |
-
compute_type=dd_compute_type
|
|
|
|
|
|
|
| 81 |
btn_run.click(fn=self.whisper_inf.transcribe_file,
|
| 82 |
inputs=params + whisper_params.to_list(),
|
| 83 |
outputs=[tb_indicator, files_subtitles])
|
|
@@ -109,6 +113,8 @@ class App:
|
|
| 109 |
nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
|
| 110 |
nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
|
| 111 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
|
|
|
|
|
|
| 112 |
with gr.Row():
|
| 113 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 114 |
with gr.Row():
|
|
@@ -123,7 +129,9 @@ class App:
|
|
| 123 |
beam_size=nb_beam_size,
|
| 124 |
log_prob_threshold=nb_log_prob_threshold,
|
| 125 |
no_speech_threshold=nb_no_speech_threshold,
|
| 126 |
-
compute_type=dd_compute_type
|
|
|
|
|
|
|
| 127 |
btn_run.click(fn=self.whisper_inf.transcribe_youtube,
|
| 128 |
inputs=params + whisper_params.to_list(),
|
| 129 |
outputs=[tb_indicator, files_subtitles])
|
|
@@ -148,6 +156,8 @@ class App:
|
|
| 148 |
nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
|
| 149 |
nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
|
| 150 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
|
|
|
|
|
|
| 151 |
with gr.Row():
|
| 152 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 153 |
with gr.Row():
|
|
@@ -162,7 +172,9 @@ class App:
|
|
| 162 |
beam_size=nb_beam_size,
|
| 163 |
log_prob_threshold=nb_log_prob_threshold,
|
| 164 |
no_speech_threshold=nb_no_speech_threshold,
|
| 165 |
-
compute_type=dd_compute_type
|
|
|
|
|
|
|
| 166 |
btn_run.click(fn=self.whisper_inf.transcribe_mic,
|
| 167 |
inputs=params + whisper_params.to_list(),
|
| 168 |
outputs=[tb_indicator, files_subtitles])
|
|
|
|
| 63 |
nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
|
| 64 |
nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
|
| 65 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
| 66 |
+
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
| 67 |
+
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
| 68 |
with gr.Row():
|
| 69 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 70 |
with gr.Row():
|
|
|
|
| 79 |
beam_size=nb_beam_size,
|
| 80 |
log_prob_threshold=nb_log_prob_threshold,
|
| 81 |
no_speech_threshold=nb_no_speech_threshold,
|
| 82 |
+
compute_type=dd_compute_type,
|
| 83 |
+
best_of=nb_best_of,
|
| 84 |
+
patience=nb_patience)
|
| 85 |
btn_run.click(fn=self.whisper_inf.transcribe_file,
|
| 86 |
inputs=params + whisper_params.to_list(),
|
| 87 |
outputs=[tb_indicator, files_subtitles])
|
|
|
|
| 113 |
nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
|
| 114 |
nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
|
| 115 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
| 116 |
+
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
| 117 |
+
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
| 118 |
with gr.Row():
|
| 119 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 120 |
with gr.Row():
|
|
|
|
| 129 |
beam_size=nb_beam_size,
|
| 130 |
log_prob_threshold=nb_log_prob_threshold,
|
| 131 |
no_speech_threshold=nb_no_speech_threshold,
|
| 132 |
+
compute_type=dd_compute_type,
|
| 133 |
+
best_of=nb_best_of,
|
| 134 |
+
patience=nb_patience)
|
| 135 |
btn_run.click(fn=self.whisper_inf.transcribe_youtube,
|
| 136 |
inputs=params + whisper_params.to_list(),
|
| 137 |
outputs=[tb_indicator, files_subtitles])
|
|
|
|
| 156 |
nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
|
| 157 |
nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
|
| 158 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
| 159 |
+
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
| 160 |
+
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
| 161 |
with gr.Row():
|
| 162 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
| 163 |
with gr.Row():
|
|
|
|
| 172 |
beam_size=nb_beam_size,
|
| 173 |
log_prob_threshold=nb_log_prob_threshold,
|
| 174 |
no_speech_threshold=nb_no_speech_threshold,
|
| 175 |
+
compute_type=dd_compute_type,
|
| 176 |
+
best_of=nb_best_of,
|
| 177 |
+
patience=nb_patience)
|
| 178 |
btn_run.click(fn=self.whisper_inf.transcribe_mic,
|
| 179 |
inputs=params + whisper_params.to_list(),
|
| 180 |
outputs=[tb_indicator, files_subtitles])
|
modules/faster_whisper_inference.py
CHANGED
|
@@ -264,6 +264,8 @@ class FasterWhisperInference(BaseInterface):
|
|
| 264 |
beam_size=params.beam_size,
|
| 265 |
log_prob_threshold=params.log_prob_threshold,
|
| 266 |
no_speech_threshold=params.no_speech_threshold,
|
|
|
|
|
|
|
| 267 |
)
|
| 268 |
progress(0, desc="Loading audio..")
|
| 269 |
|
|
|
|
| 264 |
beam_size=params.beam_size,
|
| 265 |
log_prob_threshold=params.log_prob_threshold,
|
| 266 |
no_speech_threshold=params.no_speech_threshold,
|
| 267 |
+
best_of=params.best_of,
|
| 268 |
+
patience=params.patience
|
| 269 |
)
|
| 270 |
progress(0, desc="Loading audio..")
|
| 271 |
|
modules/whisper_Inference.py
CHANGED
|
@@ -255,6 +255,8 @@ class WhisperInference(BaseInterface):
|
|
| 255 |
no_speech_threshold=params.no_speech_threshold,
|
| 256 |
task="translate" if params.is_translate and self.current_model_size in self.translatable_model else "transcribe",
|
| 257 |
fp16=True if params.compute_type == "float16" else False,
|
|
|
|
|
|
|
| 258 |
progress_callback=progress_callback)["segments"]
|
| 259 |
elapsed_time = time.time() - start_time
|
| 260 |
|
|
|
|
| 255 |
no_speech_threshold=params.no_speech_threshold,
|
| 256 |
task="translate" if params.is_translate and self.current_model_size in self.translatable_model else "transcribe",
|
| 257 |
fp16=True if params.compute_type == "float16" else False,
|
| 258 |
+
best_of=params.best_of,
|
| 259 |
+
patience=params.patience,
|
| 260 |
progress_callback=progress_callback)["segments"]
|
| 261 |
elapsed_time = time.time() - start_time
|
| 262 |
|
modules/whisper_data_class.py
CHANGED
|
@@ -11,6 +11,8 @@ class WhisperGradioComponents:
|
|
| 11 |
log_prob_threshold: gr.Number
|
| 12 |
no_speech_threshold: gr.Number
|
| 13 |
compute_type: gr.Dropdown
|
|
|
|
|
|
|
| 14 |
"""
|
| 15 |
A data class to pass Gradio components to the function before Gradio pre-processing.
|
| 16 |
See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components
|
|
@@ -35,6 +37,10 @@ class WhisperGradioComponents:
|
|
| 35 |
compute_type: gr.Dropdown
|
| 36 |
compute type for transcription.
|
| 37 |
see more info : https://opennmt.net/CTranslate2/quantization.html
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
"""
|
| 39 |
|
| 40 |
def to_list(self) -> list:
|
|
@@ -74,6 +80,8 @@ class WhisperValues:
|
|
| 74 |
log_prob_threshold: float
|
| 75 |
no_speech_threshold: float
|
| 76 |
compute_type: str
|
|
|
|
|
|
|
| 77 |
"""
|
| 78 |
A data class to use Whisper parameters in the function after Gradio pre-processing.
|
| 79 |
See this documentation for more information about Gradio pre-processing: : https://www.gradio.app/docs/components
|
|
|
|
| 11 |
log_prob_threshold: gr.Number
|
| 12 |
no_speech_threshold: gr.Number
|
| 13 |
compute_type: gr.Dropdown
|
| 14 |
+
best_of: gr.Number
|
| 15 |
+
patience: gr.Number
|
| 16 |
"""
|
| 17 |
A data class to pass Gradio components to the function before Gradio pre-processing.
|
| 18 |
See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components
|
|
|
|
| 37 |
compute_type: gr.Dropdown
|
| 38 |
compute type for transcription.
|
| 39 |
see more info : https://opennmt.net/CTranslate2/quantization.html
|
| 40 |
+
best_of: gr.Number
|
| 41 |
+
Number of candidates when sampling with non-zero temperature.
|
| 42 |
+
patience: gr.Number
|
| 43 |
+
Beam search patience factor.
|
| 44 |
"""
|
| 45 |
|
| 46 |
def to_list(self) -> list:
|
|
|
|
| 80 |
log_prob_threshold: float
|
| 81 |
no_speech_threshold: float
|
| 82 |
compute_type: str
|
| 83 |
+
best_of: int
|
| 84 |
+
patience: float
|
| 85 |
"""
|
| 86 |
A data class to use Whisper parameters in the function after Gradio pre-processing.
|
| 87 |
See this documentation for more information about Gradio pre-processing: : https://www.gradio.app/docs/components
|