Update app.py
Browse files
app.py
CHANGED
@@ -122,43 +122,42 @@ class App:
|
|
122 |
value=self.whisper_inf.diarizer.get_device(),
|
123 |
interactive=True, visible=False)
|
124 |
|
125 |
-
with gr.
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
interactive=True, visible=False)
|
162 |
|
163 |
with gr.Accordion("Advanced processing options", open=False, visible=False):
|
164 |
nb_beam_size = gr.Number(label="Beam Size", value=whisper_params["beam_size"], precision=0, interactive=True,
|
|
|
122 |
value=self.whisper_inf.diarizer.get_device(),
|
123 |
interactive=True, visible=False)
|
124 |
|
125 |
+
with gr.Accordion("Voice Detection Filter (⚠ experimental)", open=False, visible=True):
|
126 |
+
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
|
127 |
+
interactive=True,
|
128 |
+
info="Enable to transcribe only detected voice parts")
|
129 |
+
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
130 |
+
value=vad_params["threshold"],
|
131 |
+
info="Lower it to be more sensitive to small sounds")
|
132 |
+
nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
|
133 |
+
value=vad_params["min_speech_duration_ms"],
|
134 |
+
info="Final speech chunks shorter than this time are thrown out")
|
135 |
+
nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
|
136 |
+
value=vad_params["max_speech_duration_s"],
|
137 |
+
info="Maximum duration of speech chunks in seconds")
|
138 |
+
nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0,
|
139 |
+
value=vad_params["min_silence_duration_ms"],
|
140 |
+
info="In the end of each speech chunk wait for this time"
|
141 |
+
" before separating it")
|
142 |
+
nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
|
143 |
+
info="Final speech chunks are padded by this time each side")
|
144 |
+
|
145 |
+
with gr.Accordion("Background Music Remover Filter (⚠ experimental)", open=False):
|
146 |
+
cb_bgm_separation = gr.Checkbox(label="Enable Background Music Remover Filter", value=uvr_params["is_separate_bgm"],
|
147 |
+
info="Enable to remove background music by submodel before transcribing",
|
148 |
+
interactive=True)
|
149 |
+
dd_uvr_device = gr.Dropdown(label="Device",
|
150 |
+
value=self.whisper_inf.music_separator.device,
|
151 |
+
choices=self.whisper_inf.music_separator.available_devices,
|
152 |
+
interactive=True, visible=False)
|
153 |
+
dd_uvr_model_size = gr.Dropdown(label="Model", value=uvr_params["model_size"],
|
154 |
+
choices=self.whisper_inf.music_separator.available_models)
|
155 |
+
nb_uvr_segment_size = gr.Number(label="Segment Size", value=uvr_params["segment_size"], precision=0,
|
156 |
+
interactive=True, visible=False)
|
157 |
+
cb_uvr_save_file = gr.Checkbox(label="Save separated files to output", value=uvr_params["save_file"],
|
158 |
+
interactive=True, visible=False)
|
159 |
+
cb_uvr_enable_offload = gr.Checkbox(label="Offload sub model after removing background music",value=uvr_params["enable_offload"],
|
160 |
+
interactive=True, visible=False)
|
|
|
161 |
|
162 |
with gr.Accordion("Advanced processing options", open=False, visible=False):
|
163 |
nb_beam_size = gr.Number(label="Beam Size", value=whisper_params["beam_size"], precision=0, interactive=True,
|