Spaces:
Configuration error
Configuration error
Fedir Zadniprovskyi
committed on
Commit
·
526f427
1
Parent(s):
7785332
fix: gradio app breaks on arm
Browse files
src/faster_whisper_server/gradio_app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from collections.abc import AsyncGenerator
|
| 2 |
from pathlib import Path
|
|
|
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
import httpx
|
|
@@ -9,14 +10,6 @@ from openai import AsyncOpenAI
|
|
| 9 |
from faster_whisper_server.config import Config, Task
|
| 10 |
from faster_whisper_server.hf_utils import PiperModel
|
| 11 |
|
| 12 |
-
# FIX: this won't work on ARM
|
| 13 |
-
from faster_whisper_server.routers.speech import (
|
| 14 |
-
DEFAULT_VOICE,
|
| 15 |
-
MAX_SAMPLE_RATE,
|
| 16 |
-
MIN_SAMPLE_RATE,
|
| 17 |
-
SUPPORTED_RESPONSE_FORMATS,
|
| 18 |
-
)
|
| 19 |
-
|
| 20 |
TRANSCRIPTION_ENDPOINT = "/v1/audio/transcriptions"
|
| 21 |
TRANSLATION_ENDPOINT = "/v1/audio/translations"
|
| 22 |
TIMEOUT_SECONDS = 180
|
|
@@ -163,13 +156,20 @@ def create_gradio_demo(config: Config) -> gr.Blocks: # noqa: C901, PLR0915
|
|
| 163 |
)
|
| 164 |
|
| 165 |
with gr.Tab(label="Speech Generation"):
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
The last part of the voice name is the quality (x_low, low, medium, high).
|
| 174 |
Each quality has a different default sample rate:
|
| 175 |
- x_low: 16000 Hz
|
|
@@ -177,32 +177,34 @@ Each quality has a different default sample rate:
|
|
| 177 |
- medium: 22050 Hz
|
| 178 |
- high: 22050 Hz
|
| 179 |
""",
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
Setting this will resample the generated audio to the desired sample rate.
|
| 193 |
You may want to set this if you are going to use voices of different qualities but want to keep the same sample rate.
|
| 194 |
Default: None (No resampling)
|
| 195 |
""",
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
demo.load(update_whisper_model_dropdown, inputs=None, outputs=model_dropdown)
|
| 207 |
-
demo.load(update_piper_voices_dropdown, inputs=None, outputs=voice_dropdown)
|
| 208 |
return demo
|
|
|
|
| 1 |
from collections.abc import AsyncGenerator
|
| 2 |
from pathlib import Path
|
| 3 |
+
import platform
|
| 4 |
|
| 5 |
import gradio as gr
|
| 6 |
import httpx
|
|
|
|
| 10 |
from faster_whisper_server.config import Config, Task
|
| 11 |
from faster_whisper_server.hf_utils import PiperModel
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
TRANSCRIPTION_ENDPOINT = "/v1/audio/transcriptions"
|
| 14 |
TRANSLATION_ENDPOINT = "/v1/audio/translations"
|
| 15 |
TIMEOUT_SECONDS = 180
|
|
|
|
| 156 |
)
|
| 157 |
|
| 158 |
with gr.Tab(label="Speech Generation"):
|
| 159 |
+
if platform.machine() != "x86_64":
|
| 160 |
+
from faster_whisper_server.routers.speech import (
|
| 161 |
+
DEFAULT_VOICE,
|
| 162 |
+
MAX_SAMPLE_RATE,
|
| 163 |
+
MIN_SAMPLE_RATE,
|
| 164 |
+
SUPPORTED_RESPONSE_FORMATS,
|
| 165 |
+
)
|
| 166 |
+
|
| 167 |
+
text = gr.Textbox(label="Input Text")
|
| 168 |
+
voice_dropdown = gr.Dropdown(
|
| 169 |
+
choices=["en_US-amy-medium"],
|
| 170 |
+
label="Voice",
|
| 171 |
+
value="en_US-amy-medium",
|
| 172 |
+
info="""
|
| 173 |
The last part of the voice name is the quality (x_low, low, medium, high).
|
| 174 |
Each quality has a different default sample rate:
|
| 175 |
- x_low: 16000 Hz
|
|
|
|
| 177 |
- medium: 22050 Hz
|
| 178 |
- high: 22050 Hz
|
| 179 |
""",
|
| 180 |
+
)
|
| 181 |
+
response_fromat_dropdown = gr.Dropdown(
|
| 182 |
+
choices=SUPPORTED_RESPONSE_FORMATS,
|
| 183 |
+
label="Response Format",
|
| 184 |
+
value="wav",
|
| 185 |
+
)
|
| 186 |
+
speed_slider = gr.Slider(minimum=0.25, maximum=4.0, step=0.05, label="Speed", value=1.0)
|
| 187 |
+
sample_rate_slider = gr.Number(
|
| 188 |
+
minimum=MIN_SAMPLE_RATE,
|
| 189 |
+
maximum=MAX_SAMPLE_RATE,
|
| 190 |
+
label="Desired Sample Rate",
|
| 191 |
+
info="""
|
| 192 |
Setting this will resample the generated audio to the desired sample rate.
|
| 193 |
You may want to set this if you are going to use voices of different qualities but want to keep the same sample rate.
|
| 194 |
Default: None (No resampling)
|
| 195 |
""",
|
| 196 |
+
value=lambda: None,
|
| 197 |
+
)
|
| 198 |
+
button = gr.Button("Generate Speech")
|
| 199 |
+
output = gr.Audio(type="filepath")
|
| 200 |
+
button.click(
|
| 201 |
+
handle_audio_speech,
|
| 202 |
+
[text, voice_dropdown, response_fromat_dropdown, speed_slider, sample_rate_slider],
|
| 203 |
+
output,
|
| 204 |
+
)
|
| 205 |
+
demo.load(update_piper_voices_dropdown, inputs=None, outputs=voice_dropdown)
|
| 206 |
+
else:
|
| 207 |
+
gr.Textbox("Speech generation is only supported on x86_64 machines.")
|
| 208 |
|
| 209 |
demo.load(update_whisper_model_dropdown, inputs=None, outputs=model_dropdown)
|
|
|
|
| 210 |
return demo
|