Update app.py
app.py
CHANGED
@@ -1,8 +1,16 @@
-import gradio as gr
+# import gradio as gr
 import os
 import torch
 from huggingface_hub import InferenceClient

+
+# Khurram
+from fastapi import FastAPI, Query
+from pydantic import BaseModel
+import uvicorn
+from fastapi.responses import JSONResponse
+#################
+
 # Import eSpeak TTS pipeline
 from tts_cli import (
     build_model as build_model_espeak,
@@ -111,85 +119,123 @@ def tts_inference(text, engine, model_file, voice_file, speed=1.0):
     return (sr, audio)  # Gradio expects (sample_rate, np_array)


-# ---------------------------------------------------------------------
-# Build Gradio App
-# ---------------------------------------------------------------------
-def create_gradio_app():
-    model_list = get_models()
-    voice_list = get_voices()
-
-    css = """
-    h4 {
-        text-align: center;
-        display:block;
-    }
-    h2 {
-        text-align: center;
-        display:block;
-    }
-    """
-    with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
-        gr.Markdown("## LLAMA TTS DEMO - API - GRADIO VISUAL")
-
-        # Row 1: Text input
-        text_input = gr.Textbox(
-            label="Enter your question",
-            value="What is AI?",
-            lines=2,
-        )

-        # Row 2: Engine selection
-        # engine_dropdown = gr.Dropdown(
-        #     choices=["espeak", "openphonemizer"],
-        #     value="openphonemizer",
-        #     label="Phonemizer",
-        # )
-
-        # Row 3: Model dropdown
-        # model_dropdown = gr.Dropdown(
-        #     choices=model_list,
-        #     value=model_list[0] if model_list else None,
-        #     label="Model (.pth)",
-        # )
-
-        # Row 4: Voice dropdown
-        # voice_dropdown = gr.Dropdown(
-        #     choices=voice_list,
-        #     value=voice_list[0] if voice_list else None,
-        #     label="Voice (.pt)",
-        # )
-
-        # Row 5: Speed slider
-        speed_slider = gr.Slider(
-            minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"
-        )

-        # Generate button + audio output
-        generate_btn = gr.Button("Generate")
-        tts_output = gr.Audio(label="TTS Output")
-
-        # Connect the button to our inference function
-        generate_btn.click(
-            fn=tts_inference,
-            inputs=[
-                text_input,
-                gr.State("openphonemizer"), #engine_dropdown,
-                gr.State("kokoro-v0_19.pth"), #model_dropdown,
-                gr.State("af_bella.pt"), #voice_dropdown,
-                speed_slider,
-            ],
-            outputs=tts_output,
-        )
+#------------------------------------------
+# FAST API
+#---------------
+app = FastAPI()

-        gr.Markdown(
-            "#### LLAMA - TTS"
-        )
-    return demo
+class TTSRequest(BaseModel):
+    text: str
+    engine: str
+    model_file: str
+    voice_file: str
+    speed: float = 1.0

+@app.post("/tts")
+def generate_tts(request: TTSRequest):
+    try:
+        sr, audio = tts_inference(
+            text="What is Deep SeEK? define in 2 lines",
+            engine="openphonemizer",
+            model_file="kokoro-v0_19.pth",
+            voice_file="af_bella.pt",
+            speed=1.0
+        )
+
+        return JSONResponse(content={
+            "sample_rate": sr,
+            "audio_tensor": audio.tolist()
+        })
+    except Exception as e:
+        return JSONResponse(content={"error": str(e)}, status_code=500)

-# ---------------------------------------------------------------------
-# Main
-# ---------------------------------------------------------------------
 if __name__ == "__main__":
-    app = create_gradio_app()
-    app.launch()
+    uvicorn.run(app, host="0.0.0.0", port=8000)
+
+
+###############################
+
+# # ---------------------------------------------------------------------
+# # Build Gradio App
+# # ---------------------------------------------------------------------
+# def create_gradio_app():
+#     model_list = get_models()
+#     voice_list = get_voices()
+
+#     css = """
+#     h4 {
+#         text-align: center;
+#         display:block;
+#     }
+#     h2 {
+#         text-align: center;
+#         display:block;
+#     }
+#     """
+#     with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
+#         gr.Markdown("## LLAMA TTS DEMO - API - GRADIO VISUAL")
+
+#         # Row 1: Text input
+#         text_input = gr.Textbox(
+#             label="Enter your question",
+#             value="What is AI?",
+#             lines=2,
+#         )
+
+#         # Row 2: Engine selection
+#         # engine_dropdown = gr.Dropdown(
+#         #     choices=["espeak", "openphonemizer"],
+#         #     value="openphonemizer",
+#         #     label="Phonemizer",
+#         # )
+
+#         # Row 3: Model dropdown
+#         # model_dropdown = gr.Dropdown(
+#         #     choices=model_list,
+#         #     value=model_list[0] if model_list else None,
+#         #     label="Model (.pth)",
+#         # )
+
+#         # Row 4: Voice dropdown
+#         # voice_dropdown = gr.Dropdown(
+#         #     choices=voice_list,
+#         #     value=voice_list[0] if voice_list else None,
+#         #     label="Voice (.pt)",
+#         # )
+
+#         # Row 5: Speed slider
+#         speed_slider = gr.Slider(
+#             minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"
+#         )
+
+#         # Generate button + audio output
+#         generate_btn = gr.Button("Generate")
+#         tts_output = gr.Audio(label="TTS Output")
+
+#         # Connect the button to our inference function
+#         generate_btn.click(
+#             fn=tts_inference,
+#             inputs=[
+#                 text_input,
+#                 gr.State("openphonemizer"), #engine_dropdown,
+#                 gr.State("kokoro-v0_19.pth"), #model_dropdown,
+#                 gr.State("af_bella.pt"), #voice_dropdown,
+#                 speed_slider,
+#             ],
+#             outputs=tts_output,
+#         )
+
+#         gr.Markdown(
+#             "#### LLAMA - TTS"
+#         )
+#     return demo
+
+
+# # ---------------------------------------------------------------------
+# # Main
+# # ---------------------------------------------------------------------
+# if __name__ == "__main__":
+#     app = create_gradio_app()
+#     app.launch()
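
With this commit the Space exposes a FastAPI JSON endpoint instead of launching the Gradio UI. Below is a minimal client sketch for the new POST /tts route, assuming the server from this commit is running locally on port 8000 (matching the uvicorn.run call) and that numpy, requests, and soundfile are installed; the URL, payload values, and output filename are illustrative placeholders, not part of the commit.

# Hypothetical client for the /tts route added above (not part of the commit).
import numpy as np
import requests
import soundfile as sf

payload = {
    "text": "What is AI?",
    "engine": "openphonemizer",
    "model_file": "kokoro-v0_19.pth",
    "voice_file": "af_bella.pt",
    "speed": 1.0,
}

# POST the TTSRequest fields as JSON and wait for synthesis to finish.
resp = requests.post("http://localhost:8000/tts", json=payload, timeout=300)
resp.raise_for_status()
data = resp.json()

# The handler returns {"sample_rate": ..., "audio_tensor": [...]}; write it to a WAV file.
audio = np.asarray(data["audio_tensor"], dtype=np.float32)
sf.write("tts_output.wav", audio, data["sample_rate"])

Note that, as committed, generate_tts parses the body into a TTSRequest but then calls tts_inference with hardcoded text, engine, model_file, voice_file, and speed values; to honor the client's payload, the handler would instead pass request.text, request.engine, request.model_file, request.voice_file, and request.speed through to tts_inference.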