khurrameycon committed on
Commit
c9b6f29
·
verified ·
1 Parent(s): 93b51e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -77
app.py CHANGED
@@ -1,8 +1,16 @@
1
- import gradio as gr
2
  import os
3
  import torch
4
  from huggingface_hub import InferenceClient
5
 
 
 
 
 
 
 
 
 
6
  # Import eSpeak TTS pipeline
7
  from tts_cli import (
8
  build_model as build_model_espeak,
@@ -111,85 +119,123 @@ def tts_inference(text, engine, model_file, voice_file, speed=1.0):
111
  return (sr, audio) # Gradio expects (sample_rate, np_array)
112
 
113
 
114
- # ---------------------------------------------------------------------
115
- # Build Gradio App
116
- # ---------------------------------------------------------------------
117
- def create_gradio_app():
118
- model_list = get_models()
119
- voice_list = get_voices()
120
-
121
- css = """
122
- h4 {
123
- text-align: center;
124
- display:block;
125
- }
126
- h2 {
127
- text-align: center;
128
- display:block;
129
- }
130
- """
131
- with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
132
- gr.Markdown("## LLAMA TTS DEMO - API - GRADIO VISUAL")
133
-
134
- # Row 1: Text input
135
- text_input = gr.Textbox(
136
- label="Enter your question",
137
- value="What is AI?",
138
- lines=2,
139
- )
140
 
141
- # Row 2: Engine selection
142
- # engine_dropdown = gr.Dropdown(
143
- # choices=["espeak", "openphonemizer"],
144
- # value="openphonemizer",
145
- # label="Phonemizer",
146
- # )
147
-
148
- # Row 3: Model dropdown
149
- # model_dropdown = gr.Dropdown(
150
- # choices=model_list,
151
- # value=model_list[0] if model_list else None,
152
- # label="Model (.pth)",
153
- # )
154
-
155
- # Row 4: Voice dropdown
156
- # voice_dropdown = gr.Dropdown(
157
- # choices=voice_list,
158
- # value=voice_list[0] if voice_list else None,
159
- # label="Voice (.pt)",
160
- # )
161
-
162
- # Row 5: Speed slider
163
- speed_slider = gr.Slider(
164
- minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"
165
- )
166
 
167
- # Generate button + audio output
168
- generate_btn = gr.Button("Generate")
169
- tts_output = gr.Audio(label="TTS Output")
170
-
171
- # Connect the button to our inference function
172
- generate_btn.click(
173
- fn=tts_inference,
174
- inputs=[
175
- text_input,
176
- gr.State("openphonemizer"), #engine_dropdown,
177
- gr.State("kokoro-v0_19.pth"), #model_dropdown,
178
- gr.State("af_bella.pt"), #voice_dropdown,
179
- speed_slider,
180
- ],
181
- outputs=tts_output,
182
- )
183
 
184
- gr.Markdown(
185
- "#### LLAMA - TTS"
186
- )
187
- return demo
 
 
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
- # ---------------------------------------------------------------------
191
- # Main
192
- # ---------------------------------------------------------------------
193
  if __name__ == "__main__":
194
- app = create_gradio_app()
195
- app.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import gradio as gr
2
  import os
3
  import torch
4
  from huggingface_hub import InferenceClient
5
 
6
+
7
+ # Khurram
8
+ from fastapi import FastAPI, Query
9
+ from pydantic import BaseModel
10
+ import uvicorn
11
+ from fastapi.responses import JSONResponse
12
+ #################
13
+
14
  # Import eSpeak TTS pipeline
15
  from tts_cli import (
16
  build_model as build_model_espeak,
 
119
  return (sr, audio) # Gradio expects (sample_rate, np_array)
120
 
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
#------------------------------------------
# FAST API
#---------------
# Module-level ASGI application; served by uvicorn in the __main__ guard below.
app = FastAPI()
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
class TTSRequest(BaseModel):
    """Request body schema for the POST /tts endpoint."""
    # Text to synthesise into speech.
    text: str
    # Phonemizer backend; presumably "espeak" or "openphonemizer" — TODO confirm against tts_inference.
    engine: str
    # Model checkpoint filename (.pth), e.g. "kokoro-v0_19.pth".
    model_file: str
    # Voice embedding filename (.pt), e.g. "af_bella.pt".
    voice_file: str
    # Speech speed multiplier; 1.0 = normal speed.
    speed: float = 1.0
135
 
136
@app.post("/tts")
def generate_tts(request: TTSRequest):
    """Synthesise speech for the posted request and return it as JSON.

    Runs the TTS pipeline (``tts_inference``, defined elsewhere in this
    file) with the parameters supplied by the client.

    Returns:
        JSONResponse with keys ``sample_rate`` (int) and ``audio_tensor``
        (list of floats), or ``{"error": ...}`` with HTTP 500 on failure.
    """
    try:
        # Bug fix: the original handler ignored every field of `request`
        # and synthesised a hard-coded prompt with fixed engine/model/voice.
        # Honour the client-supplied payload instead.
        sr, audio = tts_inference(
            text=request.text,
            engine=request.engine,
            model_file=request.model_file,
            voice_file=request.voice_file,
            speed=request.speed,
        )
        return JSONResponse(content={
            "sample_rate": sr,
            # tolist() converts the tensor/ndarray into a JSON-serialisable list.
            "audio_tensor": audio.tolist(),
        })
    except Exception as e:
        # Service boundary: report the failure to the client as a 500
        # instead of letting an unhandled exception propagate.
        return JSONResponse(content={"error": str(e)}, status_code=500)
153
 
 
 
 
154
if __name__ == "__main__":
    # Serve the FastAPI app on all interfaces, port 8000 (blocking call).
    uvicorn.run(app, host="0.0.0.0", port=8000)
156
+
157
+
158
+ ###############################
159
+
160
+ # # ---------------------------------------------------------------------
161
+ # # Build Gradio App
162
+ # # ---------------------------------------------------------------------
163
+ # def create_gradio_app():
164
+ # model_list = get_models()
165
+ # voice_list = get_voices()
166
+
167
+ # css = """
168
+ # h4 {
169
+ # text-align: center;
170
+ # display:block;
171
+ # }
172
+ # h2 {
173
+ # text-align: center;
174
+ # display:block;
175
+ # }
176
+ # """
177
+ # with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
178
+ # gr.Markdown("## LLAMA TTS DEMO - API - GRADIO VISUAL")
179
+
180
+ # # Row 1: Text input
181
+ # text_input = gr.Textbox(
182
+ # label="Enter your question",
183
+ # value="What is AI?",
184
+ # lines=2,
185
+ # )
186
+
187
+ # # Row 2: Engine selection
188
+ # # engine_dropdown = gr.Dropdown(
189
+ # # choices=["espeak", "openphonemizer"],
190
+ # # value="openphonemizer",
191
+ # # label="Phonemizer",
192
+ # # )
193
+
194
+ # # Row 3: Model dropdown
195
+ # # model_dropdown = gr.Dropdown(
196
+ # # choices=model_list,
197
+ # # value=model_list[0] if model_list else None,
198
+ # # label="Model (.pth)",
199
+ # # )
200
+
201
+ # # Row 4: Voice dropdown
202
+ # # voice_dropdown = gr.Dropdown(
203
+ # # choices=voice_list,
204
+ # # value=voice_list[0] if voice_list else None,
205
+ # # label="Voice (.pt)",
206
+ # # )
207
+
208
+ # # Row 5: Speed slider
209
+ # speed_slider = gr.Slider(
210
+ # minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"
211
+ # )
212
+
213
+ # # Generate button + audio output
214
+ # generate_btn = gr.Button("Generate")
215
+ # tts_output = gr.Audio(label="TTS Output")
216
+
217
+ # # Connect the button to our inference function
218
+ # generate_btn.click(
219
+ # fn=tts_inference,
220
+ # inputs=[
221
+ # text_input,
222
+ # gr.State("openphonemizer"), #engine_dropdown,
223
+ # gr.State("kokoro-v0_19.pth"), #model_dropdown,
224
+ # gr.State("af_bella.pt"), #voice_dropdown,
225
+ # speed_slider,
226
+ # ],
227
+ # outputs=tts_output,
228
+ # )
229
+
230
+ # gr.Markdown(
231
+ # "#### LLAMA - TTS"
232
+ # )
233
+ # return demo
234
+
235
+
236
+ # # ---------------------------------------------------------------------
237
+ # # Main
238
+ # # ---------------------------------------------------------------------
239
+ # if __name__ == "__main__":
240
+ # app = create_gradio_app()
241
+ # app.launch()