File size: 1,388 Bytes
4300fed
610f79e
4300fed
61c12f6
7ffbb2d
 
4300fed
 
532dc11
610f79e
61c12f6
610f79e
 
1821dd9
61c12f6
1821dd9
61c12f6
 
610f79e
61c12f6
 
 
 
 
 
 
 
 
 
 
 
 
 
610f79e
4300fed
610f79e
4300fed
610f79e
 
4300fed
610f79e
70cbf96
881961f
532dc11
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import gradio as gr
import spaces
import os, torch, io
import json
os.system('python -m unidic download')
# print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")
from melo.api import TTS
import tempfile

def _merge_wav_segments(segments):
    """Join several complete WAV files into one WAV file.

    Appending WAV files byte-for-byte leaves each later segment's header
    embedded in the audio data, and the first header's length field only
    covers the first segment. This helper copies just the audio frames of
    every segment under a single header instead.

    Args:
        segments: Iterable of ``bytes`` objects, each a complete WAV file.
            The output uses the parameters (channels, sample width, frame
            rate) of the first segment.
            NOTE(review): assumes all segments share that format, which
            should hold when they come from the same model -- confirm.

    Returns:
        The merged WAV file as ``bytes``, or ``b""`` if ``segments`` is empty.
    """
    import wave  # local import: keeps this edit independent of the file header

    merged = io.BytesIO()
    writer = None
    try:
        for segment in segments:
            with wave.open(io.BytesIO(segment), 'rb') as reader:
                if writer is None:
                    writer = wave.open(merged, 'wb')
                    writer.setparams(reader.getparams())
                writer.writeframes(reader.readframes(reader.getnframes()))
    finally:
        if writer is not None:
            # Closing patches the frame count into the header; the BytesIO
            # itself stays open because wave did not create it.
            writer.close()
    return merged.getvalue()


@spaces.GPU
def synthesize(conversation_text, speed=1.0, progress=gr.Progress()):
    """Render a JSON conversation to speech, alternating two English voices.

    Args:
        conversation_text: JSON document with a top-level ``"conversation"``
            list; every item must provide a ``"text"`` string to be spoken.
        speed: Speaking-rate multiplier passed to the TTS engine. Anything
            that is not a positive number falls back to ``1.0``.
        progress: Gradio progress tracker; its ``tqdm`` wrapper is handed to
            the TTS engine as the progress bar.

    Returns:
        ``bytes`` of a single WAV file containing all turns in order, or
        ``b""`` when the conversation has no turns.

    Raises:
        json.JSONDecodeError: If ``conversation_text`` is not valid JSON.
        KeyError: If ``"conversation"``, a turn's ``"text"``, or one of the
            speaker names is missing.
    """
    # The UI wires only the text box to this function, so `speed` may arrive
    # unset or as a non-numeric placeholder; use the neutral rate in that case.
    is_number = isinstance(speed, (int, float)) and not isinstance(speed, bool)
    if not is_number or speed <= 0:
        speed = 1.0

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # NOTE(review): the model is constructed on every call; consider caching
    # it if start-up latency matters.
    model = TTS(language='EN', device=device)
    spk2id = model.hps.data.spk2id
    speakers = ['EN-US', 'EN-Default']

    conversation = json.loads(conversation_text)
    segments = []
    for i, turn in enumerate(conversation["conversation"]):
        # Even turns use the first voice, odd turns the second.
        speaker_id = spk2id[speakers[i % len(speakers)]]
        bio = io.BytesIO()
        model.tts_to_file(turn["text"], speaker_id, bio, speed=speed,
                          pbar=progress.tqdm, format='wav')
        segments.append(bio.getvalue())

    return _merge_wav_segments(segments)
  
# Gradio front end: a single text box feeds `synthesize`, and the returned
# audio is shown in a read-only player.
with gr.Blocks() as demo:
    gr.Markdown('# Article to Podcast')
    with gr.Group():
        # NOTE(review): `synthesize` parses this field with json.loads and
        # reads its "conversation" list, so the label "Article Link" does not
        # describe what the handler accepts -- confirm the intended input.
        text = gr.Textbox(label="Article Link")
    btn = gr.Button('Podcastify', variant='primary')
    aud = gr.Audio(interactive=False)
    # Only the text box is passed as an input; no speed control is exposed.
    btn.click(synthesize, inputs=[text], outputs=[aud])

# Enable the request queue (up to 10 concurrent runs per event by default)
# with the API open, then start the server with the API page shown.
demo.queue(api_open=True, default_concurrency_limit=10).launch(show_api=True)