Podcastify

Running

File size: 2,688 Bytes

4300fed
610f79e
4300fed
61c12f6
c84c9fa
 
3661e80
7ffbb2d
4300fed
 
4fbe883
2227484
c84c9fa
532dc11
c84c9fa
3661e80
8bb652f
3661e80
 
c84c9fa
 
3661e80
 
a3bb4a3
610f79e
a3bb4a3
 
8bb652f
 
a3bb4a3
 
 
 
 
 
 
 
 
c84c9fa
 
 
 
 
 
a3bb4a3
c84c9fa
 
 
 
 
 
a3bb4a3
1821dd9
a3bb4a3
1821dd9
a3bb4a3
610f79e
4fbe883
c84c9fa
61c12f6
4fbe883
 
 
a3bb4a3
 
 
 
4fbe883
 
 
 
a3bb4a3
 
4fbe883
 
a3bb4a3
4300fed
a3bb4a3
 
 
4300fed
610f79e
a3bb4a3
4300fed
610f79e
70cbf96
c84c9fa

import gradio as gr
import spaces
import os, torch, io
import json
import re
# os.system("python -m unidic download")
import httpx
# print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")
from melo.api import TTS
import tempfile
import wave
from pydub import AudioSegment
from gradio_client import Client

# gradio_client handle for the "eswardivi/AIO_Chat" app, created once at import
# time; `synthesize` sends its prompt to this client's "/chat" endpoint.
client = Client("eswardivi/AIO_Chat")
def fetch_text(url):
    """Fetch a web page through the ``https://r.jina.ai/`` proxy and return its body.

    The page URL is appended verbatim to the proxy prefix and requested with a
    120 second timeout.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.

    Returns:
        The response body as text.

    Raises:
        ValueError: If ``url`` is empty, blank, or ``None``.
        httpx.HTTPStatusError: If the proxy answers with a non-success status.
        httpx.HTTPError: For transport problems such as timeouts (raised by
            ``httpx.get`` itself).
    """
    print("Entered Webpage Extraction")
    target = (url or "").strip()
    if not target:
        # Without this guard the bare proxy root would be fetched and its
        # content treated as the article.
        raise ValueError("An article URL is required.")
    response = httpx.get("https://r.jina.ai/" + target, timeout=120.0)
    # Fail loudly instead of handing an error page to the caller as if it
    # were article text.
    response.raise_for_status()
    print("Response Received")
    return response.text


# Matches a JSON object that nests other objects at most one level deep, which
# is enough for the conversation template requested from the chat model.
_JSON_OBJECT_PATTERN = re.compile(r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}")


def _extract_turn_texts(reply):
    """Return the ``text`` of every turn in the chat model's JSON reply, in order.

    Turns without usable text come back as empty strings so the caller can
    keep speaker alternation tied to the turn's position.

    Raises:
        ValueError: If the reply holds no JSON object, the JSON is malformed,
            or it lacks a ``conversation`` list.
    """
    found = _JSON_OBJECT_PATTERN.search(reply)
    if found is None:
        raise ValueError("the model reply did not contain a JSON object")
    try:
        payload = json.loads(found.group())
    except json.JSONDecodeError as err:
        raise ValueError("the model reply was not valid JSON") from err
    turns = payload.get("conversation")
    if not isinstance(turns, list):
        raise ValueError("the model reply has no 'conversation' list")
    texts = []
    for turn in turns:
        line = turn.get("text") if isinstance(turn, dict) else None
        texts.append(line.strip() if isinstance(line, str) else "")
    return texts


@spaces.GPU
def synthesize(article_url, progress=gr.Progress()):
    """Turn the article at ``article_url`` into a two-voice podcast MP3.

    Steps: fetch the article text with ``fetch_text``, ask the remote chat
    endpoint to rewrite it as a JSON conversation, synthesize each turn with
    the MeloTTS English model (voices alternate by turn position), and
    concatenate the turns into one MP3 file.

    Args:
        article_url: Link to the article to convert.
        progress: Gradio progress tracker; its ``tqdm`` is handed to the TTS
            call as the progress-bar factory.

    Returns:
        Path of the generated MP3 file. Each call writes to its own temporary
        file so concurrent requests cannot overwrite each other's output.

    Raises:
        gr.Error: If the link is blank, the article cannot be fetched, or the
            model reply cannot be turned into speakable conversation turns.
    """
    if not article_url or not article_url.strip():
        raise gr.Error("Please enter an article link.")
    try:
        article = fetch_text(article_url)
    except httpx.HTTPError as err:
        raise gr.Error(f"Could not fetch the article: {err}") from err

    device = "cuda" if torch.cuda.is_available() else "cpu"

    template = """
        {
            "conversation": [
                {"speaker": "", "text": ""},
                {"speaker": "", "text": ""}
            ]
        }
        """

    # NOTE(review): the positional arguments after the prompt follow the
    # remote "/chat" signature (presumably temperature, sampling flag and
    # max new tokens) - confirm against the endpoint's API page.
    result = client.predict(
        f"{article} \n Convert the text as Elaborate Conversation between two people as Podcast.\nfollowing this template and return only JSON \n {template}",
        0.9,
        True,
        1024,
        api_name="/chat"
    )

    try:
        turn_texts = _extract_turn_texts(result)
    except ValueError as err:
        raise gr.Error(f"Could not build a conversation: {err}") from err

    speed = 1.0
    model = TTS(language="EN", device=device)
    speakers = ["EN-Default", "EN-US"]
    # Resolve the voice ids once instead of on every turn.
    speaker_ids = [model.hps.data.spk2id[name] for name in speakers]

    combined_audio = AudioSegment.empty()
    for i, line in enumerate(turn_texts):
        if not line:
            # Nothing to speak; the index still advances so the voices stay
            # aligned with the turn positions in the model's reply.
            continue
        bio = io.BytesIO()
        model.tts_to_file(
            line, speaker_ids[i % 2], bio, speed=speed, pbar=progress.tqdm, format="wav"
        )
        bio.seek(0)
        combined_audio += AudioSegment.from_file(bio, format="wav")

    if len(combined_audio) == 0:
        raise gr.Error("The generated conversation contained no text to speak.")

    # Reserve a unique file name; the handle is closed before pydub writes to
    # the path.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as out_file:
        final_audio_path = out_file.name
    combined_audio.export(final_audio_path, format="mp3")
    return final_audio_path


# Gradio UI: a textbox for the article link, a button that runs `synthesize`
# on it, and a read-only audio player that receives the returned file path.
with gr.Blocks() as demo:
    gr.Markdown("# Not Ready to USE")
    gr.Markdown("# Turn Any Article into Podcast")
    gr.Markdown("## Easily convert articles from URLs into listenable audio Podcast.")
    with gr.Group():
        text = gr.Textbox(label="Article Link")
    # Label corrected from "Podcasitfy" to match the app's name.
    btn = gr.Button("Podcastify", variant="primary")
    aud = gr.Audio(interactive=False)
    btn.click(synthesize, inputs=[text], outputs=[aud])

# Enable the request queue (default concurrency limit of 10 per event) with
# the API left open, then start the server with a share link requested.
demo.queue(api_open=True, default_concurrency_limit=10).launch(show_api=True, share=True)