Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,180 Bytes
4300fed 610f79e 4300fed 61c12f6 a3bb4a3 3661e80 a3bb4a3 7ffbb2d 4300fed 4fbe883 2227484 a3bb4a3 532dc11 3661e80 a3bb4a3 610f79e a3bb4a3 1821dd9 a3bb4a3 1821dd9 a3bb4a3 610f79e 4fbe883 a3bb4a3 61c12f6 4fbe883 a3bb4a3 4fbe883 a3bb4a3 4fbe883 a3bb4a3 4300fed a3bb4a3 4300fed 610f79e a3bb4a3 4300fed 610f79e 70cbf96 881961f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import gradio as gr
import spaces
import os, torch, io
import json
os.system("python -m unidic download")
import httpx
# print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")
from melo.api import TTS
import tempfile
import wave
from pydub import AudioSegment
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
TextIteratorStreamer,
BitsAndBytesConfig,
)
# --- LLM setup: 4-bit quantized Hermes-2-Pro-Llama-3-8B ---
# BUG FIX: `token` was used below but never defined, which raised a
# NameError at import time. Read it from the environment instead; `None`
# falls back to anonymous access (fine for this public checkpoint).
token = os.environ.get("HF_TOKEN")

# Quantize to 4-bit NF4 with fp16 compute so the 8B model fits on a single GPU.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
)
model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Hermes-2-Pro-Llama-3-8B",
    quantization_config=quantization_config,
    token=token,
)
tok = AutoTokenizer.from_pretrained("NousResearch/Hermes-2-Pro-Llama-3-8B", token=token)
# Stop generation on either the model's EOS token or Llama-3's turn delimiter.
terminators = [tok.eos_token_id, tok.convert_tokens_to_ids("<|eot_id|>")]
def fetch_text(url):
    """Fetch the readable text of *url* through the Jina Reader proxy.

    The r.jina.ai service returns a cleaned, markdown-ish rendering of the
    target page, suitable for feeding to an LLM.
    """
    reader_url = f"https://r.jina.ai/{url}"
    return httpx.get(reader_url, timeout=60.0).text
@spaces.GPU
def synthesize(article_url, progress=gr.Progress()):
    """Turn an article URL into a two-voice podcast mp3.

    Pipeline: fetch the article text, ask the LLM to rewrite it as a
    two-speaker JSON conversation, synthesize each turn with MeloTTS
    (alternating voices), and concatenate the clips.

    Args:
        article_url: URL of the article to convert.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        Path to the exported ``final.mp3`` file.
    """
    article = fetch_text(article_url)

    # JSON shape the LLM is instructed to follow for the dialogue.
    template = """
{
"conversation": [
{"speaker": "", "text": ""},
{"speaker": "", "text": ""}
]
}
"""
    chat = [
        {
            "role": "user",
            "content": f"{article} \n Convert the text as Elaborate Conversation between two people as Podcast.\nfollowing this template \n {template}",
        }
    ]
    prompt = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

    # BUG FIX: `device` was referenced before it was assigned in the original.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_inputs = tok([prompt], return_tensors="pt").to(device)

    # BUG FIX: generate() needs the encoding unpacked (input_ids + attention
    # mask), not the BatchEncoding passed positionally.
    output_ids = model.generate(
        **model_inputs,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.9,
        eos_token_id=terminators,
    )
    # BUG FIX: the original fed the raw output tensor to json.loads. Slice
    # off the prompt tokens and decode only the newly generated completion.
    completion = tok.decode(
        output_ids[0][model_inputs["input_ids"].shape[-1]:],
        skip_special_tokens=True,
    )
    # Robustness: LLMs often wrap JSON in prose or code fences — parse the
    # outermost {...} span rather than the whole string.
    start, end = completion.find("{"), completion.rfind("}")
    conversation = json.loads(completion[start : end + 1])

    speed = 1.0
    tts_models = {
        "EN": TTS(language="EN", device=device),
    }
    speakers = ["EN-Default", "EN-US"]
    combined_audio = AudioSegment.empty()
    for i, turn in enumerate(conversation["conversation"]):
        bio = io.BytesIO()
        speaker = speakers[i % 2]  # alternate the two voices per turn
        speaker_id = tts_models["EN"].hps.data.spk2id[speaker]
        tts_models["EN"].tts_to_file(
            turn["text"], speaker_id, bio, speed=speed, pbar=progress.tqdm, format="wav"
        )
        bio.seek(0)
        combined_audio += AudioSegment.from_file(bio, format="wav")

    final_audio_path = "final.mp3"
    combined_audio.export(final_audio_path, format="mp3")
    return final_audio_path
# --- Gradio UI: one URL textbox in, one audio player out ---
with gr.Blocks() as demo:
    gr.Markdown("# Not Ready to USE")
    gr.Markdown("# Turn Any Article into Podcast")
    gr.Markdown("## Easily convert articles from URLs into listenable audio Podcast.")
    with gr.Group():
        text = gr.Textbox(label="Article Link")
        # BUG FIX: user-facing typo "Podcasitfy" -> "Podcastify".
        btn = gr.Button("Podcastify", variant="primary")
    aud = gr.Audio(interactive=False)
    btn.click(synthesize, inputs=[text], outputs=[aud])
demo.queue(api_open=True, default_concurrency_limit=10).launch(show_api=True)
|