# IT2091024v2 / app.py
# Pijush2023 · "Create app.py" · commit 2ea6305 (verified) · 1.59 kB
# app.py
import os
import sys
import pathlib
import subprocess
import gradio as gr
from fam.llm.fast_inference import TTS
def _run_step(command):
    """Run one setup command and report a non-zero exit status on stderr.

    The script keeps going after a failed step (for example, ``sudo apt`` may
    be unavailable while ``pipx`` is already installed), but the failure is no
    longer silent.

    Args:
        command: The argument list handed to ``subprocess.run``.

    Returns:
        The ``subprocess.CompletedProcess`` for the command.
    """
    completed = subprocess.run(command)
    if completed.returncode != 0:
        print(
            f"warning: {' '.join(command)} exited with status {completed.returncode}",
            file=sys.stderr,
        )
    return completed


# Clone the repository
if not os.path.exists("metavoice-src"):
    _run_step(["git", "clone", "https://github.com/metavoiceio/metavoice-src.git"])
# Everything below (poetry commands, relative asset paths) runs from the checkout.
os.chdir("metavoice-src")

# Install dependencies
_run_step(["sudo", "apt", "install", "pipx", "-y"])
_run_step(["pipx", "install", "poetry"])
_run_step(["pipx", "run", "poetry", "install"])
_run_step(["pipx", "run", "poetry", "run", "pip", "install", "torch==2.2.1", "torchaudio==2.2.1"])

# Get the poetry environment path.
# `poetry env list` prints environment *names*; `poetry env info --path`
# prints the filesystem path that is needed to locate site-packages.
result = subprocess.run(
    ["pipx", "run", "poetry", "env", "info", "--path"],
    capture_output=True,
    text=True,
)
path_lines = result.stdout.strip().splitlines()
if not path_lines:
    raise RuntimeError(
        "Could not determine the poetry virtual environment path: "
        f"{result.stderr.strip() or 'no output from poetry'}"
    )
venv = path_lines[-1].strip()

# Keep a record of the resolved environment path next to the checkout.
with open("poetry_env.txt", "w") as f:
    f.write(venv)

# Add the virtual environment to the system path.
# The interpreter directory name is discovered instead of assuming python3.10;
# the original python3.10 location is used as a fallback when nothing matches.
venv_path = venv
site_packages_dirs = sorted(pathlib.Path(venv_path).glob("lib/python*/site-packages"))
if not site_packages_dirs:
    site_packages_dirs = [pathlib.Path(f"{venv_path}/lib/python3.10/site-packages")]
for site_packages_dir in site_packages_dirs:
    sys.path.append(str(site_packages_dir))

# NOTE(review): `fam.llm.fast_inference` is imported at the top of this file,
# before any of the setup above has run, so the sys.path change cannot help
# that import. Confirm `fam` is already importable in the running interpreter.

# Initialize TTS
tts = TTS()
def text_to_speech(text, spk_ref_path="assets/bria.mp3"):
    """Synthesise speech for ``text`` with the module-level ``tts`` instance.

    Args:
        text: The text to convert to speech.
        spk_ref_path: Speaker reference file path forwarded to
            ``tts.synthesise``. Defaults to ``"assets/bria.mp3"``, the value
            that used to be hard-coded, so single-argument callers (such as
            the Gradio interface) behave exactly as before. A relative path
            is resolved against the process's current working directory.

    Returns:
        The value returned by ``tts.synthesise`` (the original code names it
        ``wav_file``; presumably a path to the generated audio - confirm
        against the ``fam`` API).
    """
    return tts.synthesise(text=text, spk_ref_path=spk_ref_path)
# Build the UI components first: a two-line text box for the prompt and an
# audio component for the synthesised result.
_prompt_box = gr.Textbox(lines=2, placeholder="Enter text here...")
_audio_player = gr.Audio(type="numpy", label="Generated Audio")

# Wire the components to text_to_speech in a single-function Gradio interface.
interface = gr.Interface(
    text_to_speech,
    _prompt_box,
    _audio_player,
    title="MetaVoice-1B Text to Speech",
    description="Enter text to convert it into speech using the MetaVoice-1B model.",
)

# Start serving the interface with sharing enabled.
interface.launch(share=True)