# app.py
"""Gradio demo that serves MetaVoice-1B text-to-speech.

On start-up the script:

1. clones the ``metavoice-src`` repository (if absent) and changes into it,
2. installs the project's dependencies with Poetry (run through pipx),
3. makes the Poetry virtual environment's packages importable in this
   interpreter,
4. imports and instantiates the ``TTS`` engine, and
5. launches a Gradio interface around :func:`text_to_speech`.

NOTE(review): the Poetry environment may use a different Python version than
the interpreter running this script; compiled packages can fail to import in
that case -- confirm on the deployment target.
"""
import os
import pathlib
import shutil
import site
import subprocess
import sys

import gradio as gr

_REPO_URL = "https://github.com/metavoiceio/metavoice-src.git"
_REPO_DIR = "metavoice-src"
_POETRY = ["pipx", "run", "poetry"]
_SPEAKER_REFERENCE = "assets/bria.mp3"  # Specify your speaker reference file path


def _run(cmd, **kwargs):
    """Run *cmd* and raise ``subprocess.CalledProcessError`` if it fails.

    Failing fast here gives a clear error at the step that broke instead of
    an unrelated ``ImportError`` or ``FileNotFoundError`` further down.
    """
    return subprocess.run(cmd, check=True, **kwargs)


def _prepare_checkout():
    """Clone the repository if needed and make it the working directory."""
    if not os.path.exists(_REPO_DIR):
        _run(["git", "clone", _REPO_URL])
    # Later steps (poetry, the relative speaker reference path) are resolved
    # against the repository root.
    os.chdir(_REPO_DIR)


def _install_dependencies():
    """Install pipx (only when missing), Poetry, and the project dependencies."""
    if shutil.which("pipx") is None:
        _run(["sudo", "apt", "install", "pipx", "-y"])
    _run(["pipx", "install", "poetry"])
    _run([*_POETRY, "install"])
    _run(
        [*_POETRY, "run", "pip", "install", "torch==2.2.1", "torchaudio==2.2.1"]
    )


def _poetry_env_path():
    """Return the filesystem path of the project's Poetry virtual environment.

    ``poetry env list`` prints environment *names*, not paths, so
    ``poetry env info --path`` is used instead.

    Raises:
        RuntimeError: if Poetry reports no environment path.
    """
    result = _run(
        [*_POETRY, "env", "info", "--path"], capture_output=True, text=True
    )
    lines = [line.strip() for line in result.stdout.splitlines() if line.strip()]
    if not lines:
        raise RuntimeError("poetry did not report a virtual environment path")
    # The path is the last thing printed; anything before it would be notices.
    return lines[-1]


def _activate_env(venv_path):
    """Make the packages installed in *venv_path* importable.

    The site-packages directory is located with a glob rather than assuming a
    specific ``pythonX.Y`` folder, and ``site.addsitedir`` is used so that
    ``.pth`` files (used by editable installs) are honoured.

    Raises:
        RuntimeError: if no site-packages directory exists under *venv_path*.
    """
    site_dirs = sorted(pathlib.Path(venv_path).glob("lib/python*/site-packages"))
    if not site_dirs:
        raise RuntimeError(f"no site-packages directory found under {venv_path}")
    for site_dir in site_dirs:
        site.addsitedir(str(site_dir))


# Clone the repository
_prepare_checkout()

# Install dependencies
_install_dependencies()

# Get the poetry environment path
venv_path = _poetry_env_path()
# Kept for compatibility with anything that reads this file.
pathlib.Path("poetry_env.txt").write_text(venv_path)

# Add the virtual environment to the system path
_activate_env(venv_path)

# This import must come after the environment setup above: the package is
# only importable once the repository is cloned and its environment is on
# sys.path.
from fam.llm.fast_inference import TTS  # noqa: E402

# Initialize TTS
tts = TTS()


def text_to_speech(text):
    """Synthesise speech for *text* using the bundled speaker reference.

    Args:
        text: The text to convert to speech.

    Returns:
        Whatever ``tts.synthesise`` returns (presumably a path to the
        generated audio file -- confirm against the ``fam`` API).
    """
    wav_file = tts.synthesise(
        text=text,
        spk_ref_path=_SPEAKER_REFERENCE,
    )
    return wav_file


# Create Gradio interface
interface = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
    outputs=gr.Audio(type="numpy", label="Generated Audio"),
    title="MetaVoice-1B Text to Speech",
    description="Enter text to convert it into speech using the MetaVoice-1B model.",
)

# Launch the Gradio interface
interface.launch(share=True)