File size: 1,593 Bytes
2ea6305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# app.py

import os
import sys
import pathlib
import subprocess
import gradio as gr
from fam.llm.fast_inference import TTS

# Clone the repository (first run only) and work from inside the checkout.
# check=True makes a failed clone raise CalledProcessError right here instead
# of surfacing later as a less obvious FileNotFoundError from os.chdir.
if not os.path.exists("metavoice-src"):
    subprocess.run(
        ["git", "clone", "https://github.com/metavoiceio/metavoice-src.git"],
        check=True,
    )
# Every relative path used after this point is resolved against the checkout.
os.chdir("metavoice-src")

# Install dependencies
# The steps run in order and stay best-effort: a non-zero exit status does not
# abort the script, but it is reported on stderr so that a broken install is
# visible in the logs instead of being silently ignored.
for command in (
    ["sudo", "apt", "install", "pipx", "-y"],
    ["pipx", "install", "poetry"],
    ["pipx", "run", "poetry", "install"],
    ["pipx", "run", "poetry", "run", "pip", "install", "torch==2.2.1", "torchaudio==2.2.1"],
):
    completed = subprocess.run(command)
    if completed.returncode != 0:
        print(
            f"warning: {' '.join(command)!r} exited with status {completed.returncode}",
            file=sys.stderr,
        )

# Get the poetry environment path
# --full-path makes poetry print the environment's absolute location; without
# it only the environment *name* is listed, which is not a usable path.
result = subprocess.run(
    ["pipx", "run", "poetry", "env", "list", "--full-path"],
    capture_output=True,
    text=True,
)
env_fields = result.stdout.split()
if not env_fields:
    # Fail with an explicit message rather than a bare IndexError.
    raise RuntimeError(
        "poetry did not report any virtual environment: " + result.stderr.strip()
    )
# The first whitespace-separated token is the path; a trailing "(Activated)"
# marker, if present, is a separate token and is dropped here.
venv = env_fields[0]
with open("poetry_env.txt", "w") as f:
    f.write(venv)

# Add the virtual environment to the system path
# NOTE(review): `fam` is already imported at the top of this file, i.e. before
# this entry is appended, so the append only helps imports that happen later.
venv_path = venv
# Locate site-packages for whichever Python version the environment uses,
# falling back to the previously hard-coded 3.10 layout if nothing matches.
site_packages_dirs = sorted(pathlib.Path(venv_path).glob("lib/python*/site-packages"))
if site_packages_dirs:
    sys.path.append(str(site_packages_dirs[0]))
else:
    sys.path.append(f"{venv_path}/lib/python3.10/site-packages")

# Initialize TTS
# A single TTS instance (class imported from fam.llm.fast_inference) is created
# with default arguments when the script starts and is shared by all requests.
# NOTE(review): presumably this is where the model gets loaded, so startup may
# be slow — confirm against the fam package.
tts = TTS()

def text_to_speech(text):
    """Synthesise speech for ``text`` using the module-level ``tts`` instance.

    Args:
        text: The text to speak.

    Returns:
        The value returned by ``tts.synthesise`` (presumably the path of the
        generated audio file — TODO confirm), or ``None`` when ``text`` is
        missing, empty, or contains only whitespace.
    """
    # Blank input has nothing to synthesise; skip the model call entirely.
    # Returning None is expected to leave the audio output empty in the UI.
    if not text or not text.strip():
        return None
    return tts.synthesise(
        text=text,
        spk_ref_path="assets/bria.mp3",  # Specify your speaker reference file path
    )

# Build the Gradio UI: a two-line text box as input, an audio component as output.
prompt_box = gr.Textbox(lines=2, placeholder="Enter text here...")
audio_player = gr.Audio(type="numpy", label="Generated Audio")
interface = gr.Interface(
    fn=text_to_speech,
    inputs=prompt_box,
    outputs=audio_player,
    title="MetaVoice-1B Text to Speech",
    description="Enter text to convert it into speech using the MetaVoice-1B model.",
)

# Start serving the app, with sharing enabled.
interface.launch(share=True)