Spaces:

rbgo
/

Open-Source-TTS-Gallary

Running

File size: 3,916 Bytes

90fcad7

# ---------------------------------------------------------------
# app.py – “TTS Showcase” (static-audio-only Streamlit demo)
# ---------------------------------------------------------------
import os
import streamlit as st

# ---------- 1. Page-wide settings ----------
# Page title, page icon and the "wide" layout for the whole app.
st.set_page_config(page_title="🔊 TTS Showcase", page_icon="🎧", layout="wide")

# ---------- 2. Demo metadata ----------
# Maps each model's repo id ("owner/name") to the label shown on its card.
# Dict insertion order is the order in which the cards are rendered.
MODELS = {
    "nari-labs/Dia-1.6B": "Dia-1.6 B",
    "hexgrad/Kokoro-82M": "Kokoro 82 M",
    "sesame/csm-1b": "CSM 1 B",
    "SparkAudio/Spark-TTS-0.5B": "Spark-TTS 0.5 B",
    "canopylabs/orpheus-3b-0.1-ft": "Orpheus 3 B (FT)",
    "SWivid/F5-TTS": "F5-TTS",
    "Zyphra/Zonos-v0.1-transformer": "Zonos v0.1",
    "coqui/XTTS-v2": "XTTS-v2",
    "HKUSTAudio/Llasa-3B": "Llasa 3 B",
    "amphion/MaskGCT": "MaskGCT",
    "OuteAI/Llama-OuteTTS-1.0-1B": "Llama-Oute 1 B",
    "ByteDance/MegaTTS3": "MegaTTS 3",
}

# Each clip is looked up at <SAMPLES_DIR>/<repo-slug>/<CLIP_NAME>.
# Root folder holding one subfolder per model; change if yours is different.
SAMPLES_DIR = "samples"
# File name expected inside every model subfolder (the agreed filename).
CLIP_NAME = "generated-audio.wav"

# ---------- 3. Light CSS glow-up ----------
# Injects a raw <style> block into the page through st.markdown; the
# unsafe_allow_html=True flag is what lets the HTML through unescaped.
# The stylesheet sets top padding on .block-container, a body background
# colour, the .tts-card / .tts-title classes referenced by the model cards,
# and makes <audio> elements span their container's full width.
# NOTE(review): .block-container is presumably a Streamlit-internal class
# name — confirm it still exists in the deployed Streamlit version.
st.markdown(
    """
    <style>
    /* Wider central column & soft grey background */
    .block-container { padding-top: 2rem; }
    body              { background: #f5f7fa; }

    /* Simple card look */
    .tts-card {
        background: #ffffff;
        border-radius: 12px;
        padding: 1.2rem 1rem;
        box-shadow: 0 2px 8px rgba(0,0,0,.04);
        margin-bottom: 1.5rem;
    }
    .tts-title {
        font-weight: 600;
        font-size: 1.05rem;
        margin-bottom: .5rem;
    }
    audio { width: 100%; }   /* Full-width players */
    </style>
    """,
    unsafe_allow_html=True
)

# ---------- 4. Header & optional quick-filter ----------
st.title("🎙️ Open-Source Text to Speech Model Gallery")

# The model count and the clip path are derived from MODELS, SAMPLES_DIR and
# CLIP_NAME so this blurb cannot drift out of sync when those are edited.
with st.expander("ℹ️ About this demo", expanded=True):
    st.write(
        f"""
        * {len(MODELS)} popular TTS checkpoints, each with a single **_pre-synthesised_** sample  
        * Nothing heavy runs in your browser – it’s basically an audio gallery  
        * All clips should live under `{SAMPLES_DIR}/<repo-slug>/{CLIP_NAME}`
        """
    )

# Free-text filter box. The label is collapsed, so the placeholder is the
# only visible hint. The value is lower-cased and trimmed once here so the
# matching code can use plain substring tests.
_filter_box_value = st.text_input(
    "Filter models… (e.g. “coqui” or “3B”)",
    placeholder="Leave blank to show all",
    label_visibility="collapsed",
)
filter_text = _filter_box_value.lower().strip()

# ---------- 5. Render cards in a responsive 3-column grid ----------
# The column containers are created once up front; cards are placed into
# them by index further down.
COLS_PER_ROW = 3
cols = st.columns(spec=COLS_PER_ROW)

def repo_to_slug(repo: str) -> str:
    """Return *repo* with every "/" swapped for "_" (for folder naming).

    Example: "huggingface/xxx" becomes "huggingface_xxx".
    """
    return "_".join(repo.split("/"))

# Keep every (repo, label) pair in which the filter text occurs,
# case-insensitively, in either the repo id or the label. An empty filter
# is a substring of everything, so a blank box shows all models.
visible_models = [
    entry
    for entry in MODELS.items()
    if any(filter_text in field.lower() for field in entry)
]

if not visible_models:
    st.warning("No models match that filter.")
else:
    for idx, (repo, display_name) in enumerate(visible_models):
        # Deal cards into the columns round-robin: 0, 1, 2, 0, 1, 2, ...
        with cols[idx % COLS_PER_ROW]:
            # Every st.markdown call is rendered as its own element, so an
            # opening <div> in one call and a closing </div> in another can
            # never wrap the widgets emitted in between. A bordered container
            # (Streamlit >= 1.29) gives a real card frame around the title
            # and the player instead.
            with st.container(border=True):
                st.markdown(
                    f"<div class='tts-title'>🎧 {display_name}</div>",
                    unsafe_allow_html=True,
                )

                # Resolved path: samples/<repo-as-slug>/generated-audio.wav
                audio_path = os.path.join(SAMPLES_DIR, repo_to_slug(repo), CLIP_NAME)
                if os.path.isfile(audio_path):
                    st.audio(audio_path)
                else:
                    # A missing clip is reported on the card itself rather
                    # than aborting the rest of the gallery.
                    st.error("Sample clip not found 🤷‍♂️")

# ---------- 6. Footer ----------
st.markdown("---")
# Report the Streamlit version that is actually running instead of a
# hard-coded number that goes stale after an upgrade.
st.caption(f"Crafted with ❤️ using Streamlit {st.__version__}")