File size: 4,609 Bytes
90fcad7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e0273c
 
 
 
 
90fcad7
 
 
6e0273c
90fcad7
6e0273c
90fcad7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfbb9d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a6e3f27
 
 
 
 
 
 
 
 
cfbb9d5
90fcad7
 
 
 
 
 
 
 
 
 
a6e3f27
cfbb9d5
90fcad7
 
cfbb9d5
90fcad7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfbb9d5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# ---------------------------------------------------------------
# app.py – “TTS Showcase” (static-audio-only Streamlit demo)
# ---------------------------------------------------------------
import os
import streamlit as st

# ---------- 1. Page-wide settings ----------
# Browser-tab title and icon, plus the full-width ("wide") page layout.
# NOTE: this is the first Streamlit call in the script; keep it above any
# other `st.*` command.
st.set_page_config(page_title="🔊 TTS Showcase", page_icon="🎧", layout="wide")

# ---------- 2. Demo metadata ----------
# Maps each model's repo id ("owner/name") to the display name shown on its
# card. Insertion order is the order in which the cards are rendered.
MODELS = {
    "nari-labs/Dia-1.6B": "Dia-1.6B",
    "hexgrad/Kokoro-82M": "Kokoro 82M",
    "sesame/csm-1b": "CSM 1B",
    "SparkAudio/Spark-TTS-0.5B": "Spark-TTS 0.5B",
    "canopylabs/orpheus-3b-0.1-ft": "Orpheus3b-0.1-ft",
    "SWivid/F5-TTS": "F5-TTS",
    "Zyphra/Zonos-v0.1-transformer": "Zonos v0.1",
    "coqui/XTTS-v2": "XTTS-v2",
    "HKUSTAudio/Llasa-3B": "Llasa 3B",
    "amphion/MaskGCT": "MaskGCT",
    "OuteAI/Llama-OuteTTS-1.0-1B": "Llama-OuteTTS-1.0-1B",
    "ByteDance/MegaTTS3": "MegaTTS 3",
}

# Clips are looked up as <SAMPLES_DIR>/<repo slug>/<CLIP_NAME>.
# Root folder that holds one sub-folder per model; change if yours differs.
SAMPLES_DIR = "samples"
# File name expected inside every model sub-folder.
CLIP_NAME = "generated-audio.wav"

# ---------- 3. Light CSS glow-up ----------
# All custom rules are injected once, as a single <style> element. They were
# previously split across two st.markdown calls, the second of which still
# carried a copy-paste placeholder comment and added an extra empty element
# to the page.
st.markdown(
    """
    <style>
    /* Wider central column & soft grey background */
    .block-container { padding-top: 2rem; }
    body              { background: #f5f7fa; }

    /* Simple card look */
    .tts-card {
        background: #ffffff;
        border-radius: 12px;
        padding: 1.2rem 1rem;
        box-shadow: 0 2px 8px rgba(0,0,0,.04);
        margin-bottom: 1.5rem;
    }
    .tts-title {
        font-weight: 600;
        font-size: 1.05rem;
        margin-bottom: .5rem;
    }
    audio { width: 100%; }   /* Full-width players */

    /* ---------- Inferless banner ---------- */
    #inferless-banner{
        display:flex;
        align-items:center;
        gap:.5rem;
        margin-top:2rem;
        font-size:.85rem;
        color:#555;
        opacity:.8;
    }
    #inferless-banner img{
        height:24px;            /* 👈 nice & small */
        width:24px;
        object-fit:contain;
        border-radius:4px;      /* optional: soft corners */
    }
    .inferless-text{
        letter-spacing:.2px;
        font-weight:500;
    }
    </style>
    """,
    unsafe_allow_html=True
)


# "Powered by Inferless" strip: a small logo followed by a caption. The markup
# is self-contained (one balanced <div>) and is styled by the
# #inferless-banner / .inferless-text CSS rules.
banner_html = """
    <div id="inferless-banner">
        <img src="https://i.tracxn.com/logo/company/1678863153264_9e6a9a4d-b955-42b3-895e-b94ade13c997.jpeg?format=webp&height=120&width=120" alt="Inferless Logo">
        <div class="inferless-text">Powered by Inferless</div>
    </div>
    """
st.markdown(banner_html, unsafe_allow_html=True)

# ---------- 4. Header & optional quick-filter ----------
st.title("🎙️ Open-Source Text to Speech Model Gallery")

with st.expander("ℹ️ About this demo", expanded=True):
    # The model count is derived from MODELS so the blurb cannot drift out of
    # sync when entries are added to or removed from the table.
    st.write(
        f"""
        * {len(MODELS)} popular TTS checkpoints, each with a single **_pre-synthesised_** sample  
        """
    )



# Free-text filter box. The label is collapsed (hidden), so the placeholder is
# the only hint shown to the user.
raw_filter = st.text_input(
    label="Filter models… (e.g. “coqui” or “3B”)",
    placeholder="Search Model",
    label_visibility="collapsed",
)
# Normalise once here so the matching below is case-insensitive and ignores
# surrounding whitespace.
filter_text = raw_filter.lower().strip()

# ---------- 5. Render cards in a responsive 3-column grid ----------
# The columns are created once, up front; cards are then dealt into them
# round-robin (card i lands in column i % COLS_PER_ROW).
COLS_PER_ROW = 3
cols = st.columns(spec=COLS_PER_ROW)

def repo_to_slug(repo: str) -> str:
    """Return *repo* with every "/" swapped for "_" (used as a folder name).

    Example: "huggingface/xxx" becomes "huggingface_xxx".
    """
    return "_".join(repo.split("/"))

# Keep only the models whose repo id or display name contains the filter text.
# An empty filter keeps everything, because "" is a substring of any string.
visible_models = [
    (repo, nice_name)
    for repo, nice_name in MODELS.items()
    if filter_text in repo.lower() or filter_text in nice_name.lower()
]


def _open_card():
    """Return a bordered container for one card, or a plain one as fallback."""
    try:
        return st.container(border=True)
    except TypeError:
        # Older Streamlit releases do not accept `border`; degrade gracefully
        # to an unstyled container instead of crashing the page.
        return st.container()


if not visible_models:
    st.warning("No models match that filter.")
else:
    for idx, (repo, display_name) in enumerate(visible_models):
        # Each st.markdown call is rendered as its own element, so an opening
        # <div> emitted in one call cannot wrap widgets created by later calls
        # (it only produced an empty styled box). A real container is used as
        # the card instead. The container is created after the column has been
        # entered, so it is placed inside that column.
        with cols[idx % COLS_PER_ROW], _open_card():
            st.markdown(f"<div class='tts-title'>🎧 {display_name}</div>", unsafe_allow_html=True)

            # Resolved path: samples/<repo-as-slug>/generated-audio.wav
            audio_path = os.path.join(SAMPLES_DIR, repo_to_slug(repo), CLIP_NAME)
            if os.path.isfile(audio_path):
                st.audio(audio_path)
            else:
                st.error("Sample clip not found 🤷‍♂️")