Spaces:

navidved
/

gooya-asr

Running

File size: 4,805 Bytes

f39366f
bda9ee3
f118118
99106b6
 
9c7cb99
bda9ee3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99106b6
bda9ee3
 
5b1e694
55c61cc
1967b9f
 
 
 
 
 
99106b6
5b1e694
1967b9f
 
 
5b1e694
99106b6
1967b9f
 
f39366f
1967b9f
 
 
 
bda9ee3
1967b9f
 
 
bda9ee3
1967b9f
99106b6
 
 
 
1967b9f
 
 
99106b6
1967b9f
 
99106b6
1967b9f
99106b6
1967b9f
 
 
55c61cc
f118118
 
1967b9f
 
 
 
 
 
f118118
99106b6
1967b9f
 
 
 
 
 
 
f118118
 
99106b6
 
f118118
bda9ee3
 
 
99106b6
bda9ee3
1967b9f
bda9ee3
248ce4e
99106b6
bda9ee3
f39366f
 
 
1967b9f
bda9ee3
99106b6
 
1967b9f
 
f118118
99106b6
bda9ee3
1967b9f
bda9ee3
 
 
 
 
 
1967b9f
f39366f
1967b9f
f39366f
99106b6
1967b9f
f118118
 
99106b6

import gradio as gr
import requests, os, time, pathlib

# Endpoint URL of the ASR service, read from the environment (None when unset).
ASR_API_URL = os.getenv("ASR_API_URL")
# Token sent as a Bearer credential in the Authorization header (None when unset).
AUTH_TOKEN  = os.getenv("AUTH_TOKEN")

# ---------- Helper: extract the actual file path ----------
def extract_path(audio_value):
    """Resolve the value emitted by the audio component to a file path.

    ``audio_value`` may be ``None``, a string / ``pathlib.Path``, or a dict.

    Returns:
        The file path, or ``None`` when the value is empty or of an
        unsupported type. For a dict, whatever is stored under ``"path"``
        (``None`` when the key is missing).
    """
    if audio_value:
        if isinstance(audio_value, dict):
            # Dict payloads carry the location under the "path" key.
            return audio_value.get("path")
        if isinstance(audio_value, (str, pathlib.Path)):
            # Plain path values are passed through unchanged.
            return audio_value
    return None

# ---------- Processing ----------
def transcribe_audio(audio_value):
    """Upload the selected audio file to the ASR service and return its transcript.

    Args:
        audio_value: Value emitted by the Gradio audio component; it is
            resolved to a file path with ``extract_path``.

    Returns:
        A ``(text, timing)`` tuple of strings. On success ``text`` is the
        transcription and ``timing`` looks like ``"1.23 seconds"``. On any
        failure ``text`` is an error message starting with "❌" and
        ``timing`` is the empty string. The function never raises for
        network, file, or malformed-response errors.
    """
    file_path = extract_path(audio_value)
    if not file_path:
        return "❌ فایل صوتی هنوز آماده نیست.", ""

    if not ASR_API_URL or not AUTH_TOKEN:
        return "❌ Error: ASR_API_URL or AUTH_TOKEN is not set.", ""

    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {AUTH_TOKEN}",
    }
    # (connect, read) timeouts in seconds. Without a timeout, requests waits
    # forever on a stalled server and the UI callback never returns.
    request_timeout = (10, 120)

    start = time.time()
    try:
        with open(file_path, "rb") as f:
            # NOTE(review): the part is always labelled "audio/mpeg", even for
            # non-MP3 uploads/recordings. Left unchanged because the server may
            # depend on it; confirm against the API.
            files = {"file": (os.path.basename(file_path), f, "audio/mpeg")}
            response = requests.post(
                ASR_API_URL,
                headers=headers,
                files=files,
                timeout=request_timeout,
            )
    except Exception as e:
        # UI boundary: surface any file/network failure as a message.
        return f"❌ Error: {e}", ""
    elapsed = time.time() - start

    if response.status_code != 200:
        return f"❌ Error {response.status_code}: {response.text}", ""

    # A 200 response is not guaranteed to carry a JSON object.
    try:
        data = response.json()
    except ValueError:
        return "❌ Error: the ASR service returned a non-JSON response.", ""
    if not isinstance(data, dict):
        return "❌ Error: unexpected response format from the ASR service.", ""

    txt = data.get("transcription", "No transcription returned.")
    # Prefer the server-reported time, but fall back to the locally measured
    # round-trip when it is missing or not numeric.
    try:
        elapsed = float(data.get("time", elapsed))
    except (TypeError, ValueError):
        pass
    return txt, f"{elapsed:.2f} seconds"


# ---------- User interface ----------
# Extra CSS passed to gr.Blocks: the gradient title banner (#gooya-title) and
# the pill-shaped badge (.gooya-badge) used on the processing-time label.
CUSTOM_CSS = """
#gooya-title {color:white; background:linear-gradient(90deg,#224CA5 0%,#2CD8D5 100%);
              border-radius:12px;padding:20px 10px;margin-bottom:12px;}
.gooya-badge {display:inline-block;background:#224CA5;color:#fff;border-radius:16px;
              padding:6px 16px;font-size:0.97rem;margin-top:4px;}
"""

with gr.Blocks(css=CUSTOM_CSS) as demo:
    # Layout: title banner, audio input beside the two outputs, then the
    # Transcribe / Clear buttons. Event wiring follows the layout.

    # Title banner. The version <span> previously opened its style attribute
    # with a double quote and closed it with a single quote (malformed HTML).
    gr.HTML("""
    <div id="gooya-title">
        <h1 style='margin-bottom:10px;font-weight:800;font-size:2rem;'>Gooya ASR
            <span style='font-size:1.1rem;font-weight:400;opacity:0.8;'>v1.4</span></h1>
        <p style='font-size:1.12rem;margin-bottom:2px;'>High-performance Persian Speech-to-Text</p>
        <p style='font-size:0.98rem;color:#c6e8fa'>Upload or record a Persian audio file (max 30s) and instantly receive the transcription.</p>
    </div>
    """)

    with gr.Row():
        with gr.Column():
            audio = gr.Audio(
                label="Audio Input (Upload or record, up to 30s)",
                type="filepath",
                sources=["upload", "microphone"],
                show_label=True,
            )
        with gr.Column():
            inf_time_lbl  = gr.Label(label="⏱️ Processing Time", elem_classes="gooya-badge")
            transcription = gr.Textbox(
                label="📝 Transcription",
                lines=5,
                show_copy_button=True,
                placeholder="The transcription will appear here...",
                elem_id="gooya-textbox",
            )

    with gr.Row():
        # The submit button starts disabled and is enabled once audio is present.
        submit_btn = gr.Button("Transcribe", variant="primary", interactive=False)
        clear_btn  = gr.Button("Clear",      variant="secondary")

    # ---------- Enable / disable the submit button ----------
    def toggle_btn(audio_value):
        """Enable the submit button only when a usable audio path exists."""
        # gr.update() is used instead of gr.Button.update(): the per-component
        # update() classmethods were removed in Gradio 4, which this file
        # targets (it passes `sources=` to gr.Audio).
        return gr.update(interactive=bool(extract_path(audio_value)))

    # After a file is uploaded (or the audio value otherwise changes)
    audio.change(toggle_btn,      inputs=audio, outputs=submit_btn, queue=False)
    # After a microphone recording finishes
    audio.stop_recording(toggle_btn, inputs=audio, outputs=submit_btn, queue=False)

    # ---------- Processing ----------
    # Run the transcription, then disable the button again once it finishes.
    submit_btn.click(
        transcribe_audio,
        inputs=audio,
        outputs=[transcription, inf_time_lbl],
    ).then(
        lambda: gr.update(interactive=False),
        None,
        submit_btn,
        queue=False,
    )

    # ---------- Clearing ----------
    def clear_all():
        """Reset both outputs, empty the audio input and disable the submit button."""
        return (
            "",                                        # transcription text
            "",                                        # processing time
            gr.update(value=None),                     # clear the Audio component
            gr.update(interactive=False),              # disable the submit button
        )

    clear_btn.click(
        clear_all,
        None,
        [transcription, inf_time_lbl, audio, submit_btn],
        queue=False,
    )

# Start the Gradio server for the interface defined above.
demo.launch()