Spaces:

navidved
/

gooya-asr

Running

File size: 2,808 Bytes

55c61cc
 
 
 
9c7cb99
5421e82
 
55c61cc
 
5421e82
55c61cc
e83050a
5421e82
55c61cc
5b1e694
55c61cc
5421e82
56a2dd0
5b1e694
56a2dd0
 
5421e82
5b1e694
 
55c61cc
 
5b1e694
55c61cc
5421e82
5b1e694
 
55c61cc
 
 
 
5421e82
55c61cc
 
5421e82
 
 
55c61cc
 
 
 
 
 
 
 
 
 
 
 
e824bc9
55c61cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5421e82
55c61cc
 
5421e82
55c61cc

import os
import time
import requests
import streamlit as st

# ---------- Environment Variables ----------
# Endpoint and bearer token are read once at start-up; each is None when the
# corresponding environment variable is unset.
ASR_API_URL = os.environ.get("ASR_API_URL")
AUTH_TOKEN = os.environ.get("AUTH_TOKEN")

# Tell the user about a misconfiguration up front instead of only when the
# first request fails.
# NOTE(review): this runs before the st.set_page_config call further down;
# some Streamlit versions require set_page_config to be the first Streamlit
# command - confirm against the deployed version.
if not (ASR_API_URL and AUTH_TOKEN):
    st.warning("⚠️ ASR_API_URL or AUTH_TOKEN is not set. API calls will fail.")

# ---------- Core Transcription Function ----------
def transcribe_audio(file_obj):
    """Send an audio file to the ASR API and return its transcription.

    Args:
        file_obj: Binary file-like object holding the audio; it is uploaded
            as the multipart field ``file``.

    Returns:
        A ``(text, elapsed)`` tuple of strings. On success ``text`` is the
        transcription and ``elapsed`` is formatted like ``"1.23 s"``. On any
        failure ``text`` is a message starting with "❌" and ``elapsed`` is
        the empty string. The function never raises for API problems.
    """
    if not ASR_API_URL or not AUTH_TOKEN:
        return "❌ Error: ASR_API_URL or AUTH_TOKEN is not set.", ""

    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {AUTH_TOKEN}",
    }

    start = time.time()
    try:
        # NOTE(review): the upload is always labelled audio.wav / audio/wav,
        # although the UI also accepts mp3 files - confirm the API sniffs the
        # real format rather than trusting this label.
        files = {"file": ("audio.wav", file_obj, "audio/wav")}
        resp = requests.post(ASR_API_URL, headers=headers, files=files, timeout=120)
    except Exception as e:
        # UI boundary: report every failure to the user instead of crashing.
        return f"❌ Error while calling ASR API: {e}", ""
    elapsed = time.time() - start

    if resp.status_code != 200:
        return f"❌ Error: {resp.status_code}, {resp.text}", ""

    # A 200 response is not guaranteed to carry a JSON object (e.g. an HTML
    # page from a proxy), so parse defensively.
    try:
        data = resp.json()
    except ValueError:
        return f"❌ Error: ASR API returned a non-JSON response: {resp.text}", ""
    if not isinstance(data, dict):
        return f"❌ Error: unexpected ASR API response: {resp.text}", ""

    text = data.get("transcription")
    if text is None:
        text = "No transcription returned."

    # Prefer the server-reported time; fall back to the locally measured
    # round trip when it is missing or not numeric.
    try:
        seconds = float(data.get("time", elapsed))
    except (TypeError, ValueError):
        seconds = elapsed
    return text, f"{seconds:.2f} s"

# ---------- UI ----------
st.set_page_config(page_title="Gooya ASR v1.4", layout="centered")

# Colours used for the gradient behind the page title.
VIOLET_MAIN = "#7F3FBF"
VIOLET_LIGHT = "#C3A6FF"

# The banner is raw HTML, hence unsafe_allow_html; only the two colour
# constants above are interpolated into it.
header_html = f"""
    <h1 style="background: linear-gradient(90deg, {VIOLET_MAIN}, {VIOLET_LIGHT}); color: white; padding: 20px; border-radius: 12px; text-align: center;">
        Gooya ASR v1.4
    </h1>
    """
st.markdown(header_html, unsafe_allow_html=True)

# Two input sources, one per tab: microphone recording and file upload.
tab1, tab2 = st.tabs(["🎤 Record from Microphone", "📁 Upload Audio File"])
audio_file = tab1.audio_input("🎙️ Record audio from microphone")
uploaded_file = tab2.file_uploader(
    "📂 Upload audio file (wav/mp3)",
    type=["wav", "mp3"],
)

# Action buttons side by side.
col1, col2 = st.columns(2)
btn_transcribe = col1.button("Transcribe", type="primary")
btn_clear = col2.button("Clear")

# ---------- Main Logic ----------
if btn_transcribe:
    # An uploaded file takes precedence over a microphone recording.
    file_to_process = uploaded_file or audio_file
    if file_to_process:
        with st.spinner("⏳ Processing..."):
            transcription, elapsed = transcribe_audio(file_to_process)
            st.text_area("📝 Transcription", transcription, height=150)
            # elapsed is an empty string when the call failed.
            if elapsed:
                st.info(f"⏱️ Processing Time: {elapsed}")
    else:
        st.warning("Please upload or record an audio file first.")

if btn_clear:
    # st.experimental_rerun is gone from the Streamlit releases that provide
    # st.audio_input (used by this app); st.rerun is its replacement.
    st.rerun()

# Footer: usage guidelines and a link to the leaderboard Space.
# The text is assembled line by line so the two trailing spaces on the bullet
# lines (Markdown hard line breaks) are visible and cannot be stripped by an
# editor.
_guidelines_md = "\n".join(
    [
        "",
        "---",
        "### Guidelines",
        "- Maximum audio length: 30 seconds  ",
        "- Audio content should be in Persian  ",
        "- Both transcription and processing time will be displayed  ",
        "",
        "🔗 [View the Persian ASR Leaderboard](https://huggingface.co/spaces/navidved/open_persian_asr_leaderboard)",
        "",
    ]
)
st.markdown(_guidelines_md)