Spaces:

navidved
/

gooya-asr

Running

App Files Community

navidved committed on May 12

Commit

afa3129

verified ·

1 Parent(s): eed5ed8

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -27

app.py CHANGED Viewed

@@ -1,48 +1,124 @@
-import gradio as gr
-import requests, os, time
-print("--------- Gradio version:", gr.__version__, "----------")
 ASR_API_URL = os.getenv("ASR_API_URL")
 AUTH_TOKEN  = os.getenv("AUTH_TOKEN")
 def transcribe_audio(file_path: str):
     if not ASR_API_URL or not AUTH_TOKEN:
         return "❌ Error: ASR_API_URL or AUTH_TOKEN is not set.", ""
     headers = {
         "accept": "application/json",
         "Authorization": f"Bearer {AUTH_TOKEN}",
     }
-    files = {"file": (file_path, open(file_path, "rb"), "audio/mpeg")}
     start = time.time()
     try:
-        resp = requests.post(ASR_API_URL, headers=headers, files=files)
     except Exception as e:
-        return f"❌ Error: {e}", ""
     elapsed = time.time() - start
-    if resp.status_code == 200:
-        data = resp.json()
         transcript = data.get("transcription", "No transcription returned.")
-        t = f"{data.get('time', elapsed):.2f} ثانیه"
-        return transcript, t
     else:
-        return f"❌ Error: {resp.status_code}, {resp.text}", ""
-def main():
-    iface = gr.Interface(
-        fn=transcribe_audio,
-        inputs=gr.Audio(label="Audio Input (upload or record, up to 30 s)", type="filepath", sources=["upload", "microphone"]),
-        outputs=[
-            gr.Textbox(label="📝 Transcription", lines=5, show_copy_button=True, placeholder="The transcription will appear here..."),
-            gr.Label(label="⏱️ Processing Time")
-        ],
-        title="Gooya ASR v1.4",
-        description="High-performance Persian Speech-to-Text. Upload or record a Persian audio file (max 30 s) and instantly receive the transcription.",
-        examples=[],
-        cache_examples=False,
-        allow_flagging="never"
     )
-    iface.launch()
 if __name__ == "__main__":
-    main()

+import os, time, requests, gradio as gr
+# -------------------- Basic configuration --------------------
+print("Gradio version:", gr.__version__)
+# Service endpoint and bearer token are read from the environment.
 ASR_API_URL = os.getenv("ASR_API_URL")
 AUTH_TOKEN  = os.getenv("AUTH_TOKEN")
+# Warn once at startup; transcribe_audio checks these values again on every call.
+if not ASR_API_URL or not AUTH_TOKEN:
+    print("⚠️  Warning: ASR_API_URL or AUTH_TOKEN is not set. "
+          "Transcription will fail until they are provided.")
+# -------------------- Helper functions --------------------
 def transcribe_audio(file_path: str):
+    """
+    ورودی: مسیر فایل (به خاطر type='filepath')
+    خروجی: متن رونویسی و زمان پردازش یا پیام خطا
+    """
     if not ASR_API_URL or not AUTH_TOKEN:
         return "❌ Error: ASR_API_URL or AUTH_TOKEN is not set.", ""
     headers = {
         "accept": "application/json",
         "Authorization": f"Bearer {AUTH_TOKEN}",
     }
     start = time.time()
     try:
+        with open(file_path, "rb") as f:
+            files = {"file": (os.path.basename(file_path), f, "audio/mpeg")}
+            response = requests.post(ASR_API_URL, headers=headers, files=files, timeout=120)
     except Exception as e:
+        return f"❌ Error while calling ASR API: {e}", ""
     elapsed = time.time() - start
+    if response.status_code == 200:
+        data = response.json()
         transcript = data.get("transcription", "No transcription returned.")
+        processing_time = f"{data.get('time', elapsed):.2f} ثانیه"
+        return transcript, processing_time
     else:
+        return f"❌ Error: {response.status_code}, {response.text}", ""
+# -------------------- User interface --------------------
+# Stylesheet passed to gr.Blocks below. It styles #gooya-title (banner),
+# .gooya-badge (processing-time label) and #gooya-box.
+# NOTE(review): no component in this layout uses the id "gooya-box".
+custom_css = """
+#gooya-title{color:#fff;background:linear-gradient(90deg,#224CA5 0%,#2CD8D5 100%);
+  border-radius:12px;padding:20px 10px;margin-bottom:12px;}
+.gooya-badge{display:inline-block;background:#224CA5;color:#fff;border-radius:16px;
+  padding:6px 16px;font-size:.97rem;margin-top:4px;}
+#gooya-box{background:#F7FAFF;border:1px solid #e7e9ef;border-radius:14px;
+  padding:22px 18px;margin-top:12px;}
+"""
+with gr.Blocks(css=custom_css, title="Gooya ASR v1.4") as demo:
+    # Title banner
+    gr.HTML(
+        """
+        <div id="gooya-title">
+          <h1 style='margin-bottom:10px;font-weight:800;font-size:2rem;'>
+              Gooya ASR <span style="font-size:1.1rem;font-weight:400;opacity:.8;">v1.4</span>
+          </h1>
+          <p style='font-size:1.12rem;margin-bottom:2px;'>
+              High-performance Persian Speech-to-Text
+          </p>
+          <p style='font-size:.98rem;color:#c6e8fa'>
+              Upload or record a Persian audio file (max 30 s) and instantly receive the transcription.
+          </p>
+        </div>
+        """
+    )
+    # Inputs and outputs
+    with gr.Row():
+        with gr.Column():
+            # type="filepath" hands the callback a path on disk.
+            audio_input = gr.Audio(
+                label="Audio Input (upload or record, up to 30 s)",
+                type="filepath",
+                sources=["upload", "microphone"],
+            )
+        with gr.Column():
+            processing_time_lbl = gr.Label(label="⏱️ Processing Time",
+                                           elem_classes="gooya-badge")
+            # NOTE(review): custom_css has no rule for "#gooya-textbox".
+            transcription_tb = gr.Textbox(
+                label="📝 Transcription",
+                lines=5,
+                show_copy_button=True,
+                placeholder="The transcription will appear here...",
+                elem_id="gooya-textbox",
+            )
+    # Buttons
+    with gr.Row():
+        btn_transcribe = gr.Button("Transcribe", variant="primary")
+        btn_clear      = gr.Button("Clear",      variant="secondary")
+    # Usage instructions shown to the user (Persian text, kept as-is).
+    gr.Markdown(
+        """
+**دستورالعمل‌ها**
+- حداکثر طول صدا: **۳۰ ثانیه**
+- صدا باید فارسی باشد.
+- نتیجه‌ی رونویسی و زمان پردازش بلافاصله نمایش داده می‌شود.
+برای مشاهده بنچمارک‌ها به [Persian ASR Leaderboard](https://huggingface.co/spaces/navidved/open_persian_asr_leaderboard) مراجعه کنید.
+"""
+    )
+    # Wire the callbacks to the buttons
+    # transcribe_audio returns (text, processing_time), matching the outputs order.
+    btn_transcribe.click(
+        transcribe_audio,
+        inputs=audio_input,
+        outputs=[transcription_tb, processing_time_lbl],
+    )
+    # Clear resets the textbox, the time label and the audio input, in that order.
+    btn_clear.click(
+        lambda: ("", "", None),
+        inputs=None,
+        outputs=[transcription_tb, processing_time_lbl, audio_input],
     )
+# -------------------- Run the application --------------------
 if __name__ == "__main__":
+    # A single chained queue()/launch() call, as in the reference code
+    demo.queue().launch(debug=True, share=False)