File size: 4,763 Bytes
f39366f
d726228
 
 
f118118
d726228
 
bda9ee3
d726228
5d079d6
 
 
 
5b1e694
55c61cc
5d079d6
1967b9f
d726228
 
1967b9f
5d079d6
d726228
5d079d6
5b1e694
5d079d6
 
 
 
 
 
 
 
 
 
 
 
 
5b1e694
d726228
5d079d6
d726228
1967b9f
f39366f
5d079d6
 
 
 
 
 
 
d726228
5d079d6
 
 
 
 
 
 
d726228
 
 
 
 
 
 
 
 
 
 
55c61cc
f118118
 
1967b9f
 
 
 
d726228
1967b9f
f118118
d726228
1967b9f
 
 
 
 
d726228
1967b9f
f118118
 
d726228
 
f118118
d726228
 
 
 
 
99106b6
d726228
 
99106b6
f39366f
 
 
d726228
f118118
5d079d6
 
f39366f
5d079d6
f39366f
d726228
f118118
 
5d079d6
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import gradio as gr
import requests
import os
import time

# Service endpoint and bearer token are read from the environment once at
# import time; either is None when its variable is unset.
ASR_API_URL = os.environ.get('ASR_API_URL')
AUTH_TOKEN = os.environ.get('AUTH_TOKEN')

def transcribe_audio(file_path):
    """Upload an audio file to the ASR service and return its transcription.

    Args:
        file_path: Path of the audio file to send, or None when the user
            has not provided any audio.

    Returns:
        A ``(text, elapsed)`` tuple of strings. On success ``text`` is the
        transcription and ``elapsed`` looks like ``"1.23 seconds"``: the
        server-reported ``time`` field when it is numeric, otherwise the
        locally measured duration of the request. On any failure ``text``
        is a message starting with ``"❌ Error:"`` and ``elapsed`` is ``""``.
        Errors are reported through the return value, never raised.
    """
    # Nothing was uploaded or recorded.
    if file_path is None:
        return "❌ Error: Please upload or record an audio file.", ""

    if not ASR_API_URL or not AUTH_TOKEN:
        return "❌ Error: ASR_API_URL or AUTH_TOKEN is not set.", ""

    headers = {
        'accept': 'application/json',
        'Authorization': f'Bearer {AUTH_TOKEN}',
    }

    start_time = time.time()

    try:
        # The context manager guarantees the file handle is closed even if
        # the upload fails.
        with open(file_path, 'rb') as audio_file:
            files = {
                # NOTE(review): the MIME type is always sent as audio/mpeg,
                # whatever the real format of the file is — confirm the
                # server ignores it or accepts it for non-MP3 input.
                'file': (os.path.basename(file_path), audio_file, 'audio/mpeg'),
            }
            response = requests.post(ASR_API_URL, headers=headers, files=files, timeout=60)
    except FileNotFoundError:
        return f"❌ Error: File not found: {file_path}", ""
    except requests.exceptions.Timeout:
        return "❌ Error: Request timeout. Please try again.", ""
    except requests.exceptions.ConnectionError:
        return "❌ Error: Connection failed. Please check your internet connection.", ""
    except Exception as e:
        # UI boundary: surface any other failure as a message instead of
        # letting it propagate to the caller.
        return f"❌ Error: {str(e)}", ""

    inference_time = time.time() - start_time

    if response.status_code != 200:
        error_msg = f"❌ Error: {response.status_code}"
        try:
            error_detail = response.json().get('detail', response.text)
        except (ValueError, AttributeError):
            # Body is not JSON (ValueError) or not a JSON object
            # (AttributeError on .get): fall back to the raw text, truncated
            # to keep the error message short.
            error_detail = response.text[:200]
        return f"{error_msg}, {error_detail}", ""

    try:
        res = response.json()
    except ValueError:
        return "❌ Error: Invalid response format from server.", ""

    # A JSON list/scalar/null has no .get(); treat it as a malformed reply
    # rather than crashing with AttributeError.
    if not isinstance(res, dict):
        return "❌ Error: Invalid response format from server.", ""

    transcription = res.get("transcription", "No transcription returned.")

    # Prefer the server-reported time, but do not discard a valid
    # transcription just because that field is null or non-numeric.
    try:
        elapsed = float(res.get('time', inference_time))
    except (TypeError, ValueError):
        elapsed = inference_time

    return transcription, f"{elapsed:.2f} seconds"

# Build the Gradio UI. The custom CSS styles the title banner (#gooya-title)
# and the processing-time badge (.gooya-badge).
# NOTE(review): the `#gooya-box` rule below has no matching element in this
# block (the textbox uses elem_id "gooya-textbox") — confirm whether it is
# still needed.
with gr.Blocks(css="""
#gooya-title {color:white; background: linear-gradient(90deg, #224CA5 0%, #2CD8D5 100%); border-radius: 12px; padding:20px 10px;margin-bottom:12px;}
.gooya-badge {display:inline-block; background:#224CA5; color:#fff; border-radius:16px; padding:6px 16px; font-size:0.97rem; margin-top:4px;}
#gooya-box {background:#F7FAFF; border:1px solid #e7e9ef; border-radius:14px; padding:22px 18px; margin-top:12px;}
""") as demo:
    # Title banner.
    gr.HTML("""<div id="gooya-title">
    <h1 style='margin-bottom:10px;font-weight:800;font-size:2rem;'>Gooya ASR <span style="font-size:1.1rem; font-weight:400; opacity:0.8;">v1.4</span></h1>
    <p style='font-size:1.12rem; margin-bottom:2px;'>High-performance Persian Speech-to-Text</p>
    <p style='font-size:0.98rem; color:#c6e8fa'>Upload or record a Persian audio file (max 30s) and instantly receive the transcription.</p>
    </div>""")

    # Two columns: audio input on the left, results on the right.
    with gr.Row():
        with gr.Column():
            # type="filepath" makes the handler receive a path string (or
            # None), which is what transcribe_audio expects.
            audio = gr.Audio(
                label="Audio Input (Upload or record, up to 30s)",
                type="filepath",
                show_label=True,
                sources=["upload", "microphone"]
            )
        with gr.Column():
            inference_time = gr.Label(label="⏱️ Processing Time", elem_classes="gooya-badge")
            transcription = gr.Textbox(
                label="📝 Transcription",
                lines=5,
                show_copy_button=True,
                placeholder="The transcription will appear here...",
                elem_id="gooya-textbox"
            )

    # Action buttons.
    with gr.Row():
        submit_btn = gr.Button("Transcribe", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")

    # Static usage instructions shown below the buttons.
    gr.Markdown("""
**Instructions:**  
- Maximum audio length: **30 seconds**
- Input audio should be in Persian.
- The transcription and processing time will be displayed instantly.

For performance benchmarks, visit: [Persian ASR Leaderboard](https://huggingface.co/spaces/navidved/open_persian_asr_leaderboard)
""")

    # transcribe_audio returns (text, elapsed), mapped in order onto the
    # transcription textbox and the processing-time label.
    submit_btn.click(
        transcribe_audio,
        inputs=audio,
        outputs=[transcription, inference_time]
    )

    # Clear button fix: three outputs instead of two.
    clear_btn.click(
        lambda: ("", "", None),  # None is added to reset the audio input
        None,
        [transcription, inference_time, audio]
    )

# Launch the app only when this file is run as a script. `share` is set to
# False because it must be False on Hugging Face Spaces.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)