File size: 9,554 Bytes
594893f
 
 
 
 
5e48419
 
594893f
d2d7dc7
a247d56
4921a8c
 
 
 
 
 
5e48419
 
 
 
 
 
594893f
5e48419
 
 
a96b890
 
a12adfc
 
 
5e48419
594893f
 
5e48419
 
 
 
 
 
 
 
 
cb2de80
594893f
eaf5bbe
 
 
 
 
 
e04575a
ebf21e6
eaf5bbe
 
 
963b5a3
e04575a
963b5a3
ebf21e6
eaf5bbe
e04575a
eaf5bbe
 
 
e04575a
143de70
eaf5bbe
5e48419
 
b915101
e04575a
cb2de80
eaf5bbe
e04575a
eaf5bbe
48bba9a
cb2de80
a5fcb05
5e48419
 
 
594893f
 
5e48419
a536df8
 
 
 
 
 
 
 
 
 
 
 
 
 
4211536
958aecd
 
 
 
 
 
 
 
 
 
4211536
ab8da36
4211536
958aecd
 
 
 
 
 
 
4211536
ab8da36
4211536
958aecd
 
 
 
 
 
 
4211536
a9d86d8
4211536
 
 
a9d86d8
d61e53e
5e48419
 
 
 
 
 
 
 
 
 
 
 
a12adfc
5e48419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594893f
5e48419
 
 
 
 
 
594893f
5e48419
 
 
 
 
 
594893f
5e48419
751dcae
5e48419
62ad9d3
 
d21aa08
de78153
d21aa08
5e48419
594893f
1e77a13
a536df8
 
d2d7dc7
a536df8
d2d7dc7
a536df8
d2d7dc7
 
77d14ce
 
4921a8c
 
5e48419
 
 
 
 
594893f
5e48419
594893f
5e48419
594893f
 
b915101
ce0c093
 
 
 
5e48419
 
 
 
ce0c093
5e48419
 
 
 
 
 
 
 
594893f
983b6c1
ce0c093
 
 
 
2ce6def
594893f
 
3b71270
ce0c093
594893f
 
 
966fb46
 
4d72ecc
 
 
 
 
 
 
 
 
 
 
 
 
 
1ad2347
4d72ecc
 
 
966fb46
f74e667
5e48419
 
 
 
a12adfc
5e48419
 
a12adfc
5e48419
 
 
 
 
 
 
 
4caf1c5
594893f
aa6787d
202bf02
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
import gradio as gr
import re
import os
import requests
import time
import soundfile as sf
import io

def hide_notice():
    """Build a Gradio update payload that sets ``visible`` to ``False``."""
    hidden = gr.update(visible=False)
    return hidden
def show_popup():
    """Build a Gradio update payload that sets ``visible`` to ``True``."""
    shown = gr.update(visible=True)
    return shown

def hide_popup():
    """Build a Gradio update payload that sets ``visible`` to ``False``."""
    hidden = gr.update(visible=False)
    return hidden

def audio_to_bytes(audio):
    """Re-encode an audio source as WAV inside an in-memory buffer.

    Args:
        audio: Source handed to ``soundfile.read``.

    Returns:
        io.BytesIO: Buffer containing the WAV data, rewound to position 0.
    """
    samples, sample_rate = sf.read(audio)
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, samples, sample_rate, format='WAV')
    # Rewind so the consumer reads the buffer from the beginning.
    wav_buffer.seek(0)
    return wav_buffer

def langswitch_API_call(audio, language, timeout=(10, 300)):
    """Post an audio file to the transcription HTTP endpoint and return its reply.

    The base URL is read from the ``api_url`` environment variable; the request
    goes to ``<api_url>/online/http`` with the audio attached as the multipart
    field ``file``.

    Args:
        audio: Audio source forwarded to ``audio_to_bytes``.
        language: Language code sent as the ``language`` query parameter.
        timeout: Timeout passed to ``requests.post``: a number of seconds or a
            ``(connect, read)`` tuple. Pass ``None`` to wait indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If ``api_url`` is not set or the endpoint answers with a
            status code other than 200.
        requests.RequestException: On connection failures or timeouts.
    """
    api_url = os.getenv("api_url")
    if not api_url:
        # Fail early instead of posting to the literal URL "None/online/http".
        raise RuntimeError('API error: the "api_url" environment variable is not set')

    audio_bytes = audio_to_bytes(audio)
    files = {'file': ('audio_chunk.wav', audio_bytes, 'audio/wav')}
    response = requests.post(
        f"{api_url.rstrip('/')}/online/http",
        # Let requests URL-encode the value rather than splicing it into the URL.
        params={"language": language},
        files=files,
        # Without a timeout a stalled backend would block this call forever.
        timeout=timeout,
    )
    if response.status_code != 200:
        print(response)
        raise RuntimeError(f"API error: HTTP {response.status_code}")
    return response.json()

def transcribe_base(audio, language):
    """Transcribe ``audio`` through ``langswitch_API_call`` and return the text.

    The speaker fields of the reply are read and turned into a status message,
    but only the transcription is returned.

    Args:
        audio: Audio source forwarded to ``langswitch_API_call``.
        language: Language code forwarded to ``langswitch_API_call``.

    Returns:
        The value stored under ``"transcription"`` in the API reply.
    """
    api_reply = langswitch_API_call(audio, language)
    print(api_reply)
    text = api_reply["transcription"]
    new_speaker = api_reply["is_new_speaker"]
    speaker_id = api_reply["classified_speaker"]
    # Speaker status message; built here but currently not part of the result.
    speaker_message = (
        f'New speaker detected. Assigned new ID {speaker_id}'
        if new_speaker
        else f'Speaker found in database, ID {speaker_id}'
    )
    return text

def fix_italian_transcription(transcription):
    """Restore apostrophes that are missing from an Italian transcription.

    The rules are purely textual heuristics applied in order:

    1. An elidable word (``l``, ``un``, ``dell``, ...) followed by whitespace
       and a word starting with a vowel or ``h`` is joined to it with an
       apostrophe (``"l amico"`` -> ``"l'amico"``).
    2. The standalone word ``po`` becomes ``po'``.
    3. ``senz`` + vowel and ``anch io`` are joined with an apostrophe.
    4. Truncated tens (``trent``, ``quarant``, ...) followed by a vowel are
       joined with an apostrophe (``"trent anni"`` -> ``"trent'anni"``).
    5. A fixed list of phrases that must not be elided is restored
       (``"un'amico"`` -> ``"un amico"``).

    Text that already carries its apostrophes is left unchanged, so the
    function can safely be applied more than once.

    Args:
        transcription: Text to post-process.

    Returns:
        The text with the apostrophes inserted.
    """
    # Phrases that must keep the space. Each one is looked up in its elided
    # spelling (space replaced by an apostrophe) and reverted.
    no_elision_cases = {
        "un autore", "un artista", "un uomo", "un amico", "un imperatore",
        "uno studente", "uno psicologo", "uno zio",
        "di autore", "a uomo", "su imperatore", "con amico", "per artista"
    }

    # Rule 1: the lookahead keeps the following word out of the match, so only
    # the whitespace is replaced by the apostrophe.
    transcription = re.sub(
        r"\b(un|l|d|s|t|m|c|n|quest|all|dall|dell|nell|sull|coll|pell|dov)\s+(?=[aeiouhàèìòùáéíóú])",
        r"\1'",
        transcription,
    )
    # Rule 2: skip a "po" that already has its apostrophe, otherwise the
    # result would be "po''".
    transcription = re.sub(r"\bpo\b(?!['’])", "po'", transcription)
    # Rule 3.
    transcription = re.sub(r"\b(senz) ([aeiou])", r"\1'\2", transcription)
    transcription = transcription.replace("anch io", "anch'io")
    # Rule 4.
    transcription = re.sub(
        r"\b(trent|quarant|cinquant|sessant|settant|ottant|novant)\s+([aeiouàèìòù])",
        r"\1'\2",
        transcription,
    )

    # Rule 5: undo rule 1 for the listed exceptions.
    for phrase in no_elision_cases:
        elided = phrase.replace(" ", "'")
        transcription = transcription.replace(elided, phrase)

    return transcription

def transcribe_mic(audio_microphone, language):
    """Transcribe a microphone recording.

    Italian (``language == "it"``) results are additionally passed through
    ``fix_italian_transcription``; the intermediate texts are printed.

    Args:
        audio_microphone: Recorded audio forwarded to ``transcribe_base``.
        language: Language code of the recording.

    Returns:
        The transcription text.
    """
    print("Transcription microphone")
    raw_text = transcribe_base(audio_microphone, language)
    print(raw_text)
    if language != "it":
        return raw_text

    fixed_text = fix_italian_transcription(raw_text)
    print(fixed_text)
    return fixed_text

def transcribe_file(audio_upload, language):
    """Transcribe an uploaded audio file.

    Italian (``language == "it"``) results are passed through
    ``fix_italian_transcription``, the same post-processing that
    ``transcribe_mic`` applies, so both input paths produce the same text.

    Args:
        audio_upload: Uploaded audio forwarded to ``transcribe_base``.
        language: Language code of the audio.

    Returns:
        The transcription text.
    """
    print("Transcription local file")
    transcription = transcribe_base(audio_upload, language)
    if language == "it":
        transcription = fix_italian_transcription(transcription)
    return transcription


# Custom stylesheet passed to gr.Blocks(css=...). Rules wrapped in /* ... */
# are intentionally disabled.
css_content = """
.intro-message {
    position: fixed;
    top: 0; left: 0;
    width: 100vw;
    height: 100vh;
    background: rgba(255,255,255,0.95);
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
    z-index: 9999;
    padding: 40px;
    box-sizing: border-box;
}
.popup-overlay {
    position: fixed;
    top: 0;
    left: 0;
    width: 100vw;
    height: 100vh;
    background: rgba(0, 0, 0, 0.6);
    z-index: 10000;
    display: flex;
    justify-content: center;
    align-items: center;
}

.popup-box {
    background-color: white;
    padding: 20px;
    border-radius: 10px;
    box-shadow: 0 0 20px rgba(0,0,0,0.3);
    width: 360px;
    text-align: center;
    z-index: 10001;
}

.popup-button {
    background-color: #5b65a7;
    color: white;
    padding: 10px 20px;
    border-radius: 8px;
    margin-top: 10px;
    border: none;
    cursor: pointer;
}
/*
.popup-button:hover {
    background-color: #3c4687 !important;
}
*/

/*
.gradio-container{
    padding: 0 !important;
}
.html-container{
    padding: 0 !important;
}
*/
#orai-info{
    padding: 50px;
    text-align: center;
    font-size: 1rem;
    background: url('https://elia.eus/static/elhuyar/img/landing_page/ig.webp') rgba(0,0,0,0.8);
    background-repeat: no-repeat;
    background-position: center center;
    background-size: cover;
    background-blend-mode: multiply;
}
#orai-info-text p{
    color: white !important;
}
/*
#orai-info img{
    margin: auto;
    display: block;
    margin-bottom: 1rem;
}*/
.bold{
    font-weight: bold;
    color: inherit !important;
}
footer{
    display:none !important
}

.logos{
    display: flex;
    justify-content: center;
}
.sermas-logo{
    display: flex;
    align-items: center;
    margin-right: 3rem;
}
.sermas-logo span{
    color: white !important;
    font-size: 2.5rem;
    font-family: Verdana, Geneva, sans-serif !important;
    font-weight: bold;
}

.text-elhuyar{
    color: #0045e7;
}

#header{
    padding: 50px;
    padding-top: 30px;
    background-color: #5b65a7;
}
#header h1, #header h3{
    color: white;
}

button.primary{
    background-color: #5b65a7;
}
button.primary:hover{
    background-color: #3c4687;
}

button.selected{
    color: #5b65a7 !important;
}
button.selected::after{
    background-color: #5b65a7;
}

.record-button::before{
    background: #E50914;
}
"""




# (label, value) pairs offered by the language dropdown of both tabs.
LANGUAGE_CHOICES = [
    ("English", "en"),
    ("Euskara", "eu"),
    ("Español", "es"),
    ("Français", "fr"),
    ("Italiano", "it"),
]

demo = gr.Blocks(css=css_content) #, fill_width=True)
with demo:

    # Intro notice shown on load. The stylesheet targets the *class*
    # ".intro-message", so the class is set in addition to the element id;
    # with the id alone the overlay rule never matches.
    intro = gr.Column(
        visible=True,
        elem_id="intro-message",
        elem_classes=["intro-message"],
    )
    with intro:
        gr.Markdown("""
        Welcome!

        ⚠️ The recordings are not saved and are automatically removed.
        """)
        ok_button = gr.Button("OK")
        # Clicking OK hides the intro column.
        ok_button.click(fn=hide_notice, outputs=intro)

    
    gr.HTML("""
<div id="header">
    <h1>LANGSWITCH</h1>
    <h3>Multilingual Automatic Speech Recognition in noisy environments</h3>
</div>
""")

    with gr.Tab("Transcribe microphone"):
        iface = gr.Interface(
            fn=transcribe_mic,
            inputs=[
                gr.Audio(sources="microphone", type="filepath"),
                gr.Dropdown(label="Language", choices=LANGUAGE_CHOICES, value="en"),
            ],
            outputs=[
                gr.Textbox(label="Transcription", autoscroll=False),
                #gr.Textbox(label="Speaker Identification", autoscroll=False)
            ],
            allow_flagging="never",
        )

    with gr.Tab("Transcribe local file"):
        iface = gr.Interface(
            fn=transcribe_file,
            inputs=[
                gr.Audio(sources="upload", type="filepath"),
                # Same label as on the microphone tab.
                gr.Dropdown(label="Language", choices=LANGUAGE_CHOICES, value="en"),
            ],
            outputs=[
                gr.Textbox(label="Transcription", autoscroll=False),
                #gr.Textbox(label="Speaker Identification", autoscroll=False)
            ],
            allow_flagging="never",
        )

    ###############################
    # Disabled alternative: modal popup version of the notice.
    #popup = gr.Column(visible=False, elem_classes="popup-overlay")
    #with popup:
    #    with gr.Column(elem_classes=["popup-box"]):
    #        gr.Markdown(
    #            """
    #            ### ℹ️ Notice
    # 
    #             Grabaketak ez dira gordetzen eta automatikoki ezabatzen dira.  
    #             No se guardan las grabaciones y se eliminan automáticamente.  
    #             The recordings are not saved and are automatically removed.
    #             """
    #         )
    #         ok_btn = gr.Button("OK", elem_classes=["popup-button"])
    #         ok_btn.click(fn=hide_popup, outputs=popup)

    ## Show on load
    #demo.load(fn=show_popup, outputs=popup)
    #
    ###################
    
    gr.HTML("""
<div id="orai-info">
    <div class="logos">
        <div class="sermas-logo">
            <img src="https://sermasproject.eu/wp-content/uploads/2023/04/sermas-logo.png" width=100/>
            <span>SERMAS</span>
        </div>
        <img src="https://www.orai.eus/themes/custom/orai_for_drupal9/orai_bw.svg" width=175/>
    </div>
    <div id="orai-info-text">
        <p>The <span class="bold">LANGSWITCH</span> sub-project is part of the Open Call 1 of the <span class="bold">SERMAS</span> project. The goal of the <span class="bold">SERMAS</span> project is to provide socially-acceptable extended reality models and systems.</p>
        <p>The technology powering LANGSWITCH was developed by <span class="bold">Orai NLP Teknologiak</span></p>
        <p><span class="bold">Orai NLP Teknologiak</span> specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.</p>
    </div>
</div>
<p>""")
    
# Queue with room for a single pending request.
demo.queue(max_size=1)
#demo.launch(share=False, max_threads=3, auth=(os.getenv("username"), os.getenv("password")), auth_message="Please provide a username and a password.")
demo.launch(share=False, max_threads=3)