# Sermas / app.py
# (Hugging Face Space page header, preserved as comments so the file is valid Python:)
# imorcillo's picture
# Update app.py
# 6202273 verified
# raw / history / blame
# 10.2 kB
import gradio as gr
import re
import os
import requests
import time
import soundfile as sf
import io
def hide_notice():
    """Gradio event callback: make the notice component invisible."""
    hidden = gr.update(visible=False)
    return hidden
def start_app():
    """Gradio event callback: hide the intro overlay and reveal the main app.

    Returns two updates, one per output component (intro, app_block).
    """
    hide_intro = gr.update(visible=False)
    show_app = gr.update(visible=True)
    return hide_intro, show_app
def audio_to_bytes(audio):
    """Load an audio file and return its contents as an in-memory WAV buffer.

    Args:
        audio: path of the audio file to read.

    Returns:
        io.BytesIO positioned at offset 0, containing the audio re-encoded
        as WAV.
    """
    samples, sample_rate = sf.read(audio)
    buffer = io.BytesIO()
    sf.write(buffer, samples, sample_rate, format='WAV')
    # Rewind so the consumer (an HTTP upload) reads from the start.
    buffer.seek(0)
    return buffer
def langswitch_API_call(audio, language):
    """Send an audio file to the LANGSWITCH transcription API.

    Args:
        audio: path of the audio file to transcribe.
        language: language code passed through to the API (e.g. "en", "it").

    Returns:
        The parsed JSON response (callers read at least "transcription",
        "is_new_speaker" and "classified_speaker").

    Raises:
        Exception: if the API responds with a non-200 status code.
    """
    audio_bytes = audio_to_bytes(audio)
    # Fixed upload filename (was a pointless f-string with no placeholders).
    files = {'file': ('audio_chunk.wav', audio_bytes, 'audio/wav')}
    api_url = os.getenv("api_url")
    # NOTE(review): no timeout is set, so a stuck server hangs the UI thread;
    # consider requests.post(..., timeout=...) once an acceptable upper bound
    # for long recordings is known.
    response = requests.post(f"{api_url}/online/http?language={language}", files=files)
    if response.status_code != 200:
        print(response)
        raise Exception("API error")
    return response.json()
def transcribe_base(audio, language):
    """Run one transcription request and return the recognised text.

    The speaker-identification summary is still computed (keeping the
    response-key lookups and their KeyError behaviour) but, as in the
    disabled second output below, only the transcription is returned.
    """
    response = langswitch_API_call(audio, language)
    print(response)
    transcription = response["transcription"]
    is_new_speaker = response["is_new_speaker"]
    speaker = response["classified_speaker"]
    speaker_class_string = (
        f'New speaker detected. Assigned new ID {speaker}'
        if is_new_speaker
        else f'Speaker found in database, ID {speaker}'
    )
    return transcription#, speaker_class_string
def fix_italian_transcription(transcription):
    """Post-process an Italian ASR transcription to restore apostrophes.

    The recogniser tends to emit elided forms as two space-separated tokens
    ("l amico" instead of "l'amico"). This function re-inserts the apostrophe
    for the common elision patterns, then undoes the substitution for phrases
    where elision must NOT happen (e.g. "un uomo", never "un'uomo").

    Args:
        transcription: raw transcription text.

    Returns:
        The transcription with Italian elisions restored.
    """
    # Phrases the generic rules below would wrongly elide; restored to their
    # spaced form at the end.
    no_elision_cases = {
        "un autore", "un artista", "un uomo", "un amico", "un imperatore",
        "uno studente", "uno psicologo", "uno zio",
        "di autore", "a uomo", "su imperatore", "con amico", "per artista"
    }
    # Articles / particles followed by a vowel (or h-) take an apostrophe.
    transcription = re.sub(
        r"\b(un|l|d|s|t|m|c|n|quest|all|dall|dell|nell|sull|coll|pell|dov)\s+(?=[aeiouhàèìòùáéíóú])",
        r"\1'", transcription)
    # Clitic pronouns before forms of essere/avere: "s è" -> "s'è".
    transcription = re.sub(r"\b(s|t|m|c|n)\s+(?=è\b|ha\b|hanno\b)", r"\1'", transcription)
    # Truncated "po'" (from "poco"). BUG FIX: the negative lookahead stops a
    # pre-existing "po'" from becoming "po''".
    transcription = re.sub(r"\bpo\b(?!')", "po'", transcription)
    # "senz altro" -> "senz'altro" etc.
    transcription = re.sub(r"\b(senz) ([aeiou])", r"\1'\2", transcription)
    transcription = transcription.replace("anch io", "anch'io")
    # Tens numerals before a vowel: "trent anni" -> "trent'anni".
    pattern_numbers = r"\b(trent|quarant|cinquant|sessant|settant|ottant|novant)\s+([aeiouàèìòù])"
    replacement_numbers = lambda m: m.group(1) + "'" + m.group(2)
    transcription = re.sub(pattern_numbers, replacement_numbers, transcription)
    # Undo the elision for the exception list.
    for phrase in no_elision_cases:
        elided = phrase.replace(" ", "'")
        transcription = transcription.replace(elided, phrase)
    return transcription
def transcribe_mic(audio_microphone, language):
    """Gradio handler for the microphone tab.

    Italian output gets an extra apostrophe-restoration pass, since the
    recogniser tends to drop elisions.
    """
    print("Transcription microphone")
    text = transcribe_base(audio_microphone, language)
    print(text)
    if language != "it":
        return text
    text = fix_italian_transcription(text)
    print(text)
    return text
def transcribe_file(audio_upload, language):
    """Gradio handler for the file-upload tab: plain transcription, no
    language-specific post-processing."""
    print("Transcription local file")
    result = transcribe_base(audio_upload, language)
    return result
# Custom CSS injected into gr.Blocks(css=css_content): intro overlay, popup,
# header, SERMAS/Orai footer and button theming.
# NOTE(review): after the ".popup-button" rule, the stray "/*" lines mean
# everything from ".popup-button:hover" down to the "*/" after
# ".html-container" is one long CSS comment (CSS comments do not nest), so
# the hover rule is disabled — confirm whether that is intended or whether
# the first "/*" after the hover rule was meant to be "*/".
css_content = """
.intro-text {
font-size: 1.1rem;
line-height: 1.6;
text-align: center;
color: #333;
}
.ok-button {
background-color: #4CAF50; /* green */
color: white;
padding: 10px 20px;
border-radius: 8px;
margin-top: 20px;
border: none;
font-weight: bold;
cursor: pointer;
font-size: 1rem;
transition: background-color 0.3s ease;
}
.ok-button:hover {
background-color: #388E3C;
}
.intro-message {
position: fixed;
top: 0; left: 0;
width: 100vw;
height: 100vh;
background: rgba(255,255,255,0.95);
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
z-index: 9999;
padding: 40px;
box-sizing: border-box;
}
.popup-overlay {
position: fixed;
top: 0;
left: 0;
width: 100vw;
height: 100vh;
background: rgba(0, 0, 0, 0.6);
z-index: 10000;
display: flex;
justify-content: center;
align-items: center;
}
.popup-box {
background-color: white;
padding: 20px;
border-radius: 10px;
box-shadow: 0 0 20px rgba(0,0,0,0.3);
width: 360px;
text-align: center;
z-index: 10001;
}
.popup-button {
background-color: #5b65a7;
color: white;
padding: 10px 20px;
border-radius: 8px;
margin-top: 10px;
border: none;
cursor: pointer;
}
/*
.popup-button:hover {
background-color: #3c4687 !important;
}
/*
/*
.gradio-container{
padding: 0 !important;
}
.html-container{
padding: 0 !important;
}
*/
#orai-info{
padding: 50px;
text-align: center;
font-size: 1rem;
background: url('https://elia.eus/static/elhuyar/img/landing_page/ig.webp') rgba(0,0,0,0.8);
background-repeat: no-repeat;
background-position: center center;
background-size: cover;
background-blend-mode: multiply;
}
#orai-info-text p{
color: white !important;
}
/*
#orai-info img{
margin: auto;
display: block;
margin-bottom: 1rem;
}*/
.bold{
font-weight: bold;
color: inherit !important;
}
footer{
display:none !important
}
.logos{
display: flex;
justify-content: center;
}
.sermas-logo{
display: flex;
align-items: center;
margin-right: 3rem;
}
.sermas-logo span{
color: white !important;
font-size: 2.5rem;
font-family: Verdana, Geneva, sans-serif !important;
font-weight: bold;
}
.text-elhuyar{
color: #0045e7;
}
#header{
padding: 50px;
padding-top: 30px;
background-color: #5b65a7;
}
#header h1,h3{
color: white;
}
button.primary{
background-color: #5b65a7;
}
button.primary:hover{
background-color: #3c4687;
}
button.selected{
color: #5b65a7 !important;
}
button.selected::after{
background-color: #5b65a7;
}
.record-button::before{
background: #E50914;
}
"""
# ---------------------------------------------------------------------------
# UI assembly: a full-screen intro notice dismissed by an OK button, then the
# main app with two transcription tabs and a SERMAS / Orai footer.
# Fixes vs. original: removed the duplicate start_app definition (the
# module-level one is used); elem_id was being given *lists* — the CSS
# selectors .intro-message / .intro-text / .ok-button are class selectors,
# so elem_classes is what makes them apply; added the missing
# label="Language" on the second dropdown for consistency with the first tab.
# ---------------------------------------------------------------------------
demo = gr.Blocks(css=css_content)  # , fill_width=True)
with demo:
    intro = gr.Column(visible=True, elem_id="intro-message",
                      elem_classes=["intro-message"])
    app_block = gr.Column(visible=False)

    with intro:
        gr.Markdown("""
Ongi etorri LANGSWITCH-en demora, Orai NLP Teknologiak garatutako hizketa-ezagutzaile eleanitza!
🇪🇸 ¡Bienvenida/o a la demo de LANGSWITCH, el sistema de reconocimiento automático del habla multilingüe desarrollado por Orai NLP Teknologiak!
🇬🇧 Welcome to the LANGSWITCH demo, the multilingual automatic speech recogniser developed by Orai NLP Teknologiak!
Grabaketak ez dira gordetzen eta automatikoki ezabatzen dira.
🇪🇸 No se guardan las grabaciones y se eliminan automáticamente.
🇬🇧 The recordings are not saved and are automatically removed.
""", elem_classes=["intro-text"])
        ok_button = gr.Button("OK", elem_classes=["ok-button"])
        # Uses the module-level start_app callback.
        ok_button.click(fn=start_app, outputs=[intro, app_block])

    with app_block:
        gr.HTML("""
<div id="header">
<h1>LANGSWITCH</h1>
<h3>Multilingual Automatic Speech Recognition in noisy environments</h3>
</div>
""")
        with gr.Tab("Transcribe microphone"):
            iface = gr.Interface(
                fn=transcribe_mic,
                inputs=[
                    gr.Audio(sources="microphone", type="filepath"),
                    gr.Dropdown(label="Language",
                                choices=[("English", "en"),
                                         ("Euskara", "eu"),
                                         ("Español", "es"),
                                         ("Français", "fr"),
                                         ("Italiano", "it")],
                                value="en")
                ],
                outputs=[
                    gr.Textbox(label="Transcription", autoscroll=False),
                    # gr.Textbox(label="Speaker Identification", autoscroll=False)
                ],
                allow_flagging="never",
            )
        with gr.Tab("Transcribe local file"):
            iface = gr.Interface(
                fn=transcribe_file,
                inputs=[
                    gr.Audio(sources="upload", type="filepath"),
                    gr.Dropdown(label="Language",
                                choices=[("English", "en"),
                                         ("Euskara", "eu"),
                                         ("Español", "es"),
                                         ("Français", "fr"),
                                         ("Italiano", "it")],
                                value="en")
                ],
                outputs=[
                    gr.Textbox(label="Transcription", autoscroll=False),
                    # gr.Textbox(label="Speaker Identification", autoscroll=False)
                ],
                allow_flagging="never",
            )
        gr.HTML("""
<div id="orai-info">
<div class="logos">
<div class="sermas-logo">
<img src="https://sermasproject.eu/wp-content/uploads/2023/04/sermas-logo.png" width=100/>
<span>SERMAS</span>
</div>
<img src="https://www.orai.eus/themes/custom/orai_for_drupal9/orai_bw.svg" width=175/>
</div>
<div id="orai-info-text">
<p>The <span class="bold">LANGSWITCH</span> sub-project is part of the Open Call 1 of the <span class="bold">SERMAS</span> project. The goal of the <span class="bold">SERMAS</span> project is to provide socially-acceptable extended reality models and systems.</p>
<p>The technology powering LANGSWITCH was developed by <span class="bold">Orai NLP Teknologiak</span></p>
<p><span class="bold">Orai NLP Teknologiak</span> specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.</p>
</div>
</div>
<p>""")

demo.queue(max_size=1)
# demo.launch(share=False, max_threads=3, auth=(os.getenv("username"), os.getenv("password")), auth_message="Please provide a username and a password.")
demo.launch(share=False, max_threads=3)