# Sermas / app.py
# (Hugging Face Space page header, preserved as comments so the file is valid Python:)
# imorcillo's picture
# Update app.py
# 6202273 verified
# raw / history / blame
# 10.2 kB
import gradio as gr
import re
import os
import requests
import time
import soundfile as sf
import io
def hide_notice():
    """Gradio event callback: make the notice component invisible."""
    hidden = gr.update(visible=False)
    return hidden
def start_app():
    """Gradio event callback: hide the intro overlay and reveal the main app.

    Returns two updates, one per output component (intro, app_block).
    """
    hide_intro = gr.update(visible=False)
    show_app = gr.update(visible=True)
    return hide_intro, show_app
def audio_to_bytes(audio):
    """Load an audio file and return its contents as an in-memory WAV buffer.

    Args:
        audio: path of the audio file to read.

    Returns:
        io.BytesIO positioned at offset 0, containing the audio re-encoded
        as WAV.
    """
    samples, sample_rate = sf.read(audio)
    buffer = io.BytesIO()
    sf.write(buffer, samples, sample_rate, format='WAV')
    # Rewind so the consumer (an HTTP upload) reads from the start.
    buffer.seek(0)
    return buffer
def langswitch_API_call(audio, language):
    """Send an audio file to the LANGSWITCH transcription API.

    Args:
        audio: path of the audio file to transcribe.
        language: language code passed through to the API (e.g. "en", "it").

    Returns:
        The parsed JSON response (callers read at least "transcription",
        "is_new_speaker" and "classified_speaker").

    Raises:
        Exception: if the API responds with a non-200 status code.
    """
    audio_bytes = audio_to_bytes(audio)
    # Fixed upload filename (was a pointless f-string with no placeholders).
    files = {'file': ('audio_chunk.wav', audio_bytes, 'audio/wav')}
    api_url = os.getenv("api_url")
    # NOTE(review): no timeout is set, so a stuck server hangs the UI thread;
    # consider requests.post(..., timeout=...) once an acceptable upper bound
    # for long recordings is known.
    response = requests.post(f"{api_url}/online/http?language={language}", files=files)
    if response.status_code != 200:
        print(response)
        raise Exception("API error")
    return response.json()
def transcribe_base(audio, language):
    """Run one transcription request and return the recognised text.

    The speaker-identification summary is still computed (keeping the
    response-key lookups and their KeyError behaviour) but, as in the
    disabled second output below, only the transcription is returned.
    """
    response = langswitch_API_call(audio, language)
    print(response)
    transcription = response["transcription"]
    is_new_speaker = response["is_new_speaker"]
    speaker = response["classified_speaker"]
    speaker_class_string = (
        f'New speaker detected. Assigned new ID {speaker}'
        if is_new_speaker
        else f'Speaker found in database, ID {speaker}'
    )
    return transcription#, speaker_class_string
def fix_italian_transcription(transcription):
    """Post-process an Italian ASR transcription to restore apostrophes.

    The recogniser tends to emit elided forms as two space-separated tokens
    ("l amico" instead of "l'amico"). This function re-inserts the apostrophe
    for the common elision patterns, then undoes the substitution for phrases
    where elision must NOT happen (e.g. "un uomo", never "un'uomo").

    Args:
        transcription: raw transcription text.

    Returns:
        The transcription with Italian elisions restored.
    """
    # Phrases the generic rules below would wrongly elide; restored to their
    # spaced form at the end.
    no_elision_cases = {
        "un autore", "un artista", "un uomo", "un amico", "un imperatore",
        "uno studente", "uno psicologo", "uno zio",
        "di autore", "a uomo", "su imperatore", "con amico", "per artista"
    }
    # Articles / particles followed by a vowel (or h-) take an apostrophe.
    transcription = re.sub(
        r"\b(un|l|d|s|t|m|c|n|quest|all|dall|dell|nell|sull|coll|pell|dov)\s+(?=[aeiouhàèìòùáéíóú])",
        r"\1'", transcription)
    # Clitic pronouns before forms of essere/avere: "s è" -> "s'è".
    transcription = re.sub(r"\b(s|t|m|c|n)\s+(?=è\b|ha\b|hanno\b)", r"\1'", transcription)
    # Truncated "po'" (from "poco"). BUG FIX: the negative lookahead stops a
    # pre-existing "po'" from becoming "po''".
    transcription = re.sub(r"\bpo\b(?!')", "po'", transcription)
    # "senz altro" -> "senz'altro" etc.
    transcription = re.sub(r"\b(senz) ([aeiou])", r"\1'\2", transcription)
    transcription = transcription.replace("anch io", "anch'io")
    # Tens numerals before a vowel: "trent anni" -> "trent'anni".
    pattern_numbers = r"\b(trent|quarant|cinquant|sessant|settant|ottant|novant)\s+([aeiouàèìòù])"
    replacement_numbers = lambda m: m.group(1) + "'" + m.group(2)
    transcription = re.sub(pattern_numbers, replacement_numbers, transcription)
    # Undo the elision for the exception list.
    for phrase in no_elision_cases:
        elided = phrase.replace(" ", "'")
        transcription = transcription.replace(elided, phrase)
    return transcription
def transcribe_mic(audio_microphone, language):
    """Gradio handler for the microphone tab.

    Italian output gets an extra apostrophe-restoration pass, since the
    recogniser tends to drop elisions.
    """
    print("Transcription microphone")
    text = transcribe_base(audio_microphone, language)
    print(text)
    if language != "it":
        return text
    text = fix_italian_transcription(text)
    print(text)
    return text
def transcribe_file(audio_upload, language):
    """Gradio handler for the file-upload tab: plain transcription, no
    language-specific post-processing."""
    print("Transcription local file")
    result = transcribe_base(audio_upload, language)
    return result
# Custom CSS injected into gr.Blocks(css=css_content): intro overlay, popup,
# header, SERMAS/Orai footer and button theming.
# NOTE(review): after the ".popup-button" rule, the stray "/*" lines mean
# everything from ".popup-button:hover" down to the "*/" after
# ".html-container" is one long CSS comment (CSS comments do not nest), so
# the hover rule is disabled — confirm whether that is intended or whether
# the first "/*" after the hover rule was meant to be "*/".
css_content = """
.intro-text {
font-size: 1.1rem;
line-height: 1.6;
text-align: center;
color: #333;
}
.ok-button {
background-color: #4CAF50; /* green */
color: white;
padding: 10px 20px;
border-radius: 8px;
margin-top: 20px;
border: none;
font-weight: bold;
cursor: pointer;
font-size: 1rem;
transition: background-color 0.3s ease;
}
.ok-button:hover {
background-color: #388E3C;
}
.intro-message {
position: fixed;
top: 0; left: 0;
width: 100vw;
height: 100vh;
background: rgba(255,255,255,0.95);
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
z-index: 9999;
padding: 40px;
box-sizing: border-box;
}
.popup-overlay {
position: fixed;
top: 0;
left: 0;
width: 100vw;
height: 100vh;
background: rgba(0, 0, 0, 0.6);
z-index: 10000;
display: flex;
justify-content: center;
align-items: center;
}
.popup-box {
background-color: white;
padding: 20px;
border-radius: 10px;
box-shadow: 0 0 20px rgba(0,0,0,0.3);
width: 360px;
text-align: center;
z-index: 10001;
}
.popup-button {
background-color: #5b65a7;
color: white;
padding: 10px 20px;
border-radius: 8px;
margin-top: 10px;
border: none;
cursor: pointer;
}
/*
.popup-button:hover {
background-color: #3c4687 !important;
}
/*
/*
.gradio-container{
padding: 0 !important;
}
.html-container{
padding: 0 !important;
}
*/
#orai-info{
padding: 50px;
text-align: center;
font-size: 1rem;
background: url('https://elia.eus/static/elhuyar/img/landing_page/ig.webp') rgba(0,0,0,0.8);
background-repeat: no-repeat;
background-position: center center;
background-size: cover;
background-blend-mode: multiply;
}
#orai-info-text p{
color: white !important;
}
/*
#orai-info img{
margin: auto;
display: block;
margin-bottom: 1rem;
}*/
.bold{
font-weight: bold;
color: inherit !important;
}
footer{
display:none !important
}
.logos{
display: flex;
justify-content: center;
}
.sermas-logo{
display: flex;
align-items: center;
margin-right: 3rem;
}
.sermas-logo span{
color: white !important;
font-size: 2.5rem;
font-family: Verdana, Geneva, sans-serif !important;
font-weight: bold;
}
.text-elhuyar{
color: #0045e7;
}
#header{
padding: 50px;
padding-top: 30px;
background-color: #5b65a7;
}
#header h1,h3{
color: white;
}
button.primary{
background-color: #5b65a7;
}
button.primary:hover{
background-color: #3c4687;
}
button.selected{
color: #5b65a7 !important;
}
button.selected::after{
background-color: #5b65a7;
}
.record-button::before{
background: #E50914;
}
"""
# ---------------------------------------------------------------------------
# UI assembly: a full-screen intro notice dismissed by an OK button, then the
# main app with two transcription tabs and a SERMAS / Orai footer.
# Fixes vs. original: removed the duplicate start_app definition (the
# module-level one is used); elem_id was being given *lists* — the CSS
# selectors .intro-message / .intro-text / .ok-button are class selectors,
# so elem_classes is what makes them apply; added the missing
# label="Language" on the second dropdown for consistency with the first tab.
# ---------------------------------------------------------------------------
demo = gr.Blocks(css=css_content)  # , fill_width=True)
with demo:
    intro = gr.Column(visible=True, elem_id="intro-message",
                      elem_classes=["intro-message"])
    app_block = gr.Column(visible=False)

    with intro:
        gr.Markdown("""
Ongi etorri LANGSWITCH-en demora, Orai NLP Teknologiak garatutako hizketa-ezagutzaile eleanitza!
🇪🇸 ¡Bienvenida/o a la demo de LANGSWITCH, el sistema de reconocimiento automático del habla multilingüe desarrollado por Orai NLP Teknologiak!
🇬🇧 Welcome to the LANGSWITCH demo, the multilingual automatic speech recogniser developed by Orai NLP Teknologiak!
Grabaketak ez dira gordetzen eta automatikoki ezabatzen dira.
🇪🇸 No se guardan las grabaciones y se eliminan automáticamente.
🇬🇧 The recordings are not saved and are automatically removed.
""", elem_classes=["intro-text"])
        ok_button = gr.Button("OK", elem_classes=["ok-button"])
        # Uses the module-level start_app callback.
        ok_button.click(fn=start_app, outputs=[intro, app_block])

    with app_block:
        gr.HTML("""
<div id="header">
<h1>LANGSWITCH</h1>
<h3>Multilingual Automatic Speech Recognition in noisy environments</h3>
</div>
""")
        with gr.Tab("Transcribe microphone"):
            iface = gr.Interface(
                fn=transcribe_mic,
                inputs=[
                    gr.Audio(sources="microphone", type="filepath"),
                    gr.Dropdown(label="Language",
                                choices=[("English", "en"),
                                         ("Euskara", "eu"),
                                         ("Español", "es"),
                                         ("Français", "fr"),
                                         ("Italiano", "it")],
                                value="en")
                ],
                outputs=[
                    gr.Textbox(label="Transcription", autoscroll=False),
                    # gr.Textbox(label="Speaker Identification", autoscroll=False)
                ],
                allow_flagging="never",
            )
        with gr.Tab("Transcribe local file"):
            iface = gr.Interface(
                fn=transcribe_file,
                inputs=[
                    gr.Audio(sources="upload", type="filepath"),
                    gr.Dropdown(label="Language",
                                choices=[("English", "en"),
                                         ("Euskara", "eu"),
                                         ("Español", "es"),
                                         ("Français", "fr"),
                                         ("Italiano", "it")],
                                value="en")
                ],
                outputs=[
                    gr.Textbox(label="Transcription", autoscroll=False),
                    # gr.Textbox(label="Speaker Identification", autoscroll=False)
                ],
                allow_flagging="never",
            )
        gr.HTML("""
<div id="orai-info">
<div class="logos">
<div class="sermas-logo">
<img src="https://sermasproject.eu/wp-content/uploads/2023/04/sermas-logo.png" width=100/>
<span>SERMAS</span>
</div>
<img src="https://www.orai.eus/themes/custom/orai_for_drupal9/orai_bw.svg" width=175/>
</div>
<div id="orai-info-text">
<p>The <span class="bold">LANGSWITCH</span> sub-project is part of the Open Call 1 of the <span class="bold">SERMAS</span> project. The goal of the <span class="bold">SERMAS</span> project is to provide socially-acceptable extended reality models and systems.</p>
<p>The technology powering LANGSWITCH was developed by <span class="bold">Orai NLP Teknologiak</span></p>
<p><span class="bold">Orai NLP Teknologiak</span> specializes in research, development, and innovation in artificial intelligence, with a focus on fostering a more competitive industrial and business landscape, enhancing public administration efficiency, and promoting a more inclusive society.</p>
</div>
</div>
<p>""")

demo.queue(max_size=1)
# demo.launch(share=False, max_threads=3, auth=(os.getenv("username"), os.getenv("password")), auth_message="Please provide a username and a password.")
demo.launch(share=False, max_threads=3)