|  | import gradio as gr | 
					
						
						|  | import subprocess,os | 
					
						
						|  | from datasets import load_dataset, Audio | 
					
						
						|  | import corpora | 
					
						
						|  | import ctcalign,graph | 
					
						
						|  | from numpy import random | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | import matplotlib | 
					
						
						|  | matplotlib.use('Agg') | 
					
						
						|  | import matplotlib.pyplot as plt | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def setup(): | 
					
						
						|  | r0 = subprocess.run(["pwd"], capture_output=True, text=True) | 
					
						
						|  | print('PWD::', r0.stdout) | 
					
						
						|  | r1 = subprocess.run(["wget", "https://github.com/google/REAPER/archive/refs/heads/master.zip"], capture_output=True, text=True) | 
					
						
						|  | print(r1.stdout) | 
					
						
						|  | subprocess.run(["unzip", "./master.zip"]) | 
					
						
						|  | subprocess.run(["mv", "REAPER-master", "REAPER"]) | 
					
						
						|  | subprocess.run(["rm", "./master.zip"]) | 
					
						
						|  | os.chdir('./REAPER') | 
					
						
						|  | subprocess.run(["mkdir", "build"]) | 
					
						
						|  | os.chdir('./build') | 
					
						
						|  | r2 = subprocess.run(["cmake", ".."], capture_output=True, text=True) | 
					
						
						|  | print(r2.stdout) | 
					
						
						|  | r3 = subprocess.run(["make"], capture_output=True, text=True) | 
					
						
						|  | print(r3.stdout) | 
					
						
						|  |  | 
					
						
						|  | os.chdir('../..') | 
					
						
						|  | r9 = subprocess.run(["ls", "-la"], capture_output=True, text=True) | 
					
						
						|  | print('LS::', r9.stdout) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
# Run the REAPER download/build at import time, before the UI is defined.
setup()
					
						
						|  |  | 
					
						
						|  | def load_lang(langname): | 
					
						
						|  | if langname=="Icelandic": | 
					
						
						|  | df = corpora.ds_i | 
					
						
						|  | model_path="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h" | 
					
						
						|  | elif langname =="Faroese": | 
					
						
						|  | df = corpora.ds_f | 
					
						
						|  | model_path = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h" | 
					
						
						|  |  | 
					
						
						|  | model_word_separator = '|' | 
					
						
						|  | model_blank_token = '[PAD]' | 
					
						
						|  | lang_aligner = ctcalign.aligner(model_path,model_word_separator,model_blank_token) | 
					
						
						|  |  | 
					
						
						|  | df = df.data.to_pandas() | 
					
						
						|  | df = df.drop(columns=['audio', 'speaker_id','duration']) | 
					
						
						|  | return (df[:10], lang_aligner) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def f1(langname,lang_aligner): | 
					
						
						|  | if langname=="Icelandic": | 
					
						
						|  | ds = corpora.ds_i | 
					
						
						|  | elif langname =="Faroese": | 
					
						
						|  | ds = corpora.ds_f | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | maxdat=len(ds) | 
					
						
						|  |  | 
					
						
						|  | ds = ds.select([random.randint(maxdat-1)]) | 
					
						
						|  |  | 
					
						
						|  | sound_path = ds['audio'][0]['path'] | 
					
						
						|  | transcript = ds['normalized_text'][0] | 
					
						
						|  |  | 
					
						
						|  | return graph.align_and_graph(sound_path,transcript,lang_aligner) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | bl = gr.Blocks() | 
					
						
						|  |  | 
					
						
						|  | with bl: | 
					
						
						|  |  | 
					
						
						|  | lloadr = gr.Dropdown(["Faroese", "Icelandic"], label="Select a language") | 
					
						
						|  |  | 
					
						
						|  | align_func = gr.State() | 
					
						
						|  |  | 
					
						
						|  | with gr.Row(): | 
					
						
						|  |  | 
					
						
						|  | databrowser = gr.DataFrame(wrap=True, max_rows=50, interactive=False, overflow_row_behaviour='paginate') | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | btn1 = gr.Button(value="The random prosody button") | 
					
						
						|  | btn1.style(full_width=False, size="sm") | 
					
						
						|  |  | 
					
						
						|  | pl1 = gr.Plot() | 
					
						
						|  |  | 
					
						
						|  | btn1.click(f1, [lloadr,align_func], pl1) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | lloadr.change(load_lang,lloadr,[databrowser,align_func]) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | gr.Markdown( | 
					
						
						|  | """ | 
					
						
						|  | # ABOUT | 
					
						
						|  | This is a work-in-progress demo. | 
					
						
						|  |  | 
					
						
						|  | Icelandic uses the [samromur-asr](https://huggingface.co/datasets/language-and-voice-lab/samromur_asr) corpus, and Faroese uses [ravnursson-asr](https://huggingface.co/datasets/carlosdanielhernandezmena/ravnursson_asr). | 
					
						
						|  |  | 
					
						
						|  | After you select a language, a few example sentences from the corpus are displayed. | 
					
						
						|  |  | 
					
						
						|  | Click the button to view time-aligned prosody information for a random sentence - this could be any sentence, not only one of the ones shown above. | 
					
						
						|  |  | 
					
						
						|  | [ABOUT REAPER PITCH TRACKING - TODO] | 
					
						
						|  |  | 
					
						
						|  | [ABOUT RMSE INTENSITY - TODO] | 
					
						
						|  |  | 
					
						
						|  | [ABOUT CTC ALIGNMENT - TODO] | 
					
						
						|  |  | 
					
						
						|  | [email protected] / https://github.com/catiR/ | 
					
						
						|  | """ | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if __name__ == "__main__": | 
					
						
						|  | bl.launch() |