Spaces:

peproject
/

pronounciationevaluation

Runtime error

App Files Files Community

bel32123 committed on Oct 30, 2023

Commit

1e93f37

1 Parent(s): 2114839

Adjust to use multitask model

Browse files

Files changed (1) hide show

app.py +11 -13

app.py CHANGED Viewed

@@ -3,17 +3,17 @@ from speechbrain.pretrained import GraphemeToPhoneme
 import os
 import torchaudio
 from wav2vecasr.MispronounciationDetector import MispronounciationDetector
-from wav2vecasr.PhonemeASRModel import Wav2Vec2PhonemeASRModel, Wav2Vec2OptimisedPhonemeASRModel
 @st.cache_resource
 def load_model():
-    path = os.path.join(os.getcwd(), "wav2vecasr", "model", "checkpoint-600")
-    asr_model = Wav2Vec2OptimisedPhonemeASRModel(path, os.path.join(path, "wav2vec2_vocab_final.json"),
-                                                         os.path.join(os.getcwd(), "wav2vecasr", "pretrained_models",
-                                                                      "en-kenlm-model", "en.arpa.bin"))
     g2p = GraphemeToPhoneme.from_hparams("speechbrain/soundchoice-g2p")
-    mispronounciation_detector = MispronounciationDetector(asr_model, g2p, "cpu")
     return mispronounciation_detector
@@ -55,12 +55,10 @@ def mispronounciation_detection_section():
             # start prediction
             st.write('# Detection Results')
             with st.spinner('Predicting...'):
-                raw_info = mispronunciation_detector.detect(audio, text)
                 st.write('#### Phoneme Level Analysis')
                 st.write(f"Phoneme Error Rate: {round(raw_info['per'],2)}")
-                # enable horizontal scrolling for phoneme output
-                #st.text_area(label="Aligned phoneme outputs", value=raw_info['phoneme_output'],height=150)
                 st.markdown(
                 f"""
                 <style>
@@ -69,9 +67,9 @@ def mispronounciation_detection_section():
                 }}
                 </style>
                 ```
-                {" ".join(raw_info['ref'])}
-                {" ".join(raw_info['hyp'])}
-                {" ".join(raw_info['phoneme_errors'])}
                 ```
                 """,
                     unsafe_allow_html=True,

 import os
 import torchaudio
 from wav2vecasr.MispronounciationDetector import MispronounciationDetector
+from wav2vecasr.PhonemeASRModel import Wav2Vec2PhonemeASRModel, Wav2Vec2OptimisedPhonemeASRModel, MultitaskPhonemeASRModel
+import torch
 @st.cache_resource
 def load_model():
+    path = os.path.join(os.getcwd(), "wav2vecasr", "model", "multitask_best_ctc.pt")
+    vocab_path = os.path.join(os.getcwd(), "wav2vecasr", "model", "vocab")
+    device = "cpu"
+    asr_model = MultitaskPhonemeASRModel(path, vocab_path, device)
     g2p = GraphemeToPhoneme.from_hparams("speechbrain/soundchoice-g2p")
+    mispronounciation_detector = MispronounciationDetector(asr_model, g2p, device)
     return mispronounciation_detector
             # start prediction
             st.write('# Detection Results')
             with st.spinner('Predicting...'):
+                raw_info = mispronunciation_detector.detect(audio, text, phoneme_error_threshold=0.25)
                 st.write('#### Phoneme Level Analysis')
                 st.write(f"Phoneme Error Rate: {round(raw_info['per'],2)}")
                 st.markdown(
                 f"""
                 <style>
                 }}
                 </style>
                 ```
+                {raw_info['ref']}
+                {raw_info['hyp']}
+                {raw_info['phoneme_errors']}
                 ```
                 """,
                     unsafe_allow_html=True,