Spaces:

ReneeYe
/

ConST-speech2text-translator

Build error

App Files Community

ReneeYe committed on May 20, 2022

Commit

9f7d061

1 Parent(s): d8aa18e

fix bug

Browse files

Files changed (2) hide show

app.py +23 -25
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ import shutil
 import yaml
 import torchaudio
 import gradio as gr
-from huggingface_hub import snapshot_download, hf_hub_url
 LANGUAGE_CODES = {
@@ -38,10 +38,12 @@ LANG_GEN_SETUPS = {
 }
 os.system("git clone https://github.com/ReneeYe/ConST")
-os.system('mv ConST/* ./')
-os.system("rm -rf ConST")
-os.system("sudo python3 setup.py install")
-os.system("sudo python3 setup.py build_ext --inplace")
 os.system("mkdir -p data checkpoint")
@@ -52,7 +54,7 @@ def convert_audio_to_16k_wav(audio_input):
     num_frames = torchaudio.info(audio_input.name).num_frames
     filename = audio_input.name.split("/")[-1]
     shutil.copy(audio_input.name, f'data/{filename}')
-    return f'data/{filename}', num_frames
 def prepare_tsv(file_name, n_frame, language, task="ST"):
@@ -90,7 +92,7 @@ def get_model(language):
 def generate(model_path):
-    os.system(f"fairseq-generate data/ --gen-subset test_case --task speech_to_text --prefix-size 1 \
                 --max-tokens 4000000 --max-source-positions 4000000 \
                 --config-yaml config.yaml  --path {model_path} | tee temp.txt")
     output = os.popen("grep ^D temp.txt | sort -n -k 2 -t '-' | cut -f 3")
@@ -103,22 +105,24 @@ def remove_temp_files():
 def run(audio_file, language):
-    converted_audio_file, n_frame = convert_audio_to_16k_wav(audio_file)
-    prepare_tsv(converted_audio_file, n_frame, language)
-    get_vocab_and_yaml(language)
-    model_path = get_model(language)
-    generated_output = generate(model_path)
-    remove_temp_files()
-    return generated_output
-def greet(audio_file, language):
-    print(audio_file.name)
-    return f"Hello {language}!!"
 inputs = [
-        gr.inputs.Audio(source="microphone", type="file", label="Record something (in English)..."),
         gr.inputs.Dropdown(list(LANGUAGE_CODES.keys()), default="German", label="From English to Languages X..."),
     ]
@@ -133,11 +137,5 @@ iface = gr.Interface(
             "Its motivation is to use contrastive learning method to learn similar representations for semantically similar speech and text.",
     theme="seafoam",
     layout='vertical',
-    # analytics_enabled=False,
-    # flagging_dir='results/flagged/',
-    # allow_flagging=True,
-    # flagging_options=['Interesting!', 'Error: Claim Phrase Parsing', 'Error: Local Premise',
-    #                   'Error: Require Commonsense', 'Error: Evidence Retrieval'],
-    enable_queue=True
 )
-iface.launch(inline=False)

 import yaml
 import torchaudio
 import gradio as gr
+from huggingface_hub import snapshot_download
 LANGUAGE_CODES = {
 }
 os.system("git clone https://github.com/ReneeYe/ConST")
+os.system("mv ConST ConST_git")
+os.system('mv -n ConST_git/* ./')
+os.system("rm -rf ConST_git")
+# os.system("python3 setup.py install")
+# os.system("python3 setup.py build_ext --inplace")
+os.system("pip3 install --editable ./")
 os.system("mkdir -p data checkpoint")
     num_frames = torchaudio.info(audio_input.name).num_frames
     filename = audio_input.name.split("/")[-1]
     shutil.copy(audio_input.name, f'data/{filename}')
+    return filename, num_frames
 def prepare_tsv(file_name, n_frame, language, task="ST"):
 def generate(model_path):
+    os.system(f"python3 fairseq_cli/generate.py data/ --gen-subset test_case --task speech_to_text --prefix-size 1 \
                 --max-tokens 4000000 --max-source-positions 4000000 \
                 --config-yaml config.yaml  --path {model_path} | tee temp.txt")
     output = os.popen("grep ^D temp.txt | sort -n -k 2 -t '-' | cut -f 3")
 def run(audio_file, language):
+    try:
+        converted_audio_file, n_frame = convert_audio_to_16k_wav(audio_file)
+        prepare_tsv(converted_audio_file, n_frame, language)
+        get_vocab_and_yaml(language)
+        model_path = get_model(language)
+        generated_output = generate(model_path)
+        remove_temp_files()
+        return generated_output
+    except:
+        return error_output(language)
+def error_output(language):
+    return f"Fail to translate the audio into {language}, you may use the examples I provide."
 inputs = [
+        gr.inputs.Audio(source="microphone", type="filepath", label="Record something (in English)..."),
         gr.inputs.Dropdown(list(LANGUAGE_CODES.keys()), default="German", label="From English to Languages X..."),
     ]
             "Its motivation is to use contrastive learning method to learn similar representations for semantically similar speech and text.",
     theme="seafoam",
     layout='vertical',
 )
+iface.launch()

requirements.txt CHANGED Viewed

@@ -20,5 +20,5 @@ sacrebleu==1.5.1
 omegaconf==2.0.5
 hydra-core==1.0.0
 huggingface_hub
-gradio
 torch==1.10.0

 omegaconf==2.0.5
 hydra-core==1.0.0
 huggingface_hub
+gradio==2.7.5
 torch==1.10.0