Spaces:
Build error
Build error
fix bug
Browse files- app.py +23 -25
- requirements.txt +1 -1
app.py
CHANGED
@@ -12,7 +12,7 @@ import shutil
|
|
12 |
import yaml
|
13 |
import torchaudio
|
14 |
import gradio as gr
|
15 |
-
from huggingface_hub import snapshot_download
|
16 |
|
17 |
|
18 |
LANGUAGE_CODES = {
|
@@ -38,10 +38,12 @@ LANG_GEN_SETUPS = {
|
|
38 |
}
|
39 |
|
40 |
os.system("git clone https://github.com/ReneeYe/ConST")
|
41 |
-
os.system(
|
42 |
-
os.system(
|
43 |
-
os.system("
|
44 |
-
os.system("
|
|
|
|
|
45 |
os.system("mkdir -p data checkpoint")
|
46 |
|
47 |
|
@@ -52,7 +54,7 @@ def convert_audio_to_16k_wav(audio_input):
|
|
52 |
num_frames = torchaudio.info(audio_input.name).num_frames
|
53 |
filename = audio_input.name.split("/")[-1]
|
54 |
shutil.copy(audio_input.name, f'data/{filename}')
|
55 |
-
return
|
56 |
|
57 |
|
58 |
def prepare_tsv(file_name, n_frame, language, task="ST"):
|
@@ -90,7 +92,7 @@ def get_model(language):
|
|
90 |
|
91 |
|
92 |
def generate(model_path):
|
93 |
-
os.system(f"
|
94 |
--max-tokens 4000000 --max-source-positions 4000000 \
|
95 |
--config-yaml config.yaml --path {model_path} | tee temp.txt")
|
96 |
output = os.popen("grep ^D temp.txt | sort -n -k 2 -t '-' | cut -f 3")
|
@@ -103,22 +105,24 @@ def remove_temp_files():
|
|
103 |
|
104 |
|
105 |
def run(audio_file, language):
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
|
|
|
|
|
|
113 |
|
114 |
|
115 |
-
def
|
116 |
-
|
117 |
-
return f"Hello {language}!!"
|
118 |
|
119 |
|
120 |
inputs = [
|
121 |
-
gr.inputs.Audio(source="microphone", type="
|
122 |
gr.inputs.Dropdown(list(LANGUAGE_CODES.keys()), default="German", label="From English to Languages X..."),
|
123 |
]
|
124 |
|
@@ -133,11 +137,5 @@ iface = gr.Interface(
|
|
133 |
"Its motivation is to use contrastive learning method to learn similar representations for semantically similar speech and text.",
|
134 |
theme="seafoam",
|
135 |
layout='vertical',
|
136 |
-
# analytics_enabled=False,
|
137 |
-
# flagging_dir='results/flagged/',
|
138 |
-
# allow_flagging=True,
|
139 |
-
# flagging_options=['Interesting!', 'Error: Claim Phrase Parsing', 'Error: Local Premise',
|
140 |
-
# 'Error: Require Commonsense', 'Error: Evidence Retrieval'],
|
141 |
-
enable_queue=True
|
142 |
)
|
143 |
-
iface.launch(
|
|
|
12 |
import yaml
|
13 |
import torchaudio
|
14 |
import gradio as gr
|
15 |
+
from huggingface_hub import snapshot_download
|
16 |
|
17 |
|
18 |
LANGUAGE_CODES = {
|
|
|
38 |
}
|
39 |
|
40 |
os.system("git clone https://github.com/ReneeYe/ConST")
|
41 |
+
os.system("mv ConST ConST_git")
|
42 |
+
os.system('mv -n ConST_git/* ./')
|
43 |
+
os.system("rm -rf ConST_git")
|
44 |
+
# os.system("python3 setup.py install")
|
45 |
+
# os.system("python3 setup.py build_ext --inplace")
|
46 |
+
os.system("pip3 install --editable ./")
|
47 |
os.system("mkdir -p data checkpoint")
|
48 |
|
49 |
|
|
|
54 |
num_frames = torchaudio.info(audio_input.name).num_frames
|
55 |
filename = audio_input.name.split("/")[-1]
|
56 |
shutil.copy(audio_input.name, f'data/{filename}')
|
57 |
+
return filename, num_frames
|
58 |
|
59 |
|
60 |
def prepare_tsv(file_name, n_frame, language, task="ST"):
|
|
|
92 |
|
93 |
|
94 |
def generate(model_path):
|
95 |
+
os.system(f"python3 fairseq_cli/generate.py data/ --gen-subset test_case --task speech_to_text --prefix-size 1 \
|
96 |
--max-tokens 4000000 --max-source-positions 4000000 \
|
97 |
--config-yaml config.yaml --path {model_path} | tee temp.txt")
|
98 |
output = os.popen("grep ^D temp.txt | sort -n -k 2 -t '-' | cut -f 3")
|
|
|
105 |
|
106 |
|
107 |
def run(audio_file, language):
|
108 |
+
try:
|
109 |
+
converted_audio_file, n_frame = convert_audio_to_16k_wav(audio_file)
|
110 |
+
prepare_tsv(converted_audio_file, n_frame, language)
|
111 |
+
get_vocab_and_yaml(language)
|
112 |
+
model_path = get_model(language)
|
113 |
+
generated_output = generate(model_path)
|
114 |
+
remove_temp_files()
|
115 |
+
return generated_output
|
116 |
+
except:
|
117 |
+
return error_output(language)
|
118 |
|
119 |
|
120 |
+
def error_output(language):
|
121 |
+
return f"Fail to translate the audio into {language}, you may use the examples I provide."
|
|
|
122 |
|
123 |
|
124 |
inputs = [
|
125 |
+
gr.inputs.Audio(source="microphone", type="filepath", label="Record something (in English)..."),
|
126 |
gr.inputs.Dropdown(list(LANGUAGE_CODES.keys()), default="German", label="From English to Languages X..."),
|
127 |
]
|
128 |
|
|
|
137 |
"Its motivation is to use contrastive learning method to learn similar representations for semantically similar speech and text.",
|
138 |
theme="seafoam",
|
139 |
layout='vertical',
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
)
|
141 |
+
iface.launch()
|
requirements.txt
CHANGED
@@ -20,5 +20,5 @@ sacrebleu==1.5.1
|
|
20 |
omegaconf==2.0.5
|
21 |
hydra-core==1.0.0
|
22 |
huggingface_hub
|
23 |
-
gradio
|
24 |
torch==1.10.0
|
|
|
20 |
omegaconf==2.0.5
|
21 |
hydra-core==1.0.0
|
22 |
huggingface_hub
|
23 |
+
gradio==2.7.5
|
24 |
torch==1.10.0
|