Spaces:
Runtime error
Runtime error
Vaibhav Srivastav
committed on
Commit
·
8d69919
1
Parent(s):
f6bce7b
for the love of god please work
Browse files
app.py
CHANGED
@@ -7,9 +7,9 @@ from transformers import AutoProcessor, AutoModelForCTC
|
|
7 |
|
8 |
nltk.download("punkt")
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
|
14 |
def load_and_fix_data(input_file):
|
15 |
#read the file
|
@@ -26,7 +26,8 @@ def fix_transcription_casing(input_sentence):
|
|
26 |
sentences = nltk.sent_tokenize(input_sentence)
|
27 |
return (' '.join([s.replace(s[0],s[0].capitalize(),1) for s in sentences]))
|
28 |
|
29 |
-
def predict_and_ctc_decode(input_file):
|
|
|
30 |
speech = load_and_fix_data(input_file)
|
31 |
|
32 |
input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
|
@@ -40,7 +41,8 @@ def predict_and_ctc_decode(input_file):
|
|
40 |
|
41 |
return transcribed_text
|
42 |
|
43 |
-
def predict_and_greedy_decode(input_file):
|
|
|
44 |
speech = load_and_fix_data(input_file)
|
45 |
|
46 |
input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
|
@@ -54,14 +56,13 @@ def predict_and_greedy_decode(input_file):
|
|
54 |
return transcribed_text
|
55 |
|
56 |
def return_all_predictions(input_file, model_name):
|
57 |
-
|
58 |
-
return predict_and_ctc_decode(input_file), predict_and_greedy_decode(input_file)
|
59 |
|
60 |
|
61 |
gr.Interface(return_all_predictions,
|
62 |
inputs = [gr.inputs.Audio(source="microphone", type="filepath", label="Record/ Drop audio"), gr.inputs.Dropdown(["facebook/wav2vec2-base-960h", "facebook/hubert-large-ls960-ft"], label="Model Name")],
|
63 |
-
outputs = [gr.outputs.Textbox(label="Beam CTC
|
64 |
-
title="ASR using
|
65 |
-
description = "
|
66 |
layout = "horizontal",
|
67 |
examples = [["test1.wav", "facebook/wav2vec2-base-960h"], ["test2.wav", "facebook/hubert-large-ls960-ft"]], theme="huggingface").launch()
|
|
|
7 |
|
8 |
nltk.download("punkt")
|
9 |
|
10 |
+
|
11 |
+
def return_processor_and_model(model_name):
|
12 |
+
return AutoProcessor.from_pretrained(model_name), AutoModelForCTC.from_pretrained(model_name)
|
13 |
|
14 |
def load_and_fix_data(input_file):
|
15 |
#read the file
|
|
|
26 |
sentences = nltk.sent_tokenize(input_sentence)
|
27 |
return (' '.join([s.replace(s[0],s[0].capitalize(),1) for s in sentences]))
|
28 |
|
29 |
+
def predict_and_ctc_decode(input_file, model_name):
|
30 |
+
processor, model = return_processor_and_model(model_name)
|
31 |
speech = load_and_fix_data(input_file)
|
32 |
|
33 |
input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
|
|
|
41 |
|
42 |
return transcribed_text
|
43 |
|
44 |
+
def predict_and_greedy_decode(input_file, model_name):
|
45 |
+
processor, model = return_processor_and_model(model_name)
|
46 |
speech = load_and_fix_data(input_file)
|
47 |
|
48 |
input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
|
|
|
56 |
return transcribed_text
|
57 |
|
58 |
def return_all_predictions(input_file, model_name):
|
59 |
+
return predict_and_ctc_decode(input_file, model_name), predict_and_greedy_decode(input_file, model_name)
|
|
|
60 |
|
61 |
|
62 |
gr.Interface(return_all_predictions,
|
63 |
inputs = [gr.inputs.Audio(source="microphone", type="filepath", label="Record/ Drop audio"), gr.inputs.Dropdown(["facebook/wav2vec2-base-960h", "facebook/hubert-large-ls960-ft"], label="Model Name")],
|
64 |
+
outputs = [gr.outputs.Textbox(label="Beam CTC decoding"), gr.outputs.Textbox(label="Greedy decoding")],
|
65 |
+
title="ASR using Wav2Vec2/ Hubert & pyctcdecode",
|
66 |
+
description = "Comparing Wav2Vec2 & Hubert with Greedy vs Beam Search decoding",
|
67 |
layout = "horizontal",
|
68 |
examples = [["test1.wav", "facebook/wav2vec2-base-960h"], ["test2.wav", "facebook/hubert-large-ls960-ft"]], theme="huggingface").launch()
|
test.wav
DELETED
Binary file (165 kB)
|
|