Obai33 committed on
Commit
f8b4e77
·
verified ·
1 Parent(s): 6c4dd7d

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
"""Speech-to-text demo exported from the Colab notebook ``app.ipynb``.

Loads the torchaudio ``WAV2VEC2_ASR_BASE_960H`` pipeline as the acoustic
model and builds a lexicon-constrained CTC beam-search decoder backed by
the pretrained ``librispeech-4-gram`` files.

Original notebook:
https://colab.research.google.com/drive/1CuRN-kiD-QDBFlev8vWpV3rVkjiWlaeP
"""

import importlib
import importlib.util
import subprocess
import sys

import IPython
import matplotlib.pyplot as plt
import torch
import torchaudio
from torchaudio.utils import download_asset

import audio_support_functions as myFunc

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Pretrained pipeline bundle; it also supplies the sample rate the model expects.
ctc_preTrained_object = torchaudio.pipelines.WAV2VEC2_ASR_BASE_960H
model = ctc_preTrained_object.get_model().to(device)

# The notebook used the IPython magic `!pip install flashlight-text`, which is
# a SyntaxError in a plain .py file. Install the package only when it is
# missing so the script still works without the notebook runtime.
# NOTE(review): prefer declaring flashlight-text in requirements.txt; the
# import name is assumed to be `flashlight` -- confirm.
if importlib.util.find_spec("flashlight") is None:
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "flashlight-text"]
    )
    importlib.invalidate_caches()

# Imported after the install guard because the decoder needs flashlight-text.
from torchaudio.models.decoder import ctc_decoder, download_pretrained_files

# Lexicon, token list and 4-gram language model used by the decoder.
files = download_pretrained_files("librispeech-4-gram")

beam_search_decoder = ctc_decoder(
    lexicon=files.lexicon,
    tokens=files.tokens,
    lm=files.lm,
    nbest=3,
    beam_size=3,
)
43
+
44
def theaudio(x):
    """Transcribe the audio file at path *x* with CTC beam search.

    Args:
        x: Path to an audio file readable by ``torchaudio.load``. A falsy
            value (``None`` or ``""``, e.g. when the UI submits without a
            recording) is treated as "no audio".

    Returns:
        The words of the top-ranked beam-search hypothesis joined by single
        spaces, or ``""`` when there is no audio or no hypothesis.
    """
    # Nothing was uploaded or recorded: return early instead of crashing
    # inside torchaudio.load.
    if not x:
        return ""

    waveform, sample_rate = torchaudio.load(x)

    # torchaudio.load is documented to return (channels, frames). The model
    # would treat extra channels as separate batch items and only the first
    # would be decoded, so downmix to a single channel first.
    if waveform.dim() == 2 and waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    waveform = waveform.to(device)

    # Resample only when the file's rate differs from the pipeline's rate.
    target_rate = ctc_preTrained_object.sample_rate
    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, target_rate
        )

    with torch.inference_mode():
        emission, _ = model(waveform)

    # The decoder is called on a CPU tensor, as in the original code.
    hypotheses = beam_search_decoder(emission.to("cpu"))[0]
    if not hypotheses:
        return ""
    return " ".join(hypotheses[0].words).strip()
59
+
60
import gradio as gr
import librosa

# Web UI: one audio input (upload or microphone) handed to `theaudio` as a
# file path, with the returned transcript shown as plain text.
iface = gr.Interface(
    fn=theaudio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Audio Input Example",
    # The previous description promised the audio's duration, but the
    # callback returns a transcript.
    description="Upload an audio file or record one to see its transcript.",
)

iface.launch()