ReneeYe committed on
Commit
133436c
·
1 Parent(s): a59ed34
Files changed (1) hide show
  1. app.py +28 -15
app.py CHANGED
@@ -8,9 +8,10 @@
8
  """
9
 
10
  import os
 
11
  import shutil
12
  import yaml
13
- import torchaudio
14
  import gradio as gr
15
  from huggingface_hub import snapshot_download
16
 
@@ -50,10 +51,21 @@ os.system("mkdir -p data checkpoint")
50
  huggingface_model_dir = snapshot_download(repo_id="ReneeYe/ConST_en2x_models")
51
  print(huggingface_model_dir)
52
 
 
53
  def convert_audio_to_16k_wav(audio_input):
54
- num_frames = torchaudio.info(audio_input.name).num_frames
55
- filename = audio_input.name.split("/")[-1]
56
- shutil.copy(audio_input.name, f'data/{filename}')
 
 
 
 
 
 
 
 
 
 
57
  return filename, num_frames
58
 
59
 
@@ -105,16 +117,17 @@ def remove_temp_files():
105
 
106
 
107
  def run(audio_file, language):
108
- # try:
109
- converted_audio_file, n_frame = convert_audio_to_16k_wav(audio_file)
110
- prepare_tsv(converted_audio_file, n_frame, language)
111
- get_vocab_and_yaml(language)
112
- model_path = get_model(language)
113
- generated_output = generate(model_path)
114
- remove_temp_files()
115
- return generated_output
116
- # except:
117
- # return error_output(language)
 
118
 
119
 
120
  def error_output(language):
@@ -138,4 +151,4 @@ iface = gr.Interface(
138
  theme="seafoam",
139
  layout='vertical',
140
  )
141
- iface.launch()
 
8
  """
9
 
10
  import os
11
+ import traceback
12
  import shutil
13
  import yaml
14
+ from pydub import AudioSegment
15
  import gradio as gr
16
  from huggingface_hub import snapshot_download
17
 
 
51
  huggingface_model_dir = snapshot_download(repo_id="ReneeYe/ConST_en2x_models")
52
  print(huggingface_model_dir)
53
 
54
+
55
  def convert_audio_to_16k_wav(audio_input):
56
+ sound = AudioSegment.from_file(audio_input)
57
+ sample_rate = sound.frame_rate
58
+ num_channels = sound.channels
59
+ num_frames = int(sound.frame_count())
60
+ filename = audio_input.split("/")[-1]
61
+ if (num_channels > 1) or (sample_rate != 16000): # convert to mono-channel 16k wav
62
+ sound = sound.set_channels(1)
63
+ sound = sound.set_frame_rate(16000)
64
+ num_frames = int(sound.frame_count())
65
+ filename = filename.replace(".wav", "") + "_16k.wav"
66
+ sound.export(f"data/{filename}", format="wav")
67
+ else:
68
+ shutil.copy(audio_input, f'data/{filename}')
69
  return filename, num_frames
70
 
71
 
 
117
 
118
 
119
  def run(audio_file, language):
120
+ try:
121
+ converted_audio_file, n_frame = convert_audio_to_16k_wav(audio_file)
122
+ prepare_tsv(converted_audio_file, n_frame, language)
123
+ get_vocab_and_yaml(language)
124
+ model_path = get_model(language)
125
+ generated_output = generate(model_path)
126
+ remove_temp_files()
127
+ return generated_output
128
+ except:
129
+ traceback.print_exc()
130
+ return error_output(language)
131
 
132
 
133
  def error_output(language):
 
151
  theme="seafoam",
152
  layout='vertical',
153
  )
154
+ iface.launch(share=True)