HoneyTian committed on
Commit
5dd7349
·
1 Parent(s): bebc2b8
Files changed (1) hide show
  1. main.py +29 -0
main.py CHANGED
@@ -10,12 +10,15 @@ import shutil
10
  import tempfile
11
  import time
12
  from typing import Dict, Tuple
 
13
  import zipfile
14
 
15
  import gradio as gr
 
16
  from huggingface_hub import snapshot_download
17
  import matplotlib.pyplot as plt
18
  import numpy as np
 
19
 
20
  import log
21
  from project_settings import environment, project_path, log_directory, time_zone_info
@@ -63,6 +66,28 @@ def get_args():
63
  return args
64
 
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
def shell(cmd: str):
    """Pass ``cmd`` to ``Command.popen`` and return whatever that call returns."""
    popen_result = Command.popen(cmd)
    return popen_result
68
 
@@ -113,6 +138,10 @@ def when_click_vad_button(audio_file_t = None, audio_microphone_t = None,
113
  audio_t: Tuple = audio_file_t or audio_microphone_t
114
 
115
  sample_rate, signal = audio_t
 
 
 
 
116
  audio_duration = signal.shape[-1] // sample_rate
117
  audio = np.array(signal / (1 << 15), dtype=np.float32)
118
 
 
10
  import tempfile
11
  import time
12
  from typing import Dict, Tuple
13
+ import uuid
14
  import zipfile
15
 
16
  import gradio as gr
17
+ import librosa
18
  from huggingface_hub import snapshot_download
19
  import matplotlib.pyplot as plt
20
  import numpy as np
21
+ from scipy.io import wavfile
22
 
23
  import log
24
  from project_settings import environment, project_path, log_directory, time_zone_info
 
66
  return args
67
 
68
 
69
def save_input_audio(sample_rate: int, signal: np.ndarray) -> str:
    """Write ``signal`` to a uniquely named WAV file and return its path.

    The file is created under ``<system temp dir>/input_audio`` (the directory
    is created on demand) with a random UUID4 file name.

    :param sample_rate: sample rate written into the WAV header.
    :param signal: audio samples; must have dtype ``np.int16``.
    :return: POSIX-style path (``str``) of the written ``.wav`` file.
    :raises AssertionError: if ``signal.dtype`` is not ``np.int16``.
    """
    if signal.dtype != np.int16:
        raise AssertionError(f"only support dtype np.int16, however: {signal.dtype}")

    # Keep all dumped inputs together in one sub-directory of the temp dir.
    out_dir = Path(tempfile.gettempdir()) / "input_audio"
    out_dir.mkdir(parents=True, exist_ok=True)

    # A UUID4 name avoids collisions between calls.
    wav_path = (out_dir / f"{uuid.uuid4()}.wav").as_posix()
    wavfile.write(wav_path, sample_rate, signal)
    return wav_path
81
+
82
+
83
def convert_sample_rate(signal: np.ndarray, sample_rate: int, target_sample_rate: int) -> np.ndarray:
    """Resample int16 audio to ``target_sample_rate`` and return int16 samples.

    The signal is written to a temporary WAV file via ``save_input_audio``,
    reloaded with ``librosa.load(..., sr=target_sample_rate)`` so that librosa
    performs the resampling, then scaled by ``1 << 15`` and cast to ``np.int16``.

    :param signal: input samples; ``save_input_audio`` requires dtype ``np.int16``.
    :param sample_rate: sample rate of ``signal``.
    :param target_sample_rate: sample rate requested from ``librosa.load``.
    :return: the resampled signal as an ``np.int16`` array.
    :raises AssertionError: propagated from ``save_input_audio`` when
        ``signal`` is not ``np.int16``.
    """
    filename = save_input_audio(sample_rate, signal)
    try:
        resampled, _ = librosa.load(filename, sr=target_sample_rate)
    finally:
        # The WAV file is only a hand-off to librosa; remove it so repeated
        # calls do not pile up files in the temp directory. Cleanup is
        # best-effort: a failure to delete must not mask the real result/error.
        try:
            Path(filename).unlink()
        except OSError:
            pass

    # Resampling can overshoot past full scale; clip before the cast so that
    # out-of-range values saturate instead of wrapping around in int16.
    int16_info = np.iinfo(np.int16)
    scaled = np.clip(resampled * (1 << 15), int16_info.min, int16_info.max)
    return np.array(scaled, dtype=np.int16)
89
+
90
+
91
def shell(cmd: str):
    """Pass ``cmd`` to ``Command.popen`` and return whatever that call returns."""
    popen_result = Command.popen(cmd)
    return popen_result
93
 
 
138
  audio_t: Tuple = audio_file_t or audio_microphone_t
139
 
140
  sample_rate, signal = audio_t
141
+ if sample_rate != 8000:
142
+ signal = convert_sample_rate(signal, sample_rate, 8000)
143
+ sample_rate = 8000
144
+
145
  audio_duration = signal.shape[-1] // sample_rate
146
  audio = np.array(signal / (1 << 15), dtype=np.float32)
147