update
Browse files
main.py
CHANGED
@@ -10,12 +10,15 @@ import shutil
|
|
10 |
import tempfile
|
11 |
import time
|
12 |
from typing import Dict, Tuple
|
|
|
13 |
import zipfile
|
14 |
|
15 |
import gradio as gr
|
|
|
16 |
from huggingface_hub import snapshot_download
|
17 |
import matplotlib.pyplot as plt
|
18 |
import numpy as np
|
|
|
19 |
|
20 |
import log
|
21 |
from project_settings import environment, project_path, log_directory, time_zone_info
|
@@ -63,6 +66,28 @@ def get_args():
|
|
63 |
return args
|
64 |
|
65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
def shell(cmd: str):
    """
    Run a command string through ``Command.popen``.

    :param cmd: the command line to hand to ``Command.popen``.
    :return: whatever ``Command.popen`` returns for ``cmd``.
    """
    result = Command.popen(cmd)
    return result
|
68 |
|
@@ -113,6 +138,10 @@ def when_click_vad_button(audio_file_t = None, audio_microphone_t = None,
|
|
113 |
audio_t: Tuple = audio_file_t or audio_microphone_t
|
114 |
|
115 |
sample_rate, signal = audio_t
|
|
|
|
|
|
|
|
|
116 |
audio_duration = signal.shape[-1] // sample_rate
|
117 |
audio = np.array(signal / (1 << 15), dtype=np.float32)
|
118 |
|
|
|
10 |
import tempfile
|
11 |
import time
|
12 |
from typing import Dict, Tuple
|
13 |
+
import uuid
|
14 |
import zipfile
|
15 |
|
16 |
import gradio as gr
|
17 |
+
import librosa
|
18 |
from huggingface_hub import snapshot_download
|
19 |
import matplotlib.pyplot as plt
|
20 |
import numpy as np
|
21 |
+
from scipy.io import wavfile
|
22 |
|
23 |
import log
|
24 |
from project_settings import environment, project_path, log_directory, time_zone_info
|
|
|
66 |
return args
|
67 |
|
68 |
|
69 |
+
def save_input_audio(sample_rate: int, signal: np.ndarray) -> str:
    """
    Write an int16 signal to a uniquely named WAV file and return its path.

    The file is created in an ``input_audio`` directory under the system
    temporary directory; the directory is created on demand and the file
    name is a random UUID with a ``.wav`` suffix.

    :param sample_rate: sample rate passed to ``wavfile.write``.
    :param signal: audio samples; the dtype must be ``np.int16``.
    :return: POSIX-style path (as ``str``) of the file that was written.
    :raises AssertionError: if ``signal.dtype`` is not ``np.int16``.
    """
    is_int16 = signal.dtype == np.int16
    if not is_int16:
        raise AssertionError(f"only support dtype np.int16, however: {signal.dtype}")

    # Make sure the target directory exists before composing the file path.
    out_dir = Path(tempfile.gettempdir()) / "input_audio"
    out_dir.mkdir(parents=True, exist_ok=True)

    # A random UUID keeps separate calls from writing to the same file.
    wav_path = (out_dir / f"{uuid.uuid4()}.wav").as_posix()
    wavfile.write(wav_path, sample_rate, signal)
    return wav_path
|
81 |
+
|
82 |
+
|
83 |
+
def convert_sample_rate(signal: np.ndarray, sample_rate: int, target_sample_rate: int):
    """
    Resample int16 PCM audio to ``target_sample_rate``.

    The signal is written to a temporary WAV file with ``save_input_audio``
    and read back with ``librosa.load(..., sr=target_sample_rate)``, which
    performs the resampling. The loaded samples are then scaled by ``1 << 15``
    and converted back to ``np.int16``.

    :param signal: input samples; ``save_input_audio`` requires ``np.int16``.
    :param sample_rate: sample rate of ``signal``.
    :param target_sample_rate: sample rate requested from ``librosa.load``.
    :return: the resampled signal as an ``np.int16`` array.
    :raises AssertionError: propagated from ``save_input_audio`` when
        ``signal`` is not ``np.int16``.
    """
    # Local import: only part of the file's top-level imports is visible here.
    import os

    filename = save_input_audio(sample_rate, signal)
    try:
        resampled, _ = librosa.load(filename, sr=target_sample_rate)
    finally:
        # The WAV file is only an intermediate; remove it so repeated calls
        # do not pile up files in the temp directory. Cleanup is best-effort
        # and must not mask a load error or fail an otherwise good result.
        try:
            os.remove(filename)
        except OSError:
            pass

    # A sample at or above +1.0 scales to >= 32768, which does not fit in
    # int16; saturate to the int16 range instead of letting the cast corrupt it.
    int16_range = np.iinfo(np.int16)
    scaled = np.clip(resampled * (1 << 15), int16_range.min, int16_range.max)
    return np.array(scaled, dtype=np.int16)
|
89 |
+
|
90 |
+
|
91 |
def shell(cmd: str):
    """
    Run a command string through ``Command.popen``.

    :param cmd: the command line to hand to ``Command.popen``.
    :return: whatever ``Command.popen`` returns for ``cmd``.
    """
    result = Command.popen(cmd)
    return result
|
93 |
|
|
|
138 |
audio_t: Tuple = audio_file_t or audio_microphone_t
|
139 |
|
140 |
sample_rate, signal = audio_t
|
141 |
+
if sample_rate != 8000:
|
142 |
+
signal = convert_sample_rate(signal, sample_rate, 8000)
|
143 |
+
sample_rate = 8000
|
144 |
+
|
145 |
audio_duration = signal.shape[-1] // sample_rate
|
146 |
audio = np.array(signal / (1 << 15), dtype=np.float32)
|
147 |
|