Spaces:
Running
Running
Create my_utils.py
Browse files- GPT_SoVITS/my_utils.py +21 -0
GPT_SoVITS/my_utils.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ffmpeg
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def load_audio(file: str, sr: int) -> np.ndarray:
    """Decode an audio file to a mono float32 waveform at the requested rate.

    Launches the ``ffmpeg`` CLI as a subprocess (via the ``ffmpeg-python``
    package) to decode the file while down-mixing to one channel and
    resampling to ``sr``. Approach adapted from
    https://github.com/openai/whisper/blob/main/whisper/audio.py#L26

    Args:
        file: Path to the audio file. Leading/trailing spaces, double quotes
            and newlines (common when a path is copy-pasted) are removed.
        sr: Target sample rate passed to ffmpeg's ``-ar`` option.

    Returns:
        A 1-D ``float32`` NumPy array holding the decoded samples.

    Raises:
        RuntimeError: If the path cannot be cleaned or ffmpeg fails. The
            original exception is chained as ``__cause__``.
    """
    try:
        # Guard against pasted paths wrapped in any mix of spaces, double
        # quotes and newlines; a single strip handles every ordering.
        file = file.strip(' "\n')
        out, _ = (
            ffmpeg.input(file, threads=0)
            .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
        )
    except Exception as e:
        # ffmpeg failures carry the real diagnostic in the captured stderr
        # bytes rather than in str(e), so prefer that when it is available.
        stderr = getattr(e, "stderr", None)
        detail = ""
        if isinstance(stderr, (bytes, bytearray)):
            detail = bytes(stderr).decode("utf-8", errors="replace").strip()
        if not detail:
            detail = str(e)
        raise RuntimeError(f"Failed to load audio: {detail}") from e

    # flatten() copies, so the result is a writable array instead of a
    # read-only view over the subprocess output buffer.
    return np.frombuffer(out, np.float32).flatten()
|