Spaces:
Running
Running
Katock
committed on
Commit
·
9e75f13
1
Parent(s):
185cd8d
debug
Browse files
- app.py +16 -18
- inference/slicer.py +1 -1
app.py
CHANGED
@@ -6,8 +6,6 @@ import gradio as gr
|
|
6 |
import gradio.processing_utils as gr_processing_utils
|
7 |
import librosa
|
8 |
import numpy as np
|
9 |
-
import soundfile
|
10 |
-
import torch
|
11 |
|
12 |
from inference.infer_tool import Svc
|
13 |
|
@@ -18,17 +16,17 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
|
18 |
|
19 |
limitation = os.getenv("SYSTEM") == "spaces" # limit audio length in huggingface spaces
|
20 |
|
21 |
-
audio_postprocess_ori = gr.Audio.postprocess
|
22 |
|
23 |
|
24 |
-
def audio_postprocess(self, y):
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
gr.Audio.postprocess = audio_postprocess
|
32 |
|
33 |
|
34 |
def create_vc_fn(model, spk):
|
@@ -40,13 +38,13 @@ def create_vc_fn(model, spk):
|
|
40 |
if duration > 20 and limitation:
|
41 |
return "请上传小于20秒的音频,或点击右上角裁剪", None
|
42 |
print("audio1: ", audio)
|
43 |
-
audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
|
44 |
-
if len(audio.shape) > 1:
|
45 |
-
|
46 |
-
if sampling_rate != 16000:
|
47 |
-
|
48 |
-
print("audio2: ", audio)
|
49 |
-
input_audio = sampling_rate, audio
|
50 |
|
51 |
# raw_path = io.BytesIO()
|
52 |
# soundfile.write(raw_path, audio, sampling_rate, format="wav")
|
|
|
6 |
import gradio.processing_utils as gr_processing_utils
|
7 |
import librosa
|
8 |
import numpy as np
|
|
|
|
|
9 |
|
10 |
from inference.infer_tool import Svc
|
11 |
|
|
|
16 |
|
17 |
limitation = os.getenv("SYSTEM") == "spaces" # limit audio length in huggingface spaces
|
18 |
|
19 |
+
# audio_postprocess_ori = gr.Audio.postprocess
|
20 |
|
21 |
|
22 |
+
# def audio_postprocess(self, y):
|
23 |
+
# data = audio_postprocess_ori(self, y)
|
24 |
+
# if data is None:
|
25 |
+
# return None
|
26 |
+
# return gr_processing_utils.encode_url_or_file_to_base64(data["name"])
|
27 |
+
#
|
28 |
+
#
|
29 |
+
# gr.Audio.postprocess = audio_postprocess
|
30 |
|
31 |
|
32 |
def create_vc_fn(model, spk):
|
|
|
38 |
if duration > 20 and limitation:
|
39 |
return "请上传小于20秒的音频,或点击右上角裁剪", None
|
40 |
print("audio1: ", audio)
|
41 |
+
# audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
|
42 |
+
# if len(audio.shape) > 1:
|
43 |
+
# audio = librosa.to_mono(audio.transpose(1, 0))
|
44 |
+
# if sampling_rate != 16000:
|
45 |
+
# audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
|
46 |
+
# print("audio2: ", audio)
|
47 |
+
# input_audio = sampling_rate, audio
|
48 |
|
49 |
# raw_path = io.BytesIO()
|
50 |
# soundfile.write(raw_path, audio, sampling_rate, format="wav")
|
inference/slicer.py
CHANGED
@@ -134,7 +134,7 @@ def chunks2audio(input_audio, chunks):
|
|
134 |
sr, audio = input_audio
|
135 |
if len(audio.shape) == 2 and audio.shape[1] >= 2:
|
136 |
audio = torch.mean(audio, dim=0).unsqueeze(0)
|
137 |
-
|
138 |
result = []
|
139 |
for k, v in chunks.items():
|
140 |
tag = v["split_time"].split(",")
|
|
|
134 |
sr, audio = input_audio
|
135 |
if len(audio.shape) == 2 and audio.shape[1] >= 2:
|
136 |
audio = torch.mean(audio, dim=0).unsqueeze(0)
|
137 |
+
audio = audio.cpu().numpy()[0]
|
138 |
result = []
|
139 |
for k, v in chunks.items():
|
140 |
tag = v["split_time"].split(",")
|