Katock committed on
Commit
054c3da
·
1 Parent(s): 5efa479
Files changed (2) hide show
  1. app.py +1 -1
  2. inference/slicer.py +3 -4
app.py CHANGED
@@ -90,7 +90,7 @@ if __name__ == '__main__':
90
  with gr.TabItem(name):
91
  with gr.Row():
92
  with gr.Column():
93
- vc_input = gr.Audio(label="上传干声" + ' (小于 20 秒)' if limitation else '')
94
  vc_transform = gr.Number(label="音高调整 (支持正负半音,12为一个八度)", value=0)
95
  auto_f0 = gr.Checkbox(label="自动音高预测 (正常说话可选)", value=False)
96
  f0_predictor = gr.Radio(label="f0预测器 (对电音有影响)",
 
90
  with gr.TabItem(name):
91
  with gr.Row():
92
  with gr.Column():
93
+ vc_input = gr.Audio(label="上传干声 (已支持长音频)" if limitation else '')
94
  vc_transform = gr.Number(label="音高调整 (支持正负半音,12为一个八度)", value=0)
95
  auto_f0 = gr.Checkbox(label="自动音高预测 (正常说话可选)", value=False)
96
  f0_predictor = gr.Radio(label="f0预测器 (对电音有影响)",
inference/slicer.py CHANGED
@@ -118,8 +118,7 @@ class Slicer:
118
 
119
 
120
  def cut(input_audio, db_thresh=-30, min_len=5000):
121
- # audio, sr = librosa.load(input_audio, sr=None)
122
- sr, audio = input_audio
123
  slicer = Slicer(
124
  sr=sr,
125
  threshold=db_thresh,
@@ -129,9 +128,9 @@ def cut(input_audio, db_thresh=-30, min_len=5000):
129
  return chunks
130
 
131
 
132
- def chunks2audio(input_audio, chunks):
133
  chunks = dict(chunks)
134
- sr, audio = input_audio
135
  if len(audio.shape) == 2 and audio.shape[1] >= 2:
136
  audio = torch.mean(audio, dim=0).unsqueeze(0)
137
  audio = audio.cpu().numpy()[0]
 
118
 
119
 
120
  def cut(input_audio, db_thresh=-30, min_len=5000):
121
+ audio, sr = librosa.load(input_audio, sr=None)
 
122
  slicer = Slicer(
123
  sr=sr,
124
  threshold=db_thresh,
 
128
  return chunks
129
 
130
 
131
+ def chunks2audio(audio_path, chunks):
132
  chunks = dict(chunks)
133
+ audio, sr = torchaudio.load(audio_path)
134
  if len(audio.shape) == 2 and audio.shape[1] >= 2:
135
  audio = torch.mean(audio, dim=0).unsqueeze(0)
136
  audio = audio.cpu().numpy()[0]