Spaces:
Running
on
Zero
Running
on
Zero
swap to gradio 4.44 & add adaptive duration
Browse files
README.md
CHANGED
|
@@ -4,7 +4,7 @@ emoji: 🎧
|
|
| 4 |
colorFrom: indigo
|
| 5 |
colorTo: gray
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 4.
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: cc-by-sa-4.0
|
|
@@ -12,4 +12,4 @@ short_description: Edit audios with text prompts
|
|
| 12 |
---
|
| 13 |
|
| 14 |
The 30-second limit was introduced to ensure that queue wait times remain reasonable, especially when there are a lot of users.
|
| 15 |
-
For that reason pull-requests that change this limit will not be merged. Please clone or duplicate the space to work locally without limits.
|
|
|
|
| 4 |
colorFrom: indigo
|
| 5 |
colorTo: gray
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.44.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: cc-by-sa-4.0
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
The 30-second limit was introduced to ensure that queue wait times remain reasonable, especially when there are a lot of users.
|
| 15 |
+
For that reason, pull requests that change this limit will not be merged. Please clone or duplicate the space to work locally without limits.
|
app.py
CHANGED
|
@@ -73,7 +73,31 @@ def sample(ldm_stable, zs, wts, steps, prompt_tar, tstart, cfg_scale_tar): # ,
|
|
| 73 |
|
| 74 |
return (16000, audio.squeeze().cpu().numpy())
|
| 75 |
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
def edit(
|
| 78 |
# cache_dir,
|
| 79 |
input_audio,
|
|
|
|
| 73 |
|
| 74 |
return (16000, audio.squeeze().cpu().numpy())
|
| 75 |
|
| 76 |
+
def get_duration(input_audio, model_id: str, do_inversion: bool,
                 wts: Optional[torch.Tensor], zs: Optional[torch.Tensor],
                 saved_inv_model: str, source_prompt="", target_prompt="",
                 steps=200, cfg_scale_src=3.5, cfg_scale_tar=12, t_start=45, randomize_seed=True):
    """Estimate the running time of one edit request.

    The function is passed as ``duration=`` to ``@spaces.GPU`` on ``edit``,
    so the returned number is used as that call's requested GPU duration
    (presumably in seconds -- confirm against the ZeroGPU documentation).

    Only ``input_audio``, ``model_id``, ``do_inversion``, ``steps``,
    ``t_start`` and ``randomize_seed`` influence the result. The remaining
    parameters are accepted but unused; presumably they mirror the signature
    of ``edit`` so both can be called with the same arguments -- verify
    against ``edit``.

    Raises:
        gr.Error: if ``input_audio`` is ``None``.
    """
    # Per-model cost multiplier; any model other than the two LDM2 variants
    # (the "MUSIC" case) uses the baseline of 1.
    factor = 0.8 if model_id == LDM2 else (1.5 if model_id == LDM2_LARGE else 1)

    # The full ``steps`` count is added to the estimate only when an inversion
    # is requested or the seed is randomized.
    extra_steps = steps if (do_inversion or randomize_seed) else 0

    if input_audio is None:
        raise gr.Error('Input audio missing!')
    # The input length is capped at 30 before it scales the estimate.
    capped_duration = min(utils.get_duration(input_audio), 30)

    # Estimate for a full-length (30) clip; the 0.2 scale is an empirical-looking
    # constant whose derivation is not visible here.
    time_per_iter_of_full = factor * ((t_start / 100 * steps) * 2 + extra_steps) * 0.2
    expected_time = time_per_iter_of_full / 30 * capped_duration
    print('expected time:', expected_time)
    return expected_time
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
@spaces.GPU(duration=get_duration)
|
| 101 |
def edit(
|
| 102 |
# cache_dir,
|
| 103 |
input_audio,
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
torch
|
|
|
|
| 2 |
torchaudio
|
| 3 |
diffusers
|
| 4 |
accelerate
|
|
|
|
| 1 |
torch
|
| 2 |
+
numpy<2
|
| 3 |
torchaudio
|
| 4 |
diffusers
|
| 5 |
accelerate
|
utils.py
CHANGED
|
@@ -2,6 +2,7 @@ import numpy as np
|
|
| 2 |
import torch
|
| 3 |
from typing import Optional, List, Tuple, NamedTuple, Union
|
| 4 |
from models import PipelineWrapper
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
class PromptEmbeddings(NamedTuple):
|
|
@@ -16,7 +17,7 @@ def load_audio(audio_path: Union[str, np.array], fn_STFT, left: int = 0, right:
|
|
| 16 |
import audioldm
|
| 17 |
import audioldm.audio
|
| 18 |
|
| 19 |
-
duration = min(
|
| 20 |
|
| 21 |
mel, _, _ = audioldm.audio.wav_to_fbank(audio_path, target_length=int(duration * 102.4), fn_STFT=fn_STFT)
|
| 22 |
mel = mel.unsqueeze(0)
|
|
|
|
| 2 |
import torch
|
| 3 |
from typing import Optional, List, Tuple, NamedTuple, Union
|
| 4 |
from models import PipelineWrapper
|
| 5 |
+
from audioldm.utils import get_duration
|
| 6 |
|
| 7 |
|
| 8 |
class PromptEmbeddings(NamedTuple):
|
|
|
|
| 17 |
import audioldm
|
| 18 |
import audioldm.audio
|
| 19 |
|
| 20 |
+
duration = min(get_duration(audio_path), 30)
|
| 21 |
|
| 22 |
mel, _, _ = audioldm.audio.wav_to_fbank(audio_path, target_length=int(duration * 102.4), fn_STFT=fn_STFT)
|
| 23 |
mel = mel.unsqueeze(0)
|