Commit e4e3c4e · 1 Parent(s): a63cce0
Hugo Flores Garcia committed

c2f

Browse files:
- requirements.txt +1 -1
- scripts/exp/train.py +24 -0
- scripts/utils/parallel-gpu.sh +23 -0
- scripts/utils/process_folder-c2f.py +28 -16
requirements.txt CHANGED

@@ -2,7 +2,7 @@ argbind>=0.3.1
 pytorch-ignite
 rich
 audiotools @ git+https://github.com/descriptinc/lyrebird-audiotools.git@hf/backup-info
-lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@
+lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@hf/vampnet-temp
 tqdm
 tensorboard
 google-cloud-logging==2.2.0
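The only change here repoints the `lac` dependency at the `hf/vampnet-temp` branch (the previous branch name is truncated in the page capture). A minimal sketch of picking up the new pin, assuming a plain pip environment:

    # --upgrade makes pip re-resolve the changed git URL for lac
    pip install --upgrade -r requirements.txt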
scripts/exp/train.py CHANGED

@@ -545,6 +545,30 @@ def train(
                 plot_fn=None,
             )
 
+            # sample in 1 step (only for coarse2fine models)
+            if accel.unwrap(model).n_conditioning_codebooks > 0:
+                sampled_argmax = accel.unwrap(model).sample(
+                    codec=codec,
+                    time_steps=z.shape[-1],
+                    start_tokens=z[i : i + 1],
+                    sample="argmax",
+                    sampling_steps=1,
+                )
+                sampled_argmax.cpu().write_audio_to_tb(
+                    f"sampled_1step-argmax/{i}",
+                    self.writer,
+                    step=self.state.epoch,
+                    plot_fn=None,
+                )
+                conditioning = z[i:i+1, : accel.unwrap(model).n_conditioning_codebooks, :]
+                conditioning = accel.unwrap(model).to_signal(conditioning, codec)
+                conditioning.cpu().write_audio_to_tb(
+                    f"conditioning/{i}",
+                    self.writer,
+                    step=self.state.epoch,
+                    plot_fn=None,
+                )
+
     def save_imputation(self, z: torch.Tensor):
         # imputations
         _prefix_amt = prefix_amt
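This block adds a single-pass sanity check for coarse-to-fine models: when the model has conditioning codebooks (`n_conditioning_codebooks > 0`), it decodes a one-step greedy (`argmax`) reconstruction of the current tokens and also renders the conditioning codebooks alone, writing both to TensorBoard alongside the existing sample audio. To browse the new `sampled_1step-argmax/*` and `conditioning/*` audio tags, point TensorBoard at the run directory (the path below is an assumption, not from the commit):

    tensorboard --logdir runs/c2f-exp-03.22.23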
scripts/utils/parallel-gpu.sh ADDED

@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# Get the command to execute from the user
+command_to_execute="$1"
+
+# Get the maximum number of GPUs to use from the user
+max_gpus="$2"
+
+# Get the number of instances to start per GPU from the user
+instances_per_gpu="$3"
+
+# Set the CUDA_VISIBLE_DEVICES flag for each GPU
+for gpu_id in $(seq 0 $(($max_gpus - 1))); do
+    export CUDA_VISIBLE_DEVICES="$gpu_id"
+    # Start the specified number of instances for this GPU
+    for i in $(seq 1 "$instances_per_gpu"); do
+        # Run the command in the background
+        $command_to_execute &
+    done
+done
+
+# Wait for all instances to finish
+wait
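Each background job inherits the `CUDA_VISIBLE_DEVICES` value exported just before it is launched, so every instance is pinned to one GPU, and the final `wait` blocks until all of them exit. A hypothetical invocation that runs two copies of a worker on each of four GPUs (the worker command is an example, not taken from the commit):

    bash scripts/utils/parallel-gpu.sh "python scripts/utils/process_folder-c2f.py" 4 2

Note that the command is passed as one quoted string and word-split by the shell, so it should not rely on its own internal quoting.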
scripts/utils/process_folder-c2f.py CHANGED

@@ -6,6 +6,8 @@ import argbind
 from tqdm import tqdm
 import random
 
+from typing import List
+
 from collections import defaultdict
 
 def coarse2fine_infer(
@@ -15,14 +17,15 @@ def coarse2fine_infer(
     device,
     signal_window=3,
     signal_hop=1.5,
-    max_excerpts=
+    max_excerpts=20,
 ):
     output = defaultdict(list)
 
     # split into 3 seconds
     windows = [s for s in signal.clone().windows(signal_window, signal_hop)]
+    windows = windows[1:]  # skip first window since it's half zero padded
     random.shuffle(windows)
-    for w in windows[
+    for w in windows[:max_excerpts]:
         # batch the signal into chunks of 3
         with torch.no_grad():
             # get codes
@@ -68,20 +71,21 @@ def coarse2fine_infer(
 @argbind.bind(without_prefix=True)
 def main(
     sources=[
-        "/
+        "/data/spotdl/audio/val", "/data/spotdl/audio/test"
     ],
     audio_ext="mp3",
     exp_name="noise_mode",
     model_paths=[
-        "ckpt/mask/best/vampnet/weights.pth",
-        "ckpt/random/best/vampnet/weights.pth",
+        "runs/c2f-exp-03.22.23/ckpt/mask/best/vampnet/weights.pth",
+        "runs/c2f-exp-03.22.23/ckpt/random/best/vampnet/weights.pth",
     ],
     model_keys=[
-        "
-        "
+        "mask",
+        "random",
     ],
-    vqvae_path="ckpt/
-    device="cuda",
+    vqvae_path: str = "runs/codec-ckpt/codec.pth",
+    device: str = "cuda",
+    output_dir: str = ".",
 ):
     from vampnet.modules.transformer import VampNet
     from lac.model.lac import LAC
@@ -99,20 +103,28 @@ def main(
     vqvae.eval()
     print("Loaded VQVAE.")
 
-
+    output_dir = Path(output_dir) / f"{exp_name}-samples"
+
     for source in sources:
         print(f"Processing {source}...")
-
+        source_files = list(Path(source).glob(f"**/*.{audio_ext}"))
+        random.shuffle(source_files)
+        for path in tqdm(source_files):
             sig = AudioSignal(path)
             sig.resample(vqvae.sample_rate).normalize(-24).ensure_max_of_audio(1.0)
 
+            out_dir = output_dir / path.stem
+            if out_dir.exists():
+                print(f"Skipping {path.stem} since {out_dir} already exists.")
+                continue
+            out_dir.mkdir(parents=True, exist_ok=True)
+
             for model_key, model in models.items():
                 out = coarse2fine_infer(sig, model, vqvae, device)
-                for k in out:
-
-
-
-
+                for k, sig_list in out.items():
+                    for i, s in enumerate(sig_list):
+                        s.write(out_dir / f"{model_key}-{k}-{i}.wav")
+
 
 if __name__ == "__main__":
     args = argbind.parse_args()
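Taken together, the script now walks each source directory for audio files, shuffles them, and writes up to `max_excerpts` coarse-to-fine reconstructions per model key into `<output_dir>/<exp_name>-samples/<stem>/` (with the existence check placed before `mkdir` so completed files are actually skipped on re-runs). Since `main` is bound with `argbind.bind(without_prefix=True)`, its keyword arguments should be settable from the command line; a hypothetical run, assuming argbind's default mapping of argument names to flags:

    python scripts/utils/process_folder-c2f.py --output_dir samples --device cuda

Combined with the new launcher, several such runs could be spread across GPUs:

    bash scripts/utils/parallel-gpu.sh "python scripts/utils/process_folder-c2f.py --output_dir samples" 4 1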