Merge pull request #22 from mhrice/random-fx-dataset
- config.yaml +1 -3
- config_guitfx.yaml +52 -0
- exp/demucs.yaml +6 -3
- exp/umx.yaml +6 -1
- remfx/datasets.py +115 -22
- remfx/effects.py +698 -0
- remfx/models.py +35 -10
- setup.py +2 -0
- shell_vars.sh +1 -1
config.yaml
CHANGED
@@ -3,7 +3,6 @@ defaults:
   - exp: null
 seed: 12345
 train: True
-length: 262144
 sample_rate: 48000
 logs_dir: "./logs"
 log_every_n_steps: 1000
@@ -22,10 +21,9 @@ callbacks:
 datamodule:
   _target_: remfx.datasets.Datamodule
   dataset:
-    _target_: remfx.datasets.
+    _target_: remfx.datasets.GuitarSet
     sample_rate: ${sample_rate}
    root: ${oc.env:DATASET_ROOT}
-    length: ${length}
     chunk_size_in_sec: 6
     val_split: 0.2
     batch_size: 16
config_guitfx.yaml
ADDED
@@ -0,0 +1,52 @@
+defaults:
+  - _self_
+  - exp: null
+seed: 12345
+train: True
+sample_rate: 48000
+logs_dir: "./logs"
+log_every_n_steps: 1000
+
+callbacks:
+  model_checkpoint:
+    _target_: pytorch_lightning.callbacks.ModelCheckpoint
+    monitor: "valid_loss" # name of the logged metric which determines when model is improving
+    save_top_k: 1 # save k best models (determined by above metric)
+    save_last: True # additionally always save model from last epoch
+    mode: "min" # can be "max" or "min"
+    verbose: False
+    dirpath: ${logs_dir}/ckpts/${now:%Y-%m-%d-%H-%M-%S}
+    filename: '{epoch:02d}-{valid_loss:.3f}'
+
+datamodule:
+  _target_: remfx.datasets.Datamodule
+  dataset:
+    _target_: remfx.datasets.GuitarFXDataset
+    sample_rate: ${sample_rate}
+    root: ${oc.env:DATASET_ROOT}
+    chunk_size_in_sec: 6
+    val_split: 0.2
+    batch_size: 16
+    num_workers: 8
+    pin_memory: True
+    persistent_workers: True
+
+logger:
+  _target_: pytorch_lightning.loggers.WandbLogger
+  project: ${oc.env:WANDB_PROJECT}
+  entity: ${oc.env:WANDB_ENTITY}
+  # offline: False # set True to store all logs only locally
+  job_type: "train"
+  group: ""
+  save_dir: "."
+
+trainer:
+  _target_: pytorch_lightning.Trainer
+  precision: 32 # Precision used for tensors, default `32`
+  min_epochs: 0
+  max_epochs: -1
+  enable_model_summary: False
+  log_every_n_steps: 1 # Logs metrics every N batches
+  accumulate_grad_batches: 1
+  accelerator: null
+  devices: 1
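
For orientation, here is a minimal sketch of how a top-level Hydra config like this is typically consumed; this script is not part of the PR, the entry-point layout is assumed, and DATASET_ROOT/WANDB_* must be exported first (see shell_vars.sh below):

    import hydra
    from omegaconf import DictConfig

    @hydra.main(version_base=None, config_path=".", config_name="config_guitfx")
    def main(cfg: DictConfig) -> None:
        # ${oc.env:...} and ${sample_rate} interpolations resolve on access;
        # instantiate() recursively builds the Datamodule and its GuitarFXDataset
        datamodule = hydra.utils.instantiate(cfg.datamodule)
        datamodule.setup()
        x, y, label = next(iter(datamodule.train_dataloader()))

    if __name__ == "__main__":
        main()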
exp/demucs.yaml
CHANGED
@@ -13,8 +13,11 @@ model:
   audio_channels: 1
   nfft: 4096
   sample_rate: ${sample_rate}
-
-
 datamodule:
   dataset:
-    effect_types:
+    effect_types:
+      Distortion:
+        _target_: remfx.effects.RandomPedalboardDistortion
+        sample_rate: ${sample_rate}
+        min_drive_db: -10
+        max_drive_db: 50
exp/umx.yaml
CHANGED
@@ -16,4 +16,9 @@ model:
   sample_rate: ${sample_rate}
 datamodule:
   dataset:
-    effect_types:
+    effect_types:
+      Distortion:
+        _target_: remfx.effects.RandomPedalboardDistortion
+        sample_rate: ${sample_rate}
+        min_drive_db: -10
+        max_drive_db: 50
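
exp/demucs.yaml and exp/umx.yaml add the identical effect_types block, which Hydra turns into the {name: module} dict the new GuitarSet dataset consumes. A hedged sketch of that instantiation in isolation (the literal 48000 stands in for ${sample_rate}, which only resolves inside the composed config):

    from hydra.utils import instantiate
    from omegaconf import OmegaConf

    cfg = OmegaConf.create(
        {
            "Distortion": {
                "_target_": "remfx.effects.RandomPedalboardDistortion",
                "sample_rate": 48000,
                "min_drive_db": -10,
                "max_drive_db": 50,
            }
        }
    )
    # each _target_ entry becomes a torch.nn.Module keyed by effect name
    effect_types = instantiate(cfg)
    print(effect_types["Distortion"])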
remfx/datasets.py
CHANGED
@@ -6,11 +6,30 @@ import torch.nn.functional as F
 from pathlib import Path
 import pytorch_lightning as pl
 from typing import Any, List, Tuple
-
-
-
-
-
+from remfx import effects
+from pedalboard import (
+    Pedalboard,
+    Chorus,
+    Reverb,
+    Compressor,
+    Phaser,
+    Delay,
+    Distortion,
+    Limiter,
+)
+
+# https://zenodo.org/record/7044411/ -> GuitarFX
+# https://zenodo.org/record/3371780 -> GuitarSet
+
+deterministic_effects = {
+    "Distortion": Pedalboard([Distortion()]),
+    "Compressor": Pedalboard([Compressor()]),
+    "Chorus": Pedalboard([Chorus()]),
+    "Phaser": Pedalboard([Phaser()]),
+    "Delay": Pedalboard([Delay()]),
+    "Reverb": Pedalboard([Reverb()]),
+    "Limiter": Pedalboard([Limiter()]),
+}
 
 
 class GuitarFXDataset(Dataset):
@@ -18,11 +37,10 @@ class GuitarFXDataset(Dataset):
         self,
         root: str,
         sample_rate: int,
-        length: int = LENGTH,
         chunk_size_in_sec: int = 3,
         effect_types: List[str] = None,
     ):
-
+        super().__init__()
         self.wet_files = []
         self.dry_files = []
         self.chunks = []
@@ -30,6 +48,7 @@ class GuitarFXDataset(Dataset):
         self.song_idx = []
         self.root = Path(root)
         self.chunk_size_in_sec = chunk_size_in_sec
+        self.sample_rate = sample_rate
 
         if effect_types is None:
             effect_types = [
@@ -46,7 +65,7 @@ class GuitarFXDataset(Dataset):
             self.dry_files += dry_files
             self.labels += [i] * len(wet_files)
             for audio_file in wet_files:
-                chunk_starts = create_sequential_chunks(
+                chunk_starts, orig_sr = create_sequential_chunks(
                     audio_file, self.chunk_size_in_sec
                 )
                 self.chunks += chunk_starts
@@ -56,7 +75,7 @@ class GuitarFXDataset(Dataset):
             f"Found {len(self.wet_files)} wet files and {len(self.dry_files)} dry files.\n"
            f"Total chunks: {len(self.chunks)}"
         )
-        self.resampler = T.Resample(
+        self.resampler = T.Resample(orig_sr, sample_rate)
 
     def __len__(self):
         return len(self.chunks)
@@ -75,20 +94,91 @@ class GuitarFXDataset(Dataset):
 
         resampled_x = self.resampler(x)
         resampled_y = self.resampler(y)
-        #
-
-
-        if
-
+        # Reset chunk size to be new sample rate
+        chunk_size_in_samples = self.chunk_size_in_sec * self.sample_rate
+        # Pad to chunk_size if needed
+        if resampled_x.shape[-1] < chunk_size_in_samples:
+            resampled_x = F.pad(
+                resampled_x, (0, chunk_size_in_samples - resampled_x.shape[1])
+            )
+        if resampled_y.shape[-1] < chunk_size_in_samples:
+            resampled_y = F.pad(
+                resampled_y, (0, chunk_size_in_samples - resampled_y.shape[1])
+            )
         return (resampled_x, resampled_y, effect_label)
 
 
+class GuitarSet(Dataset):
+    def __init__(
+        self,
+        root: str,
+        sample_rate: int,
+        chunk_size_in_sec: int = 3,
+        effect_types: List[torch.nn.Module] = None,
+    ):
+        super().__init__()
+        self.chunks = []
+        self.song_idx = []
+        self.root = Path(root)
+        self.chunk_size_in_sec = chunk_size_in_sec
+        self.files = sorted(list(self.root.glob("./**/*.wav")))
+        self.sample_rate = sample_rate
+        for i, audio_file in enumerate(self.files):
+            chunk_starts, orig_sr = create_sequential_chunks(
+                audio_file, self.chunk_size_in_sec
+            )
+            self.chunks += chunk_starts
+            self.song_idx += [i] * len(chunk_starts)
+        print(f"Found {len(self.files)} files.\n" f"Total chunks: {len(self.chunks)}")
+        self.resampler = T.Resample(orig_sr, sample_rate)
+        self.effect_types = effect_types
+        self.normalize = effects.LoudnessNormalize(sample_rate, target_lufs_db=-20)
+        self.mode = "train"
+
+    def __len__(self):
+        return len(self.chunks)
+
+    def __getitem__(self, idx):
+        # Load and effect audio
+        song_idx = self.song_idx[idx]
+        x, sr = torchaudio.load(self.files[song_idx])
+        chunk_start = self.chunks[idx]
+        chunk_size_in_samples = self.chunk_size_in_sec * sr
+        x = x[:, chunk_start : chunk_start + chunk_size_in_samples]
+        resampled_x = self.resampler(x)
+        # Reset chunk size to be new sample rate
+        chunk_size_in_samples = self.chunk_size_in_sec * self.sample_rate
+        # Pad to chunk_size if needed
+        if resampled_x.shape[-1] < chunk_size_in_samples:
+            resampled_x = F.pad(
+                resampled_x, (0, chunk_size_in_samples - resampled_x.shape[1])
+            )
+
+        # Add random effect if train
+        if self.mode == "train":
+            random_effect_idx = torch.rand(1).item() * len(self.effect_types.keys())
+            effect_name = list(self.effect_types.keys())[int(random_effect_idx)]
+            effect = self.effect_types[effect_name]
+            effected_input = effect(resampled_x)
+        else:
+            # deterministic static effect for eval
+            effect_idx = idx % len(self.effect_types.keys())
+            effect_name = list(self.effect_types.keys())[effect_idx]
+            effect = deterministic_effects[effect_name]
+            effected_input = torch.from_numpy(
+                effect(resampled_x.numpy(), self.sample_rate)
+            )
+        normalized_input = self.normalize(effected_input)
+        normalized_target = self.normalize(resampled_x)
+        return (normalized_input, normalized_target, effect_name)
+
+
 def create_random_chunks(
     audio_file: str, chunk_size: int, num_chunks: int
-) -> List[Tuple[int, int]]:
+) -> Tuple[List[Tuple[int, int]], int]:
     """Create num_chunks random chunks of size chunk_size (seconds)
     from an audio file.
-    Return sample_index of start of each chunk
+    Return sample_index of start of each chunk and original sr
     """
     audio, sr = torchaudio.load(audio_file)
     chunk_size_in_samples = chunk_size * sr
@@ -98,17 +188,19 @@ def create_random_chunks(
     for i in range(num_chunks):
         start = torch.randint(0, audio.shape[-1] - chunk_size_in_samples, (1,)).item()
         chunks.append(start)
-    return chunks
+    return chunks, sr
 
 
-def create_sequential_chunks(
+def create_sequential_chunks(
+    audio_file: str, chunk_size: int
+) -> Tuple[List[Tuple[int, int]], int]:
     """Create sequential chunks of size chunk_size (seconds) from an audio file.
-    Return sample_index of start of each chunk
+    Return sample_index of start of each chunk and original sr
     """
     audio, sr = torchaudio.load(audio_file)
     chunk_size_in_samples = chunk_size * sr
     chunk_starts = torch.arange(0, audio.shape[-1], chunk_size_in_samples)
-    return chunk_starts
+    return chunk_starts, sr
 
 
 class Datamodule(pl.LightningDataModule):
@@ -133,11 +225,12 @@ class Datamodule(pl.LightningDataModule):
 
     def setup(self, stage: Any = None) -> None:
         split = [1.0 - self.val_split, self.val_split]
-        train_size =
-        val_size =
+        train_size = round(split[0] * len(self.dataset))
+        val_size = round(split[1] * len(self.dataset))
         self.data_train, self.data_val = random_split(
             self.dataset, [train_size, val_size]
         )
+        self.data_val.dataset.mode = "val"
 
     def train_dataloader(self) -> DataLoader:
         return DataLoader(
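
A short usage sketch for the new GuitarSet class on its own, outside the Lightning Datamodule; the root path is a placeholder and any tree of .wav files works:

    from remfx.datasets import GuitarSet
    from remfx.effects import RandomPedalboardDistortion

    dataset = GuitarSet(
        root="./data/GuitarSet",  # placeholder path
        sample_rate=48000,
        chunk_size_in_sec=6,
        effect_types={"Distortion": RandomPedalboardDistortion(sample_rate=48000)},
    )
    # train mode: a randomly chosen, randomly parameterized effect per item
    x, y, name = dataset[0]  # (effected input, clean target, effect name)
    print(x.shape)  # (channels, ~6 * 48000); short tail chunks are zero-padded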
remfx/effects.py
ADDED
@@ -0,0 +1,698 @@
+import torch
+import torchaudio
+import numpy as np
+import scipy.signal
+import scipy.stats
+import pyloudnorm as pyln
+from torchvision.transforms import Compose, RandomApply
+
+
+from typing import List
+from pedalboard import (
+    Pedalboard,
+    Chorus,
+    Reverb,
+    Compressor,
+    Phaser,
+    Delay,
+    Distortion,
+    Limiter,
+)
+
+__all__ = []
+
+
+def loguniform(low=0, high=1):
+    return scipy.stats.loguniform.rvs(low, high)
+
+
+def rand(low=0, high=1):
+    return (torch.rand(1).numpy()[0] * (high - low)) + low
+
+
+def randint(low=0, high=1):
+    return torch.randint(low, high + 1, (1,)).numpy()[0]
+
+
+def biqaud(
+    gain_db: float,
+    cutoff_freq: float,
+    q_factor: float,
+    sample_rate: float,
+    filter_type: str,
+):
+    """Use design parameters to generate coefficients for a specific filter type.
+    Args:
+        gain_db (float): Shelving filter gain in dB.
+        cutoff_freq (float): Cutoff frequency in Hz.
+        q_factor (float): Q factor.
+        sample_rate (float): Sample rate in Hz.
+        filter_type (str): Filter type.
+            One of "low_shelf", "high_shelf", or "peaking"
+    Returns:
+        b (np.ndarray): Numerator filter coefficients stored as [b0, b1, b2]
+        a (np.ndarray): Denominator filter coefficients stored as [a0, a1, a2]
+    """
+
+    A = 10 ** (gain_db / 40.0)
+    w0 = 2.0 * np.pi * (cutoff_freq / sample_rate)
+    alpha = np.sin(w0) / (2.0 * q_factor)
+
+    cos_w0 = np.cos(w0)
+    sqrt_A = np.sqrt(A)
+
+    if filter_type == "high_shelf":
+        b0 = A * ((A + 1) + (A - 1) * cos_w0 + 2 * sqrt_A * alpha)
+        b1 = -2 * A * ((A - 1) + (A + 1) * cos_w0)
+        b2 = A * ((A + 1) + (A - 1) * cos_w0 - 2 * sqrt_A * alpha)
+        a0 = (A + 1) - (A - 1) * cos_w0 + 2 * sqrt_A * alpha
+        a1 = 2 * ((A - 1) - (A + 1) * cos_w0)
+        a2 = (A + 1) - (A - 1) * cos_w0 - 2 * sqrt_A * alpha
+    elif filter_type == "low_shelf":
+        b0 = A * ((A + 1) - (A - 1) * cos_w0 + 2 * sqrt_A * alpha)
+        b1 = 2 * A * ((A - 1) - (A + 1) * cos_w0)
+        b2 = A * ((A + 1) - (A - 1) * cos_w0 - 2 * sqrt_A * alpha)
+        a0 = (A + 1) + (A - 1) * cos_w0 + 2 * sqrt_A * alpha
+        a1 = -2 * ((A - 1) + (A + 1) * cos_w0)
+        a2 = (A + 1) + (A - 1) * cos_w0 - 2 * sqrt_A * alpha
+    elif filter_type == "peaking":
+        b0 = 1 + alpha * A
+        b1 = -2 * cos_w0
+        b2 = 1 - alpha * A
+        a0 = 1 + alpha / A
+        a1 = -2 * cos_w0
+        a2 = 1 - alpha / A
+    else:
+        pass
+        # raise ValueError(f"Invalid filter_type: {filter_type}.")
+
+    b = np.array([b0, b1, b2]) / a0
+    a = np.array([a0, a1, a2]) / a0
+
+    return b, a
+
+
+def parametric_eq(
+    x: np.ndarray,
+    sample_rate: float,
+    low_shelf_gain_db: float = 0.0,
+    low_shelf_cutoff_freq: float = 80.0,
+    low_shelf_q_factor: float = 0.707,
+    band_gains_db: List[float] = [0.0],
+    band_cutoff_freqs: List[float] = [300.0],
+    band_q_factors: List[float] = [0.707],
+    high_shelf_gain_db: float = 0.0,
+    high_shelf_cutoff_freq: float = 1000.0,
+    high_shelf_q_factor: float = 0.707,
+    dtype=np.float32,
+):
+    """Multiband parametric EQ.
+    Low-shelf -> Band 1 -> ... -> Band N -> High-shelf
+    Args:
+    """
+    assert (
+        len(band_gains_db) == len(band_cutoff_freqs) == len(band_q_factors)
+    )  # must define for all bands
+
+    # -------- apply low-shelf filter --------
+    b, a = biqaud(
+        low_shelf_gain_db,
+        low_shelf_cutoff_freq,
+        low_shelf_q_factor,
+        sample_rate,
+        "low_shelf",
+    )
+    x = scipy.signal.lfilter(b, a, x)
+
+    # -------- apply peaking filters --------
+    for gain_db, cutoff_freq, q_factor in zip(
+        band_gains_db, band_cutoff_freqs, band_q_factors
+    ):
+        b, a = biqaud(
+            gain_db,
+            cutoff_freq,
+            q_factor,
+            sample_rate,
+            "peaking",
+        )
+        x = scipy.signal.lfilter(b, a, x)
+
+    # -------- apply high-shelf filter --------
+    b, a = biqaud(
+        high_shelf_gain_db,
+        high_shelf_cutoff_freq,
+        high_shelf_q_factor,
+        sample_rate,
+        "high_shelf",
+    )
+    sos5 = np.concatenate((b, a))
+    x = scipy.signal.lfilter(b, a, x)
+
+    return x.astype(dtype)
+
+
+class RandomParametricEQ(torch.nn.Module):
+    def __init__(
+        self,
+        sample_rate: float,
+        num_bands: int = 3,
+        min_gain_db: float = -6.0,
+        max_gain_db: float = +6.0,
+        min_cutoff_freq: float = 1000.0,
+        max_cutoff_freq: float = 10000.0,
+        min_q_factor: float = 0.1,
+        max_q_factor: float = 4.0,
+    ):
+        super().__init__()
+        self.sample_rate = sample_rate
+        self.num_bands = num_bands
+        self.min_gain_db = min_gain_db
+        self.max_gain_db = max_gain_db
+        self.min_cutoff_freq = min_cutoff_freq
+        self.max_cutoff_freq = max_cutoff_freq
+        self.min_q_factor = min_q_factor
+        self.max_q_factor = max_q_factor
+
+    def forward(self, x: torch.Tensor):
+        """
+        Args:
+            x: (torch.Tensor): Array of audio samples with shape (chs, seq_len).
+                The filter will be applied to the final dimension, and by default the same
+                filter will be applied to all channels.
+        """
+        low_shelf_gain_db = rand(self.min_gain_db, self.max_gain_db)
+        low_shelf_cutoff_freq = loguniform(20.0, 200.0)
+        low_shelf_q_factor = rand(self.min_q_factor, self.max_q_factor)
+
+        high_shelf_gain_db = rand(self.min_gain_db, self.max_gain_db)
+        high_shelf_cutoff_freq = loguniform(8000.0, 16000.0)
+        high_shelf_q_factor = rand(self.min_q_factor, self.max_q_factor)
+
+        band_gain_dbs = []
+        band_cutoff_freqs = []
+        band_q_factors = []
+        for _ in range(self.num_bands):
+            band_gain_dbs.append(rand(self.min_gain_db, self.max_gain_db))
+            band_cutoff_freqs.append(
+                loguniform(self.min_cutoff_freq, self.max_cutoff_freq)
+            )
+            band_q_factors.append(rand(self.min_q_factor, self.max_q_factor))
+
+        y = parametric_eq(
+            x.numpy(),
+            self.sample_rate,
+            low_shelf_gain_db=low_shelf_gain_db,
+            low_shelf_cutoff_freq=low_shelf_cutoff_freq,
+            low_shelf_q_factor=low_shelf_q_factor,
+            band_gains_db=band_gain_dbs,
+            band_cutoff_freqs=band_cutoff_freqs,
+            band_q_factors=band_q_factors,
+            high_shelf_gain_db=high_shelf_gain_db,
+            high_shelf_cutoff_freq=high_shelf_cutoff_freq,
+            high_shelf_q_factor=high_shelf_q_factor,
+        )
+
+        return torch.from_numpy(y)
+
+
+def stereo_widener(x: torch.Tensor, width: torch.Tensor):
+    sqrt2 = np.sqrt(2)
+
+    left = x[0, ...]
+    right = x[1, ...]
+
+    mid = (left + right) / sqrt2
+    side = (left - right) / sqrt2
+
+    # amplify mid and side signal separately:
+    mid *= 2 * (1 - width)
+    side *= 2 * width
+
+    left = (mid + side) / sqrt2
+    right = (mid - side) / sqrt2
+
+    x = torch.stack((left, right), dim=0)
+
+    return x
+
+
+class RandomStereoWidener(torch.nn.Module):
+    def __init__(
+        self,
+        sample_rate: float,
+        min_width: float = 0.0,
+        max_width: float = 1.0,
+    ) -> None:
+        super().__init__()
+        self.sample_rate = sample_rate
+        self.min_width = min_width
+        self.max_width = max_width
+
+    def forward(self, x: torch.Tensor):
+        width = rand(self.min_width, self.max_width)
+        return stereo_widener(x, width)
+
+
+class RandomVolumeAutomation(torch.nn.Module):
+    def __init__(
+        self,
+        sample_rate: float,
+        min_segments: int = 1,
+        max_segments: int = 3,
+        min_gain_db: float = -6.0,
+        max_gain_db: float = 6.0,
+    ) -> None:
+        super().__init__()
+        self.sample_rate = sample_rate
+        self.min_segments = min_segments
+        self.max_segments = max_segments
+        self.min_gain_db = min_gain_db
+        self.max_gain_db = max_gain_db
+
+    def forward(self, x: torch.Tensor):
+        gain_db = torch.zeros(x.shape[-1]).type_as(x)
+
+        num_segments = randint(self.min_segments, self.max_segments)
+        segment_lengths = (
+            x.shape[-1]
+            * np.random.dirichlet([rand(0, 10) for _ in range(num_segments)], 1)
+        ).astype("int")[0]
+
+        samples_filled = 0
+        start_gain_db = 0
+        for idx in range(num_segments):
+            segment_samples = segment_lengths[idx]
+            if idx != 0:
+                start_gain_db = end_gain_db
+
+            # sample random end gain
+            end_gain_db = rand(self.min_gain_db, self.max_gain_db)
+            fade = torch.linspace(start_gain_db, end_gain_db, steps=segment_samples)
+            gain_db[samples_filled : samples_filled + segment_samples] = fade
+            samples_filled = samples_filled + segment_samples
+
+        # print(gain_db)
+        x *= 10 ** (gain_db / 20.0)
+        return x
+
+
+class RandomPedalboardCompressor(torch.nn.Module):
+    def __init__(
+        self,
+        sample_rate: float,
+        min_threshold_db: float = -42.0,
+        max_threshold_db: float = -6.0,
+        min_ratio: float = 1.5,
+        max_ratio: float = 4.0,
+        min_attack_ms: float = 1.0,
+        max_attack_ms: float = 50.0,
+        min_release_ms: float = 10.0,
+        max_release_ms: float = 250.0,
+    ) -> None:
+        super().__init__()
+        self.sample_rate = sample_rate
+        self.min_threshold_db = min_threshold_db
+        self.max_threshold_db = max_threshold_db
+        self.min_ratio = min_ratio
+        self.max_ratio = max_ratio
+        self.min_attack_ms = min_attack_ms
+        self.max_attack_ms = max_attack_ms
+        self.min_release_ms = min_release_ms
+        self.max_release_ms = max_release_ms
+
+    def forward(self, x: torch.Tensor):
+        board = Pedalboard()
+        threshold_db = rand(self.min_threshold_db, self.max_threshold_db)
+        ratio = rand(self.min_ratio, self.max_ratio)
+        attack_ms = rand(self.min_attack_ms, self.max_attack_ms)
+        release_ms = rand(self.min_release_ms, self.max_release_ms)
+
+        board.append(
+            Compressor(
+                threshold_db=threshold_db,
+                ratio=ratio,
+                attack_ms=attack_ms,
+                release_ms=release_ms,
+            )
+        )
+
+        # process audio using the pedalboard
+        return torch.from_numpy(board(x.numpy(), self.sample_rate))
+
+
+class RandomPedalboardDelay(torch.nn.Module):
+    def __init__(
+        self,
+        sample_rate: float,
+        min_delay_seconds: float = 0.1,
+        max_delay_sconds: float = 1.0,
+        min_feedback: float = 0.05,
+        max_feedback: float = 0.6,
+        min_mix: float = 0.0,
+        max_mix: float = 0.7,
+    ) -> None:
+        super().__init__()
+        self.sample_rate = sample_rate
+        self.min_delay_seconds = min_delay_seconds
+        self.max_delay_seconds = max_delay_sconds
+        self.min_feedback = min_feedback
+        self.max_feedback = max_feedback
+        self.min_mix = min_mix
+        self.max_mix = max_mix
+
+    def forward(self, x: torch.Tensor):
+        board = Pedalboard()
+        delay_seconds = loguniform(self.min_delay_seconds, self.max_delay_seconds)
+        feedback = rand(self.min_feedback, self.max_feedback)
+        mix = rand(self.min_mix, self.max_mix)
+        board.append(Delay(delay_seconds=delay_seconds, feedback=feedback, mix=mix))
+        return torch.from_numpy(board(x.numpy(), self.sample_rate))
+
+
+class RandomPedalboardChorus(torch.nn.Module):
+    def __init__(
+        self,
+        sample_rate: float,
+        min_rate_hz: float = 0.25,
+        max_rate_hz: float = 4.0,
+        min_depth: float = 0.0,
+        max_depth: float = 0.6,
+        min_centre_delay_ms: float = 5.0,
+        max_centre_delay_ms: float = 10.0,
+        min_feedback: float = 0.1,
+        max_feedback: float = 0.6,
+        min_mix: float = 0.1,
+        max_mix: float = 0.7,
+    ) -> None:
+        super().__init__()
+        self.sample_rate = sample_rate
+        self.min_rate_hz = min_rate_hz
+        self.max_rate_hz = max_rate_hz
+        self.min_depth = min_depth
+        self.max_depth = max_depth
+        self.min_centre_delay_ms = min_centre_delay_ms
+        self.max_centre_delay_ms = max_centre_delay_ms
+        self.min_feedback = min_feedback
+        self.max_feedback = max_feedback
+        self.min_mix = min_mix
+        self.max_mix = max_mix
+
+    def forward(self, x: torch.Tensor):
+        board = Pedalboard()
+        rate_hz = rand(self.min_rate_hz, self.max_rate_hz)
+        depth = rand(self.min_depth, self.max_depth)
+        centre_delay_ms = rand(self.min_centre_delay_ms, self.max_centre_delay_ms)
+        feedback = rand(self.min_feedback, self.max_feedback)
+        mix = rand(self.min_mix, self.max_mix)
+        board.append(
+            Chorus(
+                rate_hz=rate_hz,
+                depth=depth,
+                centre_delay_ms=centre_delay_ms,
+                feedback=feedback,
+                mix=mix,
+            )
+        )
+        # process audio using the pedalboard
+        return torch.from_numpy(board(x.numpy(), self.sample_rate))
+
+
+class RandomPedalboardPhaser(torch.nn.Module):
+    def __init__(
+        self,
+        sample_rate: float,
+        min_rate_hz: float = 0.25,
+        max_rate_hz: float = 5.0,
+        min_depth: float = 0.1,
+        max_depth: float = 0.6,
+        min_centre_frequency_hz: float = 200.0,
+        max_centre_frequency_hz: float = 600.0,
+        min_feedback: float = 0.1,
+        max_feedback: float = 0.6,
+        min_mix: float = 0.1,
+        max_mix: float = 0.7,
+    ) -> None:
+        super().__init__()
+        self.sample_rate = sample_rate
+        self.min_rate_hz = min_rate_hz
+        self.max_rate_hz = max_rate_hz
+        self.min_depth = min_depth
+        self.max_depth = max_depth
+        self.min_centre_frequency_hz = min_centre_frequency_hz
+        self.max_centre_frequency_hz = max_centre_frequency_hz
+        self.min_feedback = min_feedback
+        self.max_feedback = max_feedback
+        self.min_mix = min_mix
+        self.max_mix = max_mix
+
+    def forward(self, x: torch.Tensor):
+        board = Pedalboard()
+        rate_hz = rand(self.min_rate_hz, self.max_rate_hz)
+        depth = rand(self.min_depth, self.max_depth)
+        centre_frequency_hz = rand(
+            self.min_centre_frequency_hz, self.max_centre_frequency_hz
+        )
+        feedback = rand(self.min_feedback, self.max_feedback)
+        mix = rand(self.min_mix, self.max_mix)
+        board.append(
+            Phaser(
+                rate_hz=rate_hz,
+                depth=depth,
+                centre_frequency_hz=centre_frequency_hz,
+                feedback=feedback,
+                mix=mix,
+            )
+        )
+        # process audio using the pedalboard
+        return torch.from_numpy(board(x.numpy(), self.sample_rate))
+
+
+class RandomPedalboardLimiter(torch.nn.Module):
+    def __init__(
+        self,
+        sample_rate: float,
+        min_threshold_db: float = -32.0,
+        max_threshold_db: float = -6.0,
+        min_release_ms: float = 10.0,
+        max_release_ms: float = 300.0,
+    ) -> None:
+        super().__init__()
+        self.sample_rate = sample_rate
+        self.min_threshold_db = min_threshold_db
+        self.max_threshold_db = max_threshold_db
+        self.min_release_ms = min_release_ms
+        self.max_release_ms = max_release_ms
+
+    def forward(self, x: torch.Tensor):
+        board = Pedalboard()
+        threshold_db = rand(self.min_threshold_db, self.max_threshold_db)
+        release_ms = rand(self.min_release_ms, self.max_release_ms)
+        board.append(
+            Limiter(
+                threshold_db=threshold_db,
+                release_ms=release_ms,
+            )
+        )
+        return torch.from_numpy(board(x.numpy(), self.sample_rate))
+
+
+class RandomPedalboardDistortion(torch.nn.Module):
+    def __init__(
+        self,
+        sample_rate: float,
+        min_drive_db: float = -20.0,
+        max_drive_db: float = 12.0,
+    ):
+        super().__init__()
+        self.sample_rate = sample_rate
+        self.min_drive_db = min_drive_db
+        self.max_drive_db = max_drive_db
+
+    def forward(self, x: torch.Tensor):
+        board = Pedalboard()
+        drive_db = rand(self.min_drive_db, self.max_drive_db)
+        board.append(Distortion(drive_db=drive_db))
+        return torch.from_numpy(board(x.numpy(), self.sample_rate))
+
+
+class RandomSoxReverb(torch.nn.Module):
+    def __init__(
+        self,
+        sample_rate: float,
+        min_reverberance: float = 10.0,
+        max_reverberance: float = 100.0,
+        min_high_freq_damping: float = 0.0,
+        max_high_freq_damping: float = 100.0,
+        min_wet_dry: float = 0.0,
+        max_wet_dry: float = 1.0,
+        min_room_scale: float = 5.0,
+        max_room_scale: float = 100.0,
+        min_stereo_depth: float = 20.0,
+        max_stereo_depth: float = 100.0,
+        min_pre_delay: float = 0.0,
+        max_pre_delay: float = 100.0,
+    ) -> None:
+        super().__init__()
+        self.sample_rate = sample_rate
+        self.min_reverberance = min_reverberance
+        self.max_reverberance = max_reverberance
+        self.min_high_freq_damping = min_high_freq_damping
+        self.max_high_freq_damping = max_high_freq_damping
+        self.min_wet_dry = min_wet_dry
+        self.max_wet_dry = max_wet_dry
+        self.min_room_scale = min_room_scale
+        self.max_room_scale = max_room_scale
+        self.min_stereo_depth = min_stereo_depth
+        self.max_stereo_depth = max_stereo_depth
+        self.min_pre_delay = min_pre_delay
+        self.max_pre_delay = max_pre_delay
+
+    def forward(self, x: torch.Tensor):
+        reverberance = rand(self.min_reverberance, self.max_reverberance)
+        high_freq_damping = rand(self.min_high_freq_damping, self.max_high_freq_damping)
+        room_scale = rand(self.min_room_scale, self.max_room_scale)
+        stereo_depth = rand(self.min_stereo_depth, self.max_stereo_depth)
+        wet_dry = rand(self.min_wet_dry, self.max_wet_dry)
+        pre_delay = rand(self.min_pre_delay, self.max_pre_delay)
+
+        effects = [
+            [
+                "reverb",
+                f"{reverberance}",
+                f"{high_freq_damping}",
+                f"{room_scale}",
+                f"{stereo_depth}",
+                f"{pre_delay}",
+                "--wet-only",
+            ]
+        ]
+        y, _ = torchaudio.sox_effects.apply_effects_tensor(
+            x, self.sample_rate, effects, channels_first=True
+        )
+
+        # manual wet/dry mix
+        return (x * (1 - wet_dry)) + (y * wet_dry)
+
+
+class RandomPebalboardReverb(torch.nn.Module):
+    def __init__(
+        self,
+        sample_rate: float,
+        min_room_size: float = 0.0,
+        max_room_size: float = 1.0,
+        min_damping: float = 0.0,
+        max_damping: float = 1.0,
+        min_wet_dry: float = 0.0,
+        max_wet_dry: float = 0.7,
+        min_width: float = 0.0,
+        max_width: float = 1.0,
+    ) -> None:
+        super().__init__()
+        self.sample_rate = sample_rate
+        self.min_room_size = min_room_size
+        self.max_room_size = max_room_size
+        self.min_damping = min_damping
+        self.max_damping = max_damping
+        self.min_wet_dry = min_wet_dry
+        self.max_wet_dry = max_wet_dry
+        self.min_width = min_width
+        self.max_width = max_width
+
+    def forward(self, x: torch.Tensor):
+        board = Pedalboard()
+        room_size = rand(self.min_room_size, self.max_room_size)
+        damping = rand(self.min_damping, self.max_damping)
+        wet_dry = rand(self.min_wet_dry, self.max_wet_dry)
+        width = rand(self.min_width, self.max_width)
+
+        board.append(
+            Reverb(
+                room_size=room_size,
+                damping=damping,
+                wet_level=wet_dry,
+                dry_level=(1 - wet_dry),
+                width=width,
+            )
+        )
+
+        return torch.from_numpy(board(x.numpy(), self.sample_rate))
+
+
+class LoudnessNormalize(torch.nn.Module):
+    def __init__(self, sample_rate: float, target_lufs_db: float = -32.0) -> None:
+        super().__init__()
+        self.meter = pyln.Meter(sample_rate)
+        self.target_lufs_db = target_lufs_db
+
+    def forward(self, x: torch.Tensor):
+        x_lufs_db = self.meter.integrated_loudness(x.permute(1, 0).numpy())
+        delta_lufs_db = torch.tensor([self.target_lufs_db - x_lufs_db]).float()
+        gain_lin = 10.0 ** (delta_lufs_db.clamp(-120, 40.0) / 20.0)
+        return gain_lin * x
+
+
+class RandomAudioEffectsChannel(torch.nn.Module):
+    def __init__(
+        self,
+        sample_rate: float,
+        parametric_eq_prob: float = 0.7,
+        distortion_prob: float = 0.01,
+        delay_prob: float = 0.1,
+        chorus_prob: float = 0.01,
+        phaser_prob: float = 0.01,
+        compressor_prob: float = 0.4,
+        reverb_prob: float = 0.2,
+        stereo_widener_prob: float = 0.3,
+        limiter_prob: float = 0.3,
+        vol_automation_prob: float = 0.7,
+        target_lufs_db: float = -32.0,
+    ) -> None:
+        super().__init__()
+        self.transforms = Compose(
+            [
+                RandomApply(
+                    [RandomParametricEQ(sample_rate)],
+                    p=parametric_eq_prob,
+                ),
+                RandomApply(
+                    [RandomPedalboardDistortion(sample_rate)],
+                    p=distortion_prob,
+                ),
+                RandomApply(
+                    [RandomPedalboardDelay(sample_rate)],
+                    p=delay_prob,
+                ),
+                RandomApply(
+                    [RandomPedalboardChorus(sample_rate)],
+                    p=chorus_prob,
+                ),
+                RandomApply(
+                    [RandomPedalboardPhaser(sample_rate)],
+                    p=phaser_prob,
+                ),
+                RandomApply(
+                    [RandomPedalboardCompressor(sample_rate)],
+                    p=compressor_prob,
+                ),
+                RandomApply(
+                    [RandomPebalboardReverb(sample_rate)],
+                    p=reverb_prob,
+                ),
+                RandomApply(
+                    [RandomStereoWidener(sample_rate)],
+                    p=stereo_widener_prob,
+                ),
+                RandomApply(
+                    [RandomPedalboardLimiter(sample_rate)],
+                    p=limiter_prob,
+                ),
+                RandomApply(
+                    [RandomVolumeAutomation(sample_rate)],
+                    p=vol_automation_prob,
+                ),
+                LoudnessNormalize(sample_rate, target_lufs_db=target_lufs_db),
+            ]
+        )
+
+    def forward(self, x: torch.Tensor):
+        return self.transforms(x)
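
A quick sketch of the two layers this new module provides, the functional DSP (parametric_eq built from biquad sections) and the randomized torch.nn.Module wrappers; all parameter values here are illustrative:

    import numpy as np
    import torch
    from remfx.effects import LoudnessNormalize, RandomPedalboardDistortion, parametric_eq

    sr = 48000
    x = (0.1 * np.random.randn(1, sr)).astype(np.float32)

    # functional layer: a single peaking band boosting 4 kHz by 6 dB
    y = parametric_eq(
        x, sr, band_gains_db=[6.0], band_cutoff_freqs=[4000.0], band_q_factors=[2.0]
    )

    # module layer: parameters are re-drawn on every forward call, so the
    # same input is driven differently each time
    dist = RandomPedalboardDistortion(sr)
    out_a = dist(torch.from_numpy(x))
    out_b = dist(torch.from_numpy(x))

    # LoudnessNormalize then brings the result to the target integrated loudness
    out = LoudnessNormalize(sr, target_lufs_db=-20)(out_a)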
remfx/models.py
CHANGED
@@ -39,7 +39,8 @@ class RemFXModel(pl.LightningModule):
             }
         )
         # Log first batch metrics input vs output only once
-        self.
+        self.log_first_metrics = True
+        self.log_train_audio = True
 
     @property
     def device(self):
@@ -87,8 +88,35 @@ class RemFXModel(pl.LightningModule):
         return loss
 
     def on_train_batch_start(self, batch, batch_idx):
-        if self.
+        if self.log_train_audio:
+            x, y, label = batch
+            # Concat samples together for easier viewing in dashboard
+            input_samples = rearrange(x, "b c t -> c (b t)").unsqueeze(0)
+            target_samples = rearrange(y, "b c t -> c (b t)").unsqueeze(0)
+
+            log_wandb_audio_batch(
+                logger=self.logger,
+                id="input_effected_audio",
+                samples=input_samples.cpu(),
+                sampling_rate=self.sample_rate,
+                caption="Training Data",
+            )
+            log_wandb_audio_batch(
+                logger=self.logger,
+                id="target_audio",
+                samples=target_samples.cpu(),
+                sampling_rate=self.sample_rate,
+                caption="Target Data",
+            )
+            self.log_train_audio = False
+
+    def on_validation_epoch_start(self):
+        self.log_next = True
+
+    def on_validation_batch_start(self, batch, batch_idx, dataloader_idx):
+        if self.log_next:
             x, target, label = batch
+            # Log Input Metrics
             for metric in self.metrics:
                 # SISDR returns negative values, so negate them
                 if metric == "SISDR":
@@ -104,20 +132,17 @@ class RemFXModel(pl.LightningModule):
                 prog_bar=True,
                 sync_dist=True,
             )
-            self.log_first = False
 
-    def on_validation_epoch_start(self):
-        self.log_next = True
-
-    def on_validation_batch_start(self, batch, batch_idx, dataloader_idx):
-        if self.log_next:
-            x, target, label = batch
             self.model.eval()
             with torch.no_grad():
                 y = self.model.sample(x)
 
             # Concat samples together for easier viewing in dashboard
-
+            # 2 seconds of silence between each sample
+            silence = torch.zeros_like(x)
+            silence = silence[:, : self.sample_rate * 2]
+
+            concat_samples = torch.cat([y, silence, x, silence, target], dim=-1)
             log_wandb_audio_batch(
                 logger=self.logger,
                 id="prediction_input_target",
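
The new train-audio logging leans on an einops rearrange to lay the batch end-to-end along time; a standalone illustration of that reshape (shapes invented for the example):

    import torch
    from einops import rearrange

    x = torch.randn(4, 1, 48000)  # (batch, channels, time)

    # "b c t -> c (b t)" concatenates the batch along time, so a whole batch
    # can be logged to W&B as one continuous audio clip
    samples = rearrange(x, "b c t -> c (b t)").unsqueeze(0)
    print(samples.shape)  # torch.Size([1, 1, 192000])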
setup.py
CHANGED
@@ -44,6 +44,8 @@ setup(
         "librosa",
         "hydra-core",
         "auraloss",
+        "pyloudnorm",
+        "pedalboard",
     ],
     include_package_data=True,
     license="Apache License 2.0",
shell_vars.sh
CHANGED
@@ -1,3 +1,3 @@
-export DATASET_ROOT="./data/
+export DATASET_ROOT="./data/GuitarSet"
 export WANDB_PROJECT="RemFX"
 export WANDB_ENTITY="mattricesound"