Merge pull request #29 from mhrice/cjs--classifier-v2
Files changed:
- README.md +1 -2
- cfg/config.yaml +1 -0
- cfg/model/classifier.yaml +14 -0
- remfx/datasets.py +71 -26
- remfx/effects.py +3 -3
- remfx/models.py +229 -13
README.md
CHANGED
@@ -9,10 +9,9 @@
 5. `pip install -e umx`
 
 ## Download [VocalSet Dataset](https://zenodo.org/record/1193957)
-1. `wget https://zenodo.org/record/
+1. `wget https://zenodo.org/record/1442513/files/VocalSet1-2.zip?download=1`
 2. `mv VocalSet.zip?download=1 VocalSet.zip`
 3. `unzip VocalSet.zip`
-4. Manually split singers into train, val, test directories
 
 # Training
 ## Steps
cfg/config.yaml
CHANGED
@@ -16,6 +16,7 @@ max_kept_effects: -1
 max_removed_effects: -1
 shuffle_kept_effects: True
 shuffle_removed_effects: False
+num_classes: 4
 effects_to_use:
 - compressor
 - distortion
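
The new `num_classes` key sits at the top level of the config so that other config groups can pull it in by interpolation, as cfg/model/classifier.yaml does below. A minimal sketch of how that resolution behaves with OmegaConf; the dictionary here is a stripped-down stand-in, not the repo's full config tree:

```python
# Minimal sketch: a top-level num_classes value is referenced from a nested
# group via ${num_classes} interpolation and resolves on access.
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "num_classes": 4,
        "model": {"network": {"num_classes": "${num_classes}"}},
    }
)
print(cfg.model.network.num_classes)  # -> 4
```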
cfg/model/classifier.yaml
ADDED
@@ -0,0 +1,14 @@
+# @package _global_
+model:
+  _target_: remfx.models.FXClassifier
+  lr: 1e-4
+  lr_weight_decay: 1e-3
+  sample_rate: ${sample_rate}
+  network:
+    _target_: remfx.models.Cnn14
+    num_classes: ${num_classes}
+    n_fft: 4096
+    hop_length: 512
+    n_mels: 128
+    sample_rate: ${sample_rate}
+
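
For reference, the objects Hydra instantiates from this config can also be built by hand. A minimal sketch, assuming `${num_classes}` resolves to 4 (the value added in cfg/config.yaml above) and `${sample_rate}` to 48000; the real values come from the composed config, so the numbers below are illustrative:

```python
# Hand-built equivalent of cfg/model/classifier.yaml (resolved values assumed).
from remfx.models import Cnn14, FXClassifier

network = Cnn14(
    num_classes=4,      # ${num_classes}
    sample_rate=48000,  # ${sample_rate}, assumed here
    n_fft=4096,
    hop_length=512,
    n_mels=128,
)
model = FXClassifier(lr=1e-4, lr_weight_decay=1e-3, sample_rate=48000, network=network)
```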
remfx/datasets.py
CHANGED
@@ -1,16 +1,19 @@
+import os
+import sys
+import glob
 import torch
-
-import torch.nn.functional as F
+import shutil
 import torchaudio
-from pathlib import Path
 import pytorch_lightning as pl
-import
-
-from remfx import effects
+import torch.nn.functional as F
+
 from tqdm import tqdm
-from
-import
+from pathlib import Path
+from remfx import effects
 from ordered_set import OrderedSet
+from typing import Any, List, Dict
+from torch.utils.data import Dataset, DataLoader
+from remfx.utils import create_sequential_chunks
 
 
 # https://zenodo.org/record/1193957 -> VocalSet
@@ -18,6 +21,30 @@ from ordered_set import OrderedSet
 ALL_EFFECTS = effects.Pedalboard_Effects
 
 
+singer_splits = {
+    "train": [
+        "male1",
+        "male2",
+        "male3",
+        "male4",
+        "male5",
+        "male6",
+        "male7",
+        "male8",
+        "male9",
+        "female1",
+        "female2",
+        "female3",
+        "female4",
+        "female5",
+        "female6",
+        "female7",
+    ],
+    "val": ["male10", "female8"],
+    "test": ["male11", "female9"],
+}
+
+
 class VocalSet(Dataset):
     def __init__(
         self,
@@ -43,8 +70,6 @@ class VocalSet(Dataset):
         self.chunk_size = chunk_size
         self.sample_rate = sample_rate
         self.mode = mode
-        mode_path = self.root / self.mode
-        self.files = sorted(list(mode_path.glob("./**/*.wav")))
         self.max_kept_effects = max_kept_effects
         self.max_removed_effects = max_removed_effects
         self.effects_to_use = effects_to_use
@@ -53,11 +78,20 @@ class VocalSet(Dataset):
         self.effects = effect_modules
         self.shuffle_kept_effects = shuffle_kept_effects
         self.shuffle_removed_effects = shuffle_removed_effects
-
         effects_string = "_".join(self.effects_to_use + ["_"] + self.effects_to_remove)
         self.effects_to_keep = self.validate_effect_input()
         self.proc_root = self.render_root / "processed" / effects_string / self.mode
 
+        # find all singer directories
+        singer_dirs = glob.glob(os.path.join(self.root, "data_by_singer", "*"))
+        singer_dirs = [
+            sd for sd in singer_dirs if os.path.basename(sd) in singer_splits[mode]
+        ]
+        self.files = []
+        for singer_dir in singer_dirs:
+            self.files += glob.glob(os.path.join(singer_dir, "**", "**", "*.wav"))
+        self.files = sorted(self.files)
+
         if self.proc_root.exists() and len(list(self.proc_root.iterdir())) > 0:
             print("Found processed files.")
         if render_files:
@@ -86,12 +120,15 @@ class VocalSet(Dataset):
                     # Skip if chunk is too small
                     continue
 
-                dry, wet,
+                dry, wet, dry_effects, wet_effects = self.process_effects(
+                    resampled_chunk
+                )
                 output_dir = self.proc_root / str(self.num_chunks)
                 output_dir.mkdir(exist_ok=True)
                 torchaudio.save(output_dir / "input.wav", wet, self.sample_rate)
                 torchaudio.save(output_dir / "target.wav", dry, self.sample_rate)
-                torch.save(
+                torch.save(dry_effects, output_dir / "dry_effects.pt")
+                torch.save(wet_effects, output_dir / "wet_effects.pt")
                 self.num_chunks += 1
         else:
             self.num_chunks = len(list(self.proc_root.iterdir()))
@@ -107,10 +144,11 @@ class VocalSet(Dataset):
     def __getitem__(self, idx):
         input_file = self.proc_root / str(idx) / "input.wav"
        target_file = self.proc_root / str(idx) / "target.wav"
-
+        dry_effect_names = torch.load(self.proc_root / str(idx) / "dry_effects.pt")
+        wet_effect_names = torch.load(self.proc_root / str(idx) / "wet_effects.pt")
         input, sr = torchaudio.load(input_file)
         target, sr = torchaudio.load(target_file)
-        return (input, target,
+        return (input, target, dry_effect_names, wet_effect_names)
 
     def validate_effect_input(self):
         for effect in self.effects.values():
@@ -154,27 +192,28 @@ class VocalSet(Dataset):
         return kept_fx
 
     def process_effects(self, dry: torch.Tensor):
-        labels = []
-
         # Apply Kept Effects
         # Shuffle effects if specified
         if self.shuffle_kept_effects:
             effect_indices = torch.randperm(len(self.effects_to_keep))
         else:
             effect_indices = torch.arange(len(self.effects_to_keep))
+
         # Up to max_kept_effects
         if self.max_kept_effects != -1:
             num_kept_effects = int(torch.rand(1).item() * (self.max_kept_effects)) + 1
         else:
             num_kept_effects = len(self.effects_to_keep)
         effect_indices = effect_indices[:num_kept_effects]
+
         # Index in effect settings
         effect_names_to_apply = [self.effects_to_keep[i] for i in effect_indices]
         effects_to_apply = [self.effects[i] for i in effect_names_to_apply]
         # Apply
+        dry_labels = []
         for effect in effects_to_apply:
             dry = effect(dry)
-
+            dry_labels.append(ALL_EFFECTS.index(type(effect)))
 
         # Apply effects_to_remove
         # Shuffle effects if specified
@@ -185,9 +224,7 @@ class VocalSet(Dataset):
             effect_indices = torch.arange(len(self.effects_to_remove))
         # Up to max_removed_effects
         if self.max_removed_effects != -1:
-            num_kept_effects = (
-                int(torch.rand(1).item() * (self.max_removed_effects)) + 1
-            )
+            num_kept_effects = int(torch.rand(1).item() * (self.max_removed_effects))
         else:
             num_kept_effects = len(self.effects_to_remove)
         effect_indices = effect_indices[: self.max_removed_effects]
@@ -195,17 +232,25 @@ class VocalSet(Dataset):
         effect_names_to_apply = [self.effects_to_remove[i] for i in effect_indices]
         effects_to_apply = [self.effects[i] for i in effect_names_to_apply]
         # Apply
+
+        wet_labels = []
         for effect in effects_to_apply:
             wet = effect(wet)
-
+            wet_labels.append(ALL_EFFECTS.index(type(effect)))
+
+        wet_labels_tensor = torch.zeros(len(ALL_EFFECTS))
+        dry_labels_tensor = torch.zeros(len(ALL_EFFECTS))
+
+        for label_idx in wet_labels:
+            wet_labels_tensor[label_idx] = 1.0
+
+        for label_idx in dry_labels:
+            dry_labels_tensor[label_idx] = 1.0
 
-        # Convert labels to one-hot
-        one_hot = F.one_hot(torch.tensor(labels), num_classes=len(ALL_EFFECTS))
-        effects_present = torch.sum(one_hot, dim=0).float()
         # Normalize
         normalized_dry = self.normalize(dry)
         normalized_wet = self.normalize(wet)
-        return normalized_dry, normalized_wet,
+        return normalized_dry, normalized_wet, dry_labels_tensor, wet_labels_tensor
 
 
 class VocalSetDatamodule(pl.LightningDataModule):
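
The `dry_effects.pt` / `wet_effects.pt` tensors returned by `__getitem__` are multi-hot vectors over `ALL_EFFECTS`, built from the list indices collected as each effect is applied. A standalone sketch of that encoding with illustrative values; the string list below stands in for `effects.Pedalboard_Effects`, which really holds effect classes:

```python
import torch

# Stand-in for effects.Pedalboard_Effects (the real list holds effect classes).
ALL_EFFECTS = ["Reverb", "Chorus", "Distortion", "Compressor"]

# e.g. distortion and compressor were applied to the wet signal
wet_labels = [2, 3]

# Same multi-hot construction as process_effects()
wet_labels_tensor = torch.zeros(len(ALL_EFFECTS))
for label_idx in wet_labels:
    wet_labels_tensor[label_idx] = 1.0

print(wet_labels_tensor)  # tensor([0., 0., 1., 1.])
```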
remfx/effects.py
CHANGED
@@ -701,9 +701,9 @@ class RandomAudioEffectsChannel(torch.nn.Module):
 Pedalboard_Effects = [
     RandomPedalboardReverb,
     RandomPedalboardChorus,
-    RandomPedalboardDelay,
+    # RandomPedalboardDelay,
     RandomPedalboardDistortion,
     RandomPedalboardCompressor,
-    RandomPedalboardPhaser,
-    RandomPedalboardLimiter,
+    # RandomPedalboardPhaser,
+    # RandomPedalboardLimiter,
 ]
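
With Delay, Phaser, and Limiter commented out, the active list has four entries, which is what `num_classes: 4` in cfg/config.yaml and the multi-hot label length in datasets.py correspond to. A quick check, using only names that appear in this diff and assuming the package is installed as described in the README:

```python
from remfx import effects

ALL_EFFECTS = effects.Pedalboard_Effects
print(len(ALL_EFFECTS))  # 4
print(ALL_EFFECTS.index(effects.RandomPedalboardDistortion))  # 2, the label index used in datasets.py
```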
remfx/models.py
CHANGED
@@ -1,15 +1,19 @@
+import wandb
 import torch
-
+import torchaudio
+import torchmetrics
 import pytorch_lightning as pl
+import torch.nn.functional as F
+
+from torch import Tensor, nn
 from einops import rearrange
-import
+from torchaudio.models import HDemucs
 from audio_diffusion_pytorch import DiffusionModel
 from auraloss.time import SISDRLoss
 from auraloss.freq import MultiResolutionSTFTLoss
-from remfx.utils import FADLoss
-
 from umx.openunmix.model import OpenUnmix, Separator
-
+
+from remfx.utils import FADLoss
 
 
 class RemFXModel(pl.LightningModule):
@@ -90,9 +94,9 @@
         return loss
 
     def common_step(self, batch, batch_idx, mode: str = "train"):
-
+        x, y, _, _ = batch
+        loss, output = self.model((x, y))
         self.log(f"{mode}_loss", loss)
-        x, y, label = batch
         # Metric logging
         with torch.no_grad():
             for metric in self.metrics:
@@ -119,7 +123,7 @@
     def on_train_batch_start(self, batch, batch_idx):
         # Log initial audio
        if self.log_train_audio:
-            x, y,
+            x, y, _, _ = batch
             # Concat samples together for easier viewing in dashboard
             input_samples = rearrange(x, "b c t -> c (b t)").unsqueeze(0)
             target_samples = rearrange(y, "b c t -> c (b t)").unsqueeze(0)
@@ -141,7 +145,7 @@
             self.log_train_audio = False
 
     def on_validation_batch_start(self, batch, batch_idx, dataloader_idx):
-        x, target,
+        x, target, _, _ = batch
         # Log Input Metrics
         for metric in self.metrics:
             # SISDR returns negative values, so negate them
@@ -185,7 +189,7 @@
    def on_test_batch_start(self, batch, batch_idx, dataloader_idx):
         self.on_validation_batch_start(batch, batch_idx, dataloader_idx)
         # Log FAD
-        x, target,
+        x, target, _, _ = batch
         self.log(
             "Input_FAD",
             self.metrics["FAD"](x, target),
@@ -233,7 +237,7 @@ class OpenUnmixModel(torch.nn.Module):
         self.l1loss = torch.nn.L1Loss()
 
     def forward(self, batch):
-        x, target
+        x, target = batch
         X = spectrogram(x, self.window, self.n_fft, self.hop_length, self.alpha)
         Y = self.model(X)
         sep_out = self.separator(x).squeeze(1)
@@ -256,7 +260,7 @@ class DemucsModel(torch.nn.Module):
         self.l1loss = torch.nn.L1Loss()
 
     def forward(self, batch):
-        x, target
+        x, target = batch
         output = self.model(x).squeeze(1)
         loss = self.mrstftloss(output, target) + self.l1loss(output, target) * 100
         return loss, output
@@ -271,7 +275,7 @@ class DiffusionGenerationModel(nn.Module):
         self.model = DiffusionModel(in_channels=n_channels)
 
     def forward(self, batch):
-        x, target
+        x, target = batch
         sampled_out = self.model.sample(x)
         return self.model(x), sampled_out
 
@@ -326,3 +330,215 @@ def spectrogram(
     X = X.view(bs, chs, X.shape[-2], X.shape[-1])
 
     return torch.pow(X.abs() + 1e-8, alpha)
+
+
+# adapted from https://github.com/qiuqiangkong/audioset_tagging_cnn/blob/master/pytorch/models.py
+
+
+def init_layer(layer):
+    """Initialize a Linear or Convolutional layer."""
+    nn.init.xavier_uniform_(layer.weight)
+
+    if hasattr(layer, "bias"):
+        if layer.bias is not None:
+            layer.bias.data.fill_(0.0)
+
+
+def init_bn(bn):
+    """Initialize a Batchnorm layer."""
+    bn.bias.data.fill_(0.0)
+    bn.weight.data.fill_(1.0)
+
+
+class ConvBlock(nn.Module):
+    def __init__(self, in_channels, out_channels):
+        super(ConvBlock, self).__init__()
+
+        self.conv1 = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=(3, 3),
+            stride=(1, 1),
+            padding=(1, 1),
+            bias=False,
+        )
+
+        self.conv2 = nn.Conv2d(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=(3, 3),
+            stride=(1, 1),
+            padding=(1, 1),
+            bias=False,
+        )
+
+        self.bn1 = nn.BatchNorm2d(out_channels)
+        self.bn2 = nn.BatchNorm2d(out_channels)
+
+        self.init_weight()
+
+    def init_weight(self):
+        init_layer(self.conv1)
+        init_layer(self.conv2)
+        init_bn(self.bn1)
+        init_bn(self.bn2)
+
+    def forward(self, input, pool_size=(2, 2), pool_type="avg"):
+        x = input
+        x = F.relu_(self.bn1(self.conv1(x)))
+        x = F.relu_(self.bn2(self.conv2(x)))
+        if pool_type == "max":
+            x = F.max_pool2d(x, kernel_size=pool_size)
+        elif pool_type == "avg":
+            x = F.avg_pool2d(x, kernel_size=pool_size)
+        elif pool_type == "avg+max":
+            x1 = F.avg_pool2d(x, kernel_size=pool_size)
+            x2 = F.max_pool2d(x, kernel_size=pool_size)
+            x = x1 + x2
+        else:
+            raise Exception("Incorrect argument!")
+
+        return x
+
+
+class Cnn14(nn.Module):
+    def __init__(
+        self,
+        num_classes: int,
+        sample_rate: float,
+        n_fft: int = 2048,
+        hop_length: int = 512,
+        n_mels: int = 128,
+    ):
+        super().__init__()
+        self.num_classes = num_classes
+        self.n_fft = n_fft
+        self.hop_length = hop_length
+
+        window = torch.hann_window(n_fft)
+        self.register_buffer("window", window)
+
+        self.melspec = torchaudio.transforms.MelSpectrogram(
+            sample_rate,
+            n_fft,
+            hop_length=hop_length,
+            n_mels=n_mels,
+        )
+
+        self.bn0 = nn.BatchNorm2d(n_mels)
+
+        self.conv_block1 = ConvBlock(in_channels=1, out_channels=64)
+        self.conv_block2 = ConvBlock(in_channels=64, out_channels=128)
+        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
+        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)
+        self.conv_block5 = ConvBlock(in_channels=512, out_channels=1024)
+        self.conv_block6 = ConvBlock(in_channels=1024, out_channels=2048)
+
+        self.fc1 = nn.Linear(2048, 2048, bias=True)
+        self.fc_audioset = nn.Linear(2048, num_classes, bias=True)
+
+        self.init_weight()
+
+    def init_weight(self):
+        init_bn(self.bn0)
+        init_layer(self.fc1)
+        init_layer(self.fc_audioset)
+
+    def forward(self, x: torch.Tensor):
+        """
+        Input: (batch_size, data_length)"""
+
+        x = self.melspec(x)
+        x = x.permute(0, 2, 1, 3)
+        x = self.bn0(x)
+        x = x.permute(0, 2, 1, 3)
+
+        if self.training:
+            pass
+            # x = self.spec_augmenter(x)
+
+        x = self.conv_block1(x, pool_size=(2, 2), pool_type="avg")
+        x = F.dropout(x, p=0.2, training=self.training)
+        x = self.conv_block2(x, pool_size=(2, 2), pool_type="avg")
+        x = F.dropout(x, p=0.2, training=self.training)
+        x = self.conv_block3(x, pool_size=(2, 2), pool_type="avg")
+        x = F.dropout(x, p=0.2, training=self.training)
+        x = self.conv_block4(x, pool_size=(2, 2), pool_type="avg")
+        x = F.dropout(x, p=0.2, training=self.training)
+        x = self.conv_block5(x, pool_size=(2, 2), pool_type="avg")
+        x = F.dropout(x, p=0.2, training=self.training)
+        x = self.conv_block6(x, pool_size=(1, 1), pool_type="avg")
+        x = F.dropout(x, p=0.2, training=self.training)
+        x = torch.mean(x, dim=3)
+
+        (x1, _) = torch.max(x, dim=2)
+        x2 = torch.mean(x, dim=2)
+        x = x1 + x2
+        x = F.dropout(x, p=0.5, training=self.training)
+        x = F.relu_(self.fc1(x))
+        clipwise_output = self.fc_audioset(x)
+
+        return clipwise_output
+
+
+class FXClassifier(pl.LightningModule):
+    def __init__(
+        self,
+        lr: float,
+        lr_weight_decay: float,
+        sample_rate: float,
+        network: nn.Module,
+    ):
+        super().__init__()
+        self.lr = lr
+        self.lr_weight_decay = lr_weight_decay
+        self.sample_rate = sample_rate
+        self.network = network
+
+    def forward(self, x: torch.Tensor):
+        return self.network(x)
+
+    def common_step(self, batch, batch_idx, mode: str = "train"):
+        x, y, dry_label, wet_label = batch
+        pred_label = self.network(x)
+        loss = torch.nn.functional.cross_entropy(pred_label, dry_label)
+        self.log(
+            f"{mode}_loss",
+            loss,
+            on_step=True,
+            on_epoch=True,
+            prog_bar=True,
+            logger=True,
+            sync_dist=True,
+        )
+
+        self.log(
+            f"{mode}_mAP",
+            torchmetrics.functional.retrieval_average_precision(
+                pred_label, dry_label.long()
+            ),
+            on_step=True,
+            on_epoch=True,
+            prog_bar=True,
+            logger=True,
+            sync_dist=True,
+        )
+
+        return loss
+
+    def training_step(self, batch, batch_idx):
+        return self.common_step(batch, batch_idx, mode="train")
+
+    def validation_step(self, batch, batch_idx):
+        return self.common_step(batch, batch_idx, mode="valid")
+
+    def test_step(self, batch, batch_idx):
+        return self.common_step(batch, batch_idx, mode="test")
+
+    def configure_optimizers(self):
+        optimizer = torch.optim.AdamW(
+            self.network.parameters(),
+            lr=self.lr,
+            weight_decay=self.lr_weight_decay,
+        )
+        return optimizer
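
A minimal smoke test of the new classifier path; the batch size, channel count, and clip length are illustrative, and `sample_rate=48000` is an assumption rather than a value taken from this diff:

```python
import torch
from remfx.models import Cnn14, FXClassifier

net = Cnn14(num_classes=4, sample_rate=48000, n_fft=4096, hop_length=512, n_mels=128)
clf = FXClassifier(lr=1e-4, lr_weight_decay=1e-3, sample_rate=48000, network=net)

# (batch, channels, samples), matching the mono chunks VocalSet saves to input.wav
x = torch.randn(2, 1, 48000)
logits = clf(x)
print(logits.shape)  # torch.Size([2, 4]) -- one logit per effect class
```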