mattricesound committed on
Commit
c7f4805
·
1 Parent(s): 9072475

Fix chunk sample indices

Browse files
Files changed (2) hide show
  1. config.yaml +1 -2
  2. remfx/datasets.py +11 -6
config.yaml CHANGED
@@ -26,8 +26,7 @@ datamodule:
26
  sample_rate: ${sample_rate}
27
  root: ${oc.env:DATASET_ROOT}
28
  length: ${length}
29
- chunk_size_in_sec: 6
30
- num_chunks: 10
31
  val_split: 0.2
32
  batch_size: 16
33
  num_workers: 8
 
26
  sample_rate: ${sample_rate}
27
  root: ${oc.env:DATASET_ROOT}
28
  length: ${length}
29
+ chunk_size_in_sec: 3
 
30
  val_split: 0.2
31
  batch_size: 16
32
  num_workers: 8
remfx/datasets.py CHANGED
@@ -27,6 +27,7 @@ class GuitarFXDataset(Dataset):
27
  self.dry_files = []
28
  self.chunks = []
29
  self.labels = []
 
30
  self.root = Path(root)
31
  self.chunk_size_in_sec = chunk_size_in_sec
32
 
@@ -34,6 +35,7 @@ class GuitarFXDataset(Dataset):
34
  effect_types = [
35
  d.name for d in self.root.iterdir() if d.is_dir() and d != "Clean"
36
  ]
 
37
  for i, effect in enumerate(effect_types):
38
  for pickup in Path(self.root / effect).iterdir():
39
  wet_files = sorted(list(pickup.glob("*.wav")))
@@ -45,9 +47,11 @@ class GuitarFXDataset(Dataset):
45
  self.labels += [i] * len(wet_files)
46
  for audio_file in wet_files:
47
  chunk_starts = create_sequential_chunks(
48
- audio_file, self.chunk_size_in_sec, self.num_chunks
49
  )
50
  self.chunks += chunk_starts
 
 
51
  print(
52
  f"Found {len(self.wet_files)} wet files and {len(self.dry_files)} dry files.\n"
53
  f"Total chunks: {len(self.chunks)}"
@@ -59,15 +63,16 @@ class GuitarFXDataset(Dataset):
59
 
60
  def __getitem__(self, idx):
61
  # Load effected and "clean" audio
62
- song_idx = idx // self.num_chunks
 
63
  x, sr = torchaudio.load(self.wet_files[song_idx])
64
  y, sr = torchaudio.load(self.dry_files[song_idx])
65
  effect_label = self.labels[song_idx] # Effect label
66
 
67
- chunk_indices = self.chunks[idx]
68
- chunk_size_in_samples = self.chunk_size * sr
69
- x = x[:, chunk_indices[0] : chunk_indices[0] + chunk_size_in_samples]
70
- y = y[:, chunk_indices[0] : chunk_indices[0] + chunk_size_in_samples]
71
 
72
  resampled_x = self.resampler(x)
73
  resampled_y = self.resampler(y)
 
27
  self.dry_files = []
28
  self.chunks = []
29
  self.labels = []
30
+ self.song_idx = []
31
  self.root = Path(root)
32
  self.chunk_size_in_sec = chunk_size_in_sec
33
 
 
35
  effect_types = [
36
  d.name for d in self.root.iterdir() if d.is_dir() and d != "Clean"
37
  ]
38
+ current_file = 0
39
  for i, effect in enumerate(effect_types):
40
  for pickup in Path(self.root / effect).iterdir():
41
  wet_files = sorted(list(pickup.glob("*.wav")))
 
47
  self.labels += [i] * len(wet_files)
48
  for audio_file in wet_files:
49
  chunk_starts = create_sequential_chunks(
50
+ audio_file, self.chunk_size_in_sec
51
  )
52
  self.chunks += chunk_starts
53
+ self.song_idx += [current_file] * len(chunk_starts)
54
+ current_file += 1
55
  print(
56
  f"Found {len(self.wet_files)} wet files and {len(self.dry_files)} dry files.\n"
57
  f"Total chunks: {len(self.chunks)}"
 
63
 
64
  def __getitem__(self, idx):
65
  # Load effected and "clean" audio
66
+ print("HEY")
67
+ song_idx = self.song_idx[idx]
68
  x, sr = torchaudio.load(self.wet_files[song_idx])
69
  y, sr = torchaudio.load(self.dry_files[song_idx])
70
  effect_label = self.labels[song_idx] # Effect label
71
 
72
+ chunk_start = self.chunks[idx]
73
+ chunk_size_in_samples = self.chunk_size_in_sec * sr
74
+ x = x[:, chunk_start : chunk_start + chunk_size_in_samples]
75
+ y = y[:, chunk_start : chunk_start + chunk_size_in_samples]
76
 
77
  resampled_x = self.resampler(x)
78
  resampled_y = self.resampler(y)