HoneyTian committed on
Commit
ef3c782
·
1 Parent(s): 6046c0c
toolbox/pydub/volume.py CHANGED
@@ -1,6 +1,7 @@
1
  #!/usr/bin/python3
2
  # -*- coding: utf-8 -*-
3
  import argparse
 
4
  from typing import List
5
 
6
  import librosa
@@ -42,9 +43,10 @@ def set_volume(waveform: np.ndarray, sample_rate: int = 8000, volume: int = 0):
42
 
43
  map_list = [
44
  [0, -150],
45
- [10, -60],
46
- [50, -35],
47
- [100, -20],
 
48
  ]
49
  stages = [a for a, b in map_list]
50
  scores = [b for a, b in map_list]
@@ -56,6 +58,9 @@ def set_volume(waveform: np.ndarray, sample_rate: int = 8000, volume: int = 0):
56
  scores=list(reversed(scores)),
57
  )
58
 
 
 
 
59
  audio_segment = audio_segment.apply_gain(target_db - audio_segment.dBFS)
60
 
61
  samples = np.array(audio_segment.get_array_of_samples())
@@ -84,13 +89,15 @@ def get_args():
84
 
85
  def main():
86
  args = get_args()
 
 
87
 
88
  waveform, sample_rate = librosa.load(args.filename, sr=8000)
89
 
90
  waveform = set_volume(
91
  waveform=waveform,
92
  sample_rate=sample_rate,
93
- volume=10
94
  )
95
  waveform = np.array(waveform * (1 << 15), dtype=np.int16)
96
 
 
1
  #!/usr/bin/python3
2
  # -*- coding: utf-8 -*-
3
  import argparse
4
+ import math
5
  from typing import List
6
 
7
  import librosa
 
43
 
44
  map_list = [
45
  [0, -150],
46
+ [10, -40],
47
+ [50, -12],
48
+ [75, -6],
49
+ [100, 0],
50
  ]
51
  stages = [a for a, b in map_list]
52
  scores = [b for a, b in map_list]
 
58
  scores=list(reversed(scores)),
59
  )
60
 
61
+ # min_db, max_db = -40, 0
62
+ # target_db = min_db + (max_db - min_db) * math.log10(1 + 9 * volume / 100)
63
+
64
  audio_segment = audio_segment.apply_gain(target_db - audio_segment.dBFS)
65
 
66
  samples = np.array(audio_segment.get_array_of_samples())
 
89
 
90
  def main():
91
  args = get_args()
92
+ from pydub.utils import ratio_to_db
93
+ print(ratio_to_db(0.3))
94
 
95
  waveform, sample_rate = librosa.load(args.filename, sr=8000)
96
 
97
  waveform = set_volume(
98
  waveform=waveform,
99
  sample_rate=sample_rate,
100
+ volume=100
101
  )
102
  waveform = np.array(waveform * (1 << 15), dtype=np.int16)
103
 
toolbox/torch/utils/data/dataset/vad_padding_jsonl_dataset.py CHANGED
@@ -139,7 +139,7 @@ class VadPaddingJsonlDataset(IterableDataset):
139
  speech_wave_np = self.make_sure_duration(speech_wave_np, self.expected_sample_rate, self.speech_target_duration)
140
 
141
  # volume enhancement
142
- volume = random.randint(5, 100)
143
  speech_wave_np = set_volume(speech_wave_np, sample_rate=self.expected_sample_rate, volume=volume)
144
 
145
  noise_wave_list = list()
 
139
  speech_wave_np = self.make_sure_duration(speech_wave_np, self.expected_sample_rate, self.speech_target_duration)
140
 
141
  # volume enhancement
142
+ volume = random.randint(10, 80)
143
  speech_wave_np = set_volume(speech_wave_np, sample_rate=self.expected_sample_rate, volume=volume)
144
 
145
  noise_wave_list = list()