Spaces:

qgyd2021
/

cc_vad

Running

HoneyTian committed 6 days ago

Commit

ef3c782

1 Parent(s): 6046c0c

update

Files changed (2) hide show

toolbox/pydub/volume.py CHANGED Viewed

@@ -1,6 +1,7 @@
 #!/usr/bin/python3
 # -*- coding: utf-8 -*-
 import argparse
 from typing import List
 import librosa
@@ -42,9 +43,10 @@ def set_volume(waveform: np.ndarray, sample_rate: int = 8000, volume: int = 0):
     map_list = [
         [0, -150],
-        [10, -60],
-        [50, -35],
-        [100, -20],
     ]
     stages = [a for a, b in map_list]
     scores = [b for a, b in map_list]
@@ -56,6 +58,9 @@ def set_volume(waveform: np.ndarray, sample_rate: int = 8000, volume: int = 0):
         scores=list(reversed(scores)),
     )
     audio_segment = audio_segment.apply_gain(target_db - audio_segment.dBFS)
     samples = np.array(audio_segment.get_array_of_samples())
@@ -84,13 +89,15 @@ def get_args():
 def main():
     args = get_args()
     waveform, sample_rate = librosa.load(args.filename, sr=8000)
     waveform = set_volume(
         waveform=waveform,
         sample_rate=sample_rate,
-        volume=10
     )
     waveform = np.array(waveform * (1 << 15), dtype=np.int16)

 #!/usr/bin/python3
 # -*- coding: utf-8 -*-
 import argparse
+import math
 from typing import List
 import librosa
     map_list = [
         [0, -150],
+        [10, -40],
+        [50, -12],
+        [75, -6],
+        [100, 0],
     ]
     stages = [a for a, b in map_list]
     scores = [b for a, b in map_list]
         scores=list(reversed(scores)),
     )
+    # min_db, max_db = -40, 0
+    # target_db = min_db + (max_db - min_db) * math.log10(1 + 9 * volume / 100)
     audio_segment = audio_segment.apply_gain(target_db - audio_segment.dBFS)
     samples = np.array(audio_segment.get_array_of_samples())
 def main():
     args = get_args()
+    from pydub.utils import ratio_to_db
+    print(ratio_to_db(0.3))
     waveform, sample_rate = librosa.load(args.filename, sr=8000)
     waveform = set_volume(
         waveform=waveform,
         sample_rate=sample_rate,
+        volume=100
     )
     waveform = np.array(waveform * (1 << 15), dtype=np.int16)

toolbox/torch/utils/data/dataset/vad_padding_jsonl_dataset.py CHANGED Viewed

@@ -139,7 +139,7 @@ class VadPaddingJsonlDataset(IterableDataset):
         speech_wave_np = self.make_sure_duration(speech_wave_np, self.expected_sample_rate, self.speech_target_duration)
         # volume enhancement
-        volume = random.randint(5, 100)
         speech_wave_np = set_volume(speech_wave_np, sample_rate=self.expected_sample_rate, volume=volume)
         noise_wave_list = list()

         speech_wave_np = self.make_sure_duration(speech_wave_np, self.expected_sample_rate, self.speech_target_duration)
         # volume enhancement
+        volume = random.randint(10, 80)
         speech_wave_np = set_volume(speech_wave_np, sample_rate=self.expected_sample_rate, volume=volume)
         noise_wave_list = list()