update
Browse files
toolbox/pydub/volume.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
#!/usr/bin/python3
|
2 |
# -*- coding: utf-8 -*-
|
3 |
import argparse
|
|
|
4 |
from typing import List
|
5 |
|
6 |
import librosa
|
@@ -42,9 +43,10 @@ def set_volume(waveform: np.ndarray, sample_rate: int = 8000, volume: int = 0):
|
|
42 |
|
43 |
map_list = [
|
44 |
[0, -150],
|
45 |
-
[10, -
|
46 |
-
[50, -
|
47 |
-
[
|
|
|
48 |
]
|
49 |
stages = [a for a, b in map_list]
|
50 |
scores = [b for a, b in map_list]
|
@@ -56,6 +58,9 @@ def set_volume(waveform: np.ndarray, sample_rate: int = 8000, volume: int = 0):
|
|
56 |
scores=list(reversed(scores)),
|
57 |
)
|
58 |
|
|
|
|
|
|
|
59 |
audio_segment = audio_segment.apply_gain(target_db - audio_segment.dBFS)
|
60 |
|
61 |
samples = np.array(audio_segment.get_array_of_samples())
|
@@ -84,13 +89,15 @@ def get_args():
|
|
84 |
|
85 |
def main():
|
86 |
args = get_args()
|
|
|
|
|
87 |
|
88 |
waveform, sample_rate = librosa.load(args.filename, sr=8000)
|
89 |
|
90 |
waveform = set_volume(
|
91 |
waveform=waveform,
|
92 |
sample_rate=sample_rate,
|
93 |
-
volume=
|
94 |
)
|
95 |
waveform = np.array(waveform * (1 << 15), dtype=np.int16)
|
96 |
|
|
|
1 |
#!/usr/bin/python3
|
2 |
# -*- coding: utf-8 -*-
|
3 |
import argparse
|
4 |
+
import math
|
5 |
from typing import List
|
6 |
|
7 |
import librosa
|
|
|
43 |
|
44 |
map_list = [
|
45 |
[0, -150],
|
46 |
+
[10, -40],
|
47 |
+
[50, -12],
|
48 |
+
[75, -6],
|
49 |
+
[100, 0],
|
50 |
]
|
51 |
stages = [a for a, b in map_list]
|
52 |
scores = [b for a, b in map_list]
|
|
|
58 |
scores=list(reversed(scores)),
|
59 |
)
|
60 |
|
61 |
+
# min_db, max_db = -40, 0
|
62 |
+
# target_db = min_db + (max_db - min_db) * math.log10(1 + 9 * volume / 100)
|
63 |
+
|
64 |
audio_segment = audio_segment.apply_gain(target_db - audio_segment.dBFS)
|
65 |
|
66 |
samples = np.array(audio_segment.get_array_of_samples())
|
|
|
89 |
|
90 |
def main():
|
91 |
args = get_args()
|
92 |
+
from pydub.utils import ratio_to_db
|
93 |
+
print(ratio_to_db(0.3))
|
94 |
|
95 |
waveform, sample_rate = librosa.load(args.filename, sr=8000)
|
96 |
|
97 |
waveform = set_volume(
|
98 |
waveform=waveform,
|
99 |
sample_rate=sample_rate,
|
100 |
+
volume=100
|
101 |
)
|
102 |
waveform = np.array(waveform * (1 << 15), dtype=np.int16)
|
103 |
|
toolbox/torch/utils/data/dataset/vad_padding_jsonl_dataset.py
CHANGED
@@ -139,7 +139,7 @@ class VadPaddingJsonlDataset(IterableDataset):
|
|
139 |
speech_wave_np = self.make_sure_duration(speech_wave_np, self.expected_sample_rate, self.speech_target_duration)
|
140 |
|
141 |
# volume enhancement
|
142 |
-
volume = random.randint(
|
143 |
speech_wave_np = set_volume(speech_wave_np, sample_rate=self.expected_sample_rate, volume=volume)
|
144 |
|
145 |
noise_wave_list = list()
|
|
|
139 |
speech_wave_np = self.make_sure_duration(speech_wave_np, self.expected_sample_rate, self.speech_target_duration)
|
140 |
|
141 |
# volume enhancement
|
142 |
+
volume = random.randint(10, 80)
|
143 |
speech_wave_np = set_volume(speech_wave_np, sample_rate=self.expected_sample_rate, volume=volume)
|
144 |
|
145 |
noise_wave_list = list()
|