WebashalarForML committed on
Commit
0e1dba2
·
verified ·
1 Parent(s): 04e0ce2

Update audio.py

Browse files
Files changed (1) hide show
  1. audio.py +41 -46
audio.py CHANGED
@@ -1,33 +1,34 @@
1
- # Monkey‑patch Numba’s JIT dispatcher so it never tries to cache
2
- # (silences that “no locator available” RuntimeError).
3
- try:
4
- import numba.core.decorators as _nd
5
- _nd.JitDispatcher.enable_caching = lambda self: None
6
- except Exception:
7
- pass
8
- # ────────────────────────────────────────────────────────────
9
-
10
- import librosa
11
- import librosa.filters
12
  import numpy as np
13
- # import tensorflow as tf
14
- from scipy import signal
15
  from scipy.io import wavfile
 
 
16
  from hparams import hparams as hp
17
 
18
  def load_wav(path, sr):
19
- return librosa.core.load(path, sr=sr)[0]
 
 
 
20
 
21
- def load_wav(path, sr):
22
- return librosa.core.load(path, sr=sr)[0]
 
 
 
 
23
 
24
- def save_wav(wav, path, sr):
25
- wav *= 32767 / max(0.01, np.max(np.abs(wav)))
26
- #proposed by @dsmiller
27
- wavfile.write(path, sr, wav.astype(np.int16))
28
 
29
- def save_wavenet_wav(wav, path, sr):
30
- librosa.output.write_wav(path, wav, sr=sr)
 
 
 
 
 
 
31
 
32
  def preemphasis(wav, k, preemphasize=True):
33
  if preemphasize:
@@ -49,18 +50,14 @@ def get_hop_size():
49
  def linearspectrogram(wav):
50
  D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize))
51
  S = _amp_to_db(np.abs(D)) - hp.ref_level_db
52
-
53
- if hp.signal_normalization:
54
- return _normalize(S)
55
- return S
56
 
57
  def melspectrogram(wav):
58
  D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize))
59
  S = _amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db
60
-
61
- if hp.signal_normalization:
62
- return _normalize(S)
63
- return S
64
 
65
  def _lws_processor():
66
  import lws
@@ -68,15 +65,12 @@ def _lws_processor():
68
 
69
  def _stft(y):
70
  if hp.use_lws:
71
- return _lws_processor(hp).stft(y).T
72
  else:
 
73
  return librosa.stft(y=y, n_fft=hp.n_fft, hop_length=get_hop_size(), win_length=hp.win_size)
74
 
75
- ##########################################################
76
- #Those are only correct when using lws!!! (This was messing with Wavenet quality for a long time!)
77
  def num_frames(length, fsize, fshift):
78
- """Compute number of time frames of spectrogram
79
- """
80
  pad = (fsize - fshift)
81
  if length % fshift == 0:
82
  M = (length + pad * 2 - fsize) // fshift + 1
@@ -84,40 +78,41 @@ def num_frames(length, fsize, fshift):
84
  M = (length + pad * 2 - fsize) // fshift + 2
85
  return M
86
 
87
-
88
  def pad_lr(x, fsize, fshift):
89
- """Compute left and right padding
90
- """
91
  M = num_frames(len(x), fsize, fshift)
92
  pad = (fsize - fshift)
93
  T = len(x) + 2 * pad
94
  r = (M - 1) * fshift + fsize - T
95
  return pad, pad + r
96
- ##########################################################
97
- #Librosa correct padding
98
  def librosa_pad_lr(x, fsize, fshift):
99
  return 0, (x.shape[0] // fshift + 1) * fshift - x.shape[0]
100
 
101
- # Conversions
102
  _mel_basis = None
103
 
104
- def _linear_to_mel(spectogram):
105
  global _mel_basis
106
  if _mel_basis is None:
107
  _mel_basis = _build_mel_basis()
108
- return np.dot(_mel_basis, spectogram)
109
 
110
  def _build_mel_basis():
 
111
  assert hp.fmax <= hp.sample_rate // 2
112
- return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels,
113
- fmin=hp.fmin, fmax=hp.fmax)
 
 
 
 
 
114
 
115
  def _amp_to_db(x):
116
  min_level = np.exp(hp.min_level_db / 20 * np.log(10))
117
  return 20 * np.log10(np.maximum(min_level, x))
118
 
119
  def _db_to_amp(x):
120
- return np.power(10.0, (x) * 0.05)
121
 
122
  def _normalize(S):
123
  if hp.allow_clipping_in_normalization:
 
1
+ import os
 
 
 
 
 
 
 
 
 
 
2
  import numpy as np
 
 
3
  from scipy.io import wavfile
4
+ from scipy import signal
5
+ import resampy
6
  from hparams import hparams as hp
7
 
8
  def load_wav(path, sr):
9
+ """
10
+ Load a WAV file and resample it using scipy + resampy.
11
+ """
12
+ orig_sr, audio = wavfile.read(path)
13
 
14
+ # Normalize if needed
15
+ if audio.dtype.kind == 'i':
16
+ max_val = np.iinfo(audio.dtype).max
17
+ audio = audio.astype(np.float32) / max_val
18
+ else:
19
+ audio = audio.astype(np.float32)
20
 
21
+ if orig_sr != sr:
22
+ audio = resampy.resample(audio, orig_sr, sr)
 
 
23
 
24
+ return audio
25
+
def save_wav(wav, path, sr):
    """Write a floating-point waveform to ``path`` as a 16-bit PCM WAV file.

    Samples are multiplied by 32767, clipped to [-32767, 32767] and cast
    to ``int16`` (the cast truncates toward zero). The input array is not
    modified.

    Args:
        wav: NumPy array of samples, nominally in [-1.0, 1.0].
        path: Destination file path.
        sr: Sample rate in Hz to record in the file header.
    """
    scaled = wav * 32767
    clipped = scaled.clip(-32767, 32767)
    wavfile.write(path, sr, clipped.astype(np.int16))
32
 
33
  def preemphasis(wav, k, preemphasize=True):
34
  if preemphasize:
 
def linearspectrogram(wav):
    """Compute the linear-frequency spectrogram of ``wav`` in decibels.

    The waveform is pre-emphasised according to ``hp.preemphasis`` /
    ``hp.preemphasize``, transformed with ``_stft``, converted from
    magnitude to dB and offset by ``hp.ref_level_db``. The result is
    passed through ``_normalize`` when ``hp.signal_normalization`` is set.
    """
    emphasized = preemphasis(wav, hp.preemphasis, hp.preemphasize)
    magnitude = np.abs(_stft(emphasized))
    level_db = _amp_to_db(magnitude) - hp.ref_level_db

    if hp.signal_normalization:
        return _normalize(level_db)
    return level_db
 
 
55
 
def melspectrogram(wav):
    """Compute the mel-frequency spectrogram of ``wav`` in decibels.

    Same pipeline as ``linearspectrogram`` except that the STFT magnitude
    is projected onto the mel filterbank (``_linear_to_mel``) before the
    dB conversion. The result is offset by ``hp.ref_level_db`` and passed
    through ``_normalize`` when ``hp.signal_normalization`` is set.
    """
    emphasized = preemphasis(wav, hp.preemphasis, hp.preemphasize)
    mel_magnitude = _linear_to_mel(np.abs(_stft(emphasized)))
    level_db = _amp_to_db(mel_magnitude) - hp.ref_level_db

    if hp.signal_normalization:
        return _normalize(level_db)
    return level_db
 
 
61
 
62
  def _lws_processor():
63
  import lws
 
65
 
def _stft(y):
    """Return the short-time Fourier transform of ``y``.

    Uses the lws processor (transposed output) when ``hp.use_lws`` is
    true, otherwise ``librosa.stft`` configured from ``hp.n_fft``,
    ``get_hop_size()`` and ``hp.win_size``.
    """
    if not hp.use_lws:
        # Imported lazily so that merely importing this module does not
        # load librosa.
        import librosa
        return librosa.stft(
            y=y,
            n_fft=hp.n_fft,
            hop_length=get_hop_size(),
            win_length=hp.win_size,
        )
    return _lws_processor().stft(y).T
72
 
 
 
def num_frames(length, fsize, fshift):
    """Compute the number of time frames of a spectrogram.

    NOTE: per the original author's comment, this count is only correct
    for lws-style framing, not for librosa's.

    Args:
        length: Number of samples in the signal.
        fsize: Frame (window) size in samples.
        fshift: Frame shift (hop) in samples.

    Returns:
        Frame count as an int.
    """
    pad = fsize - fshift
    base = (length + 2 * pad - fsize) // fshift
    # One extra frame is needed when the length is not a multiple of the hop.
    extra = 1 if length % fshift == 0 else 2
    return base + extra
80
 
 
def pad_lr(x, fsize, fshift):
    """Compute left and right padding for ``x`` given frame size and shift.

    The left pad is ``fsize - fshift``; the right pad is the same amount
    plus whatever is needed so that ``num_frames`` frames exactly cover
    the padded signal.

    Returns:
        Tuple ``(left_pad, right_pad)`` in samples.
    """
    n_samples = len(x)
    left = fsize - fshift
    frames = num_frames(n_samples, fsize, fshift)
    padded_len = n_samples + 2 * left
    # Samples still missing for the last frame to fit completely.
    shortfall = (frames - 1) * fshift + fsize - padded_len
    return left, left + shortfall
87
+
 
def librosa_pad_lr(x, fsize, fshift):
    """Compute (left, right) padding matching librosa-style framing.

    No left padding is applied; the right pad extends ``x`` to the next
    multiple of ``fshift`` strictly greater than its current length.
    ``fsize`` is accepted for signature parity with ``pad_lr`` but unused.
    """
    n_samples = x.shape[0]
    padded_len = (n_samples // fshift + 1) * fshift
    return 0, padded_len - n_samples
90
 
 
91
  _mel_basis = None
92
 
def _linear_to_mel(spectrogram):
    """Project a linear-frequency spectrogram onto the mel filterbank.

    The filterbank is built by ``_build_mel_basis`` on first use and
    cached in the module-level ``_mel_basis``.
    """
    global _mel_basis
    basis = _mel_basis
    if basis is None:
        basis = _mel_basis = _build_mel_basis()
    return np.dot(basis, spectrogram)
98
 
def _build_mel_basis():
    """Build the mel filterbank matrix from the ``hp`` settings.

    Requires ``hp.fmax`` to be at most half of ``hp.sample_rate``.
    """
    # Deferred import: librosa is only needed when a mel basis is built.
    from librosa import filters as mel_filters

    assert hp.fmax <= hp.sample_rate // 2
    # Arguments are passed by keyword; recent librosa releases reject
    # positional sr/n_fft.
    mel_kwargs = dict(
        sr=hp.sample_rate,
        n_fft=hp.n_fft,
        n_mels=hp.num_mels,
        fmin=hp.fmin,
        fmax=hp.fmax,
    )
    return mel_filters.mel(**mel_kwargs)
109
 
def _amp_to_db(x):
    """Convert linear amplitude to decibels (``20 * log10``).

    Amplitudes are floored at the linear equivalent of
    ``hp.min_level_db`` before taking the logarithm, so the result never
    drops below that level and ``log10(0)`` is avoided.
    """
    floor_exponent = hp.min_level_db / 20 * np.log(10)
    amplitude_floor = np.exp(floor_exponent)  # == 10 ** (min_level_db / 20)
    floored = np.maximum(amplitude_floor, x)
    return 20 * np.log10(floored)
113
 
def _db_to_amp(x):
    """Convert decibels to linear amplitude, i.e. ``10 ** (x / 20)``."""
    exponent = x * 0.05
    return np.power(10.0, exponent)
116
 
117
  def _normalize(S):
118
  if hp.allow_clipping_in_normalization: