Spaces:
Sleeping
Sleeping
Delete audio_processing.py
Browse files- audio_processing.py +0 -93
audio_processing.py
DELETED
@@ -1,93 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import numpy as np
|
3 |
-
from scipy.signal import get_window
|
4 |
-
import librosa.util as librosa_util
|
5 |
-
|
6 |
-
|
7 |
-
def window_sumsquare(window, n_frames, hop_length=200, win_length=800,
                     n_fft=800, dtype=np.float32, norm=None):
    """
    # from librosa 0.6
    Compute the sum-square envelope of a window function at a given hop length.

    This is used to estimate modulation effects induced by windowing
    observations in short-time fourier transforms.

    Parameters
    ----------
    window : string, tuple, number, callable, or list-like
        Window specification, as in `get_window`

    n_frames : int > 0
        The number of analysis frames

    hop_length : int > 0
        The number of samples to advance between frames

    win_length : [optional]
        The length of the window function. If `None`, `n_fft` is used.

    n_fft : int > 0
        The length of each analysis frame.

    dtype : np.dtype
        The data type of the output

    norm : [optional]
        Forwarded unchanged as the `norm` argument of
        `librosa_util.normalize` before the window is squared.

    Returns
    -------
    wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
        The sum-squared envelope of the window function
    """
    if win_length is None:
        win_length = n_fft

    n = n_fft + hop_length * (n_frames - 1)
    x = np.zeros(n, dtype=dtype)

    # Compute the squared window at the desired length
    win_sq = get_window(window, win_length, fftbins=True)
    win_sq = librosa_util.normalize(win_sq, norm=norm) ** 2
    # `size` is passed by keyword: newer librosa releases make it
    # keyword-only, while older releases accept the keyword form as well.
    win_sq = librosa_util.pad_center(win_sq, size=n_fft)

    # Fill the envelope: overlap-add the squared window at every hop,
    # truncating the window where it would run past the end of `x`.
    for i in range(n_frames):
        sample = i * hop_length
        x[sample:min(n, sample + n_fft)] += win_sq[:max(0, min(n_fft, n - sample))]
    return x
|
57 |
-
|
58 |
-
|
59 |
-
def griffin_lim(magnitudes, stft_fn, n_iters=30):
    """
    Reconstruct a signal from spectrogram magnitudes by starting from a
    random phase and alternating `stft_fn.transform` / `stft_fn.inverse`.

    PARAMS
    ------
    magnitudes: spectrogram magnitudes (a torch tensor; its `.size()` sets
        the shape of the random initial phase)
    stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods
    n_iters: number of transform/inverse refinement rounds run after the
        initial random-phase inversion

    RETURNS
    -------
    The result of the final `stft_fn.inverse` call with dimension 1 squeezed.
    """
    # Random initial phase: unit phasors converted back to angles by np.angle.
    angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size())))
    angles = angles.astype(np.float32)
    # Keep the phase on the same device as the magnitudes so the first
    # inverse call does not mix devices. The deprecated Variable wrapper is
    # not needed; plain tensors are used directly.
    angles = torch.from_numpy(angles).to(magnitudes.device)
    signal = stft_fn.inverse(magnitudes, angles).squeeze(1)

    for _ in range(n_iters):
        # Keep the given magnitudes, replace the phase with the one the
        # current signal estimate actually has.
        _, angles = stft_fn.transform(signal)
        signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
    return signal
|
76 |
-
|
77 |
-
|
78 |
-
def dynamic_range_compression(x, C=1, clip_val=1e-5):
    """
    Log-compress `x` after clamping it from below at `clip_val`.

    PARAMS
    ------
    x: torch tensor to compress
    C: compression factor
    clip_val: lower bound applied to `x` before scaling and taking the log
    """
    return torch.log(torch.clamp(x, min=clip_val) * C)
|
85 |
-
|
86 |
-
|
87 |
-
def dynamic_range_decompression(x, C=1):
    """
    Exponentiate `x` and divide the result by `C`.

    PARAMS
    ------
    x: torch tensor to decompress
    C: compression factor used to compress
    """
    return torch.exp(x) / C
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|