Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,762 Bytes
7f2690b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import copy
import numpy as np
import scipy.io.wavfile
import scipy.signal
from . import utils as ut
import pdb
def load_sound(wav_fname):
    """Read a WAV file and wrap its contents in a ``Sound``.

    Args:
        wav_fname: Path (or open file handle) passed straight to
            ``scipy.io.wavfile.read``.

    Returns:
        A ``Sound`` holding the decoded samples, the file's sampling rate,
        and one timestamp (in seconds, starting at 0) per sample.
    """
    sample_rate, data = scipy.io.wavfile.read(wav_fname)
    # Timestamp of sample i is i / sample_rate.
    timestamps = np.arange(len(data)) * (1. / sample_rate)
    return Sound(timestamps, sample_rate, data)
class Sound:
    """An audio clip: a sample array with its sampling rate and timestamps.

    Attributes:
        rate: Sampling rate (samples per second).
        samples: Array whose first axis indexes samples; when a second axis
            is present it indexes channels.
        length: Number of samples along the first axis at construction time.
        times: One timestamp per sample, in seconds.
    """

    def __init__(self, times, rate, samples=None):
        """Build a sound from ``(times, rate, samples)`` or ``(samples, rate)``.

        Args:
            times: Per-sample timestamps, or ``None`` to derive them from
                ``rate``.  In the two-argument form this slot carries the
                samples instead.
            rate: Sampling rate.
            samples: Sample array (array-likes are converted with
                ``np.asarray``).  float32 input is widened to float64.
        """
        # Allow Sound(samples, sr)
        if samples is None:
            samples = times
            times = None
        # Accept lists / other array-likes; a no-op for ndarrays.
        samples = np.asarray(samples)
        if samples.dtype == np.float32:
            samples = samples.astype('float64')
        self.rate = rate
        self.samples = samples
        self.length = samples.shape[0]
        if times is None:
            self.times = np.arange(len(self.samples)) / float(self.rate)
        else:
            self.times = times

    def copy(self):
        """Return a deep copy of this sound."""
        return copy.deepcopy(self)

    def parts(self):
        """Return the ``(times, rate, samples)`` triple."""
        return (self.times, self.rate, self.samples)

    def __getitem__(self, key):
        """Return the sub-clip selected by a slice over the sample axis.

        Python 3 routes ``sound[a:b]`` here (``__getslice__`` is never
        called), so this is what makes ``pad_slice`` and ``slice_time`` work.

        Raises:
            TypeError: If ``key`` is not a ``slice``.
        """
        if not isinstance(key, slice):
            raise TypeError(
                "Sound indices must be slices, not %s" % type(key).__name__)
        return Sound(self.times[key], self.rate, self.samples[key])

    def __getslice__(self, *args):
        """Python 2 slicing hook; delegates to ``__getitem__``."""
        return self[slice(*args)]

    def duration(self):
        """Return the clip length in seconds (sample count / rate)."""
        return self.samples.shape[0] / float(self.rate)

    def normalized(self, check=True):
        """Return a copy whose samples are floats clipped to [-1, 1].

        float64 samples are only clipped; any other dtype is treated as an
        integer type and divided by that type's maximum value first.

        Args:
            check: For float64 input, assert that no sample exceeds 4 in
                magnitude before clipping (guards against data that was
                never scaled to roughly unit range).
        """
        if self.samples.dtype == np.double:
            assert (not check) or np.max(np.abs(self.samples)) <= 4.
            x = copy.deepcopy(self)
            x.samples = np.clip(x.samples, -1., 1.)
            return x
        else:
            s = copy.deepcopy(self)
            s.samples = np.array(s.samples, 'double') / np.iinfo(s.samples.dtype).max
            # The most negative integer maps slightly below -1; clamp it.
            s.samples = np.clip(s.samples, -1., 1.)
            return s

    def unnormalized(self, dtype_name='int32'):
        """Return a copy with samples scaled to the full range of an int type.

        Args:
            dtype_name: Name of the target integer dtype.
        """
        s = self.normalized()
        inf = np.iinfo(np.dtype(dtype_name))
        samples = np.clip(s.samples, -1., 1.)
        samples = inf.max * samples
        samples = np.array(np.clip(samples, inf.min, inf.max), dtype_name)
        s.samples = samples
        return s

    def sample_from_time(self, t, bound=False):
        """Convert a time in seconds to the nearest sample index.

        Args:
            t: Time in seconds.
            bound: If true, clamp the index to ``[0, n_samples - 1]``.
        """
        idx = int(np.round(t * self.rate))
        if bound:
            idx = min(max(0, idx), self.samples.shape[0] - 1)
        return idx

    def shift_zero(self):
        """Return a copy whose timestamps start at zero."""
        s = copy.deepcopy(self)
        s.times -= s.times[0]
        return s

    def select_channel(self, c):
        """Return a copy containing only channel ``c`` (2-D samples)."""
        s = copy.deepcopy(self)
        s.samples = s.samples[:, c]
        return s

    def _silence(self, n):
        """Return ``n`` zero samples matching this sound's dtype and channels."""
        # Using the samples' own dtype avoids promoting e.g. int16 audio to
        # int64, which would break the iinfo-based scaling in normalized().
        return np.zeros((n,) + self.samples.shape[1:], self.samples.dtype)

    def left_pad_silence(self, n):
        """Return a new sound with ``n`` zero samples prepended.

        ``n`` must be non-negative.  With ``n == 0`` a zero-shifted copy is
        returned; otherwise timestamps are regenerated from the rate.
        """
        if n == 0:
            return self.shift_zero()
        samples = np.concatenate([self._silence(n), self.samples], axis=0)
        return Sound(None, self.rate, samples)

    def right_pad_silence(self, n):
        """Return a new sound with ``n`` zero samples appended.

        ``n`` must be non-negative.  With ``n == 0`` a zero-shifted copy is
        returned; otherwise timestamps are regenerated from the rate.
        """
        if n == 0:
            return self.shift_zero()
        samples = np.concatenate([self.samples, self._silence(n)], axis=0)
        return Sound(None, self.rate, samples)

    def pad_slice(self, s1, s2):
        """Return samples ``[s1, s2)``, zero-padding any out-of-range part.

        Args:
            s1: Start sample index; may be negative (pads on the left).
            s2: End sample index; may exceed the length (pads on the right).
        """
        assert s1 < self.samples.shape[0] and s2 >= 0
        s = self[max(0, s1): min(s2, self.samples.shape[0])]
        s = s.left_pad_silence(max(0, -s1))
        s = s.right_pad_silence(max(0, s2 - self.samples.shape[0]))
        return s

    def to_mono(self, force_copy=True):
        """Return a copy whose samples were passed through ``make_mono``.

        ``force_copy`` is accepted for interface compatibility and is not
        used.
        """
        s = copy.deepcopy(self)
        # NOTE(review): make_mono is not defined in the visible part of this
        # module -- confirm it exists at module level.
        s.samples = make_mono(s.samples)
        return s

    def slice_time(self, t1, t2):
        """Return the sub-clip between times ``t1`` and ``t2`` (seconds)."""
        # The short alias `st` is not defined on the class, so call the
        # full method name.
        return self[self.sample_from_time(t1): self.sample_from_time(t2)]

    @property
    def nchannels(self):
        """Number of channels: 1 for 1-D samples, else ``samples.shape[1]``."""
        return 1 if np.ndim(self.samples) == 1 else self.samples.shape[1]

    def save(self, fname):
        """Write the sound to ``fname`` as 16-bit WAV.

        Args:
            fname: Path or open file handle accepted by
                ``scipy.io.wavfile.write``.
        """
        s = self.unnormalized('int16')
        # samples are already (n_samples, n_channels), the layout scipy
        # expects; transposing would swap samples and channels.
        scipy.io.wavfile.write(fname, s.rate, s.samples)

    def resampled(self, new_rate, clip=True):
        """Return this sound resampled to ``new_rate``.

        Args:
            new_rate: Target sampling rate.
            clip: Forwarded to ``resample``.
        """
        if new_rate == self.rate:
            return copy.deepcopy(self)
        ratio = float(new_rate) / self.rate
        return Sound(None, new_rate, self.resample(self.samples, ratio, clip=clip))

    def trim_to_size(self, n):
        """Return a new sound holding at most the first ``n`` samples."""
        return Sound(None, self.rate, self.samples[:n])

    def resample(self, signal, sc, clip=True, num_samples=None):
        """Resample ``signal`` along its first axis with ``scipy.signal``.

        Args:
            signal: Array to resample.
            sc: Length scale factor (new length = ``round(len * sc)``).
            clip: If true, clip the result to [-1, 1]; otherwise cast it to
                int16.
            num_samples: Explicit output length; overrides ``sc`` when given.
        """
        n = int(round(signal.shape[0] * sc)) if num_samples is None else num_samples
        r = scipy.signal.resample(signal, n)
        if clip:
            r = np.clip(r, -1, 1)
        else:
            r = r.astype(np.int16)
        return r
|