Spaces:
Running
on
Zero
Running
on
Zero
Update dataloader/dataloader.py
Browse files- dataloader/dataloader.py +7 -5
dataloader/dataloader.py
CHANGED
@@ -36,7 +36,7 @@ def audioread(path, sampling_rate):
|
|
36 |
data, fs = sf.read(path)
|
37 |
|
38 |
# Normalize the audio data.
|
39 |
-
data = audio_norm(data)
|
40 |
|
41 |
# Resample the audio if the sample rate is different from the target sampling rate.
|
42 |
if fs != sampling_rate:
|
@@ -47,7 +47,7 @@ def audioread(path, sampling_rate):
|
|
47 |
data = data[:, 0]
|
48 |
|
49 |
# Return the processed audio data.
|
50 |
-
return data
|
51 |
|
52 |
def audio_norm(x):
|
53 |
"""
|
@@ -87,7 +87,7 @@ def audio_norm(x):
|
|
87 |
x = x * scalarx
|
88 |
|
89 |
# Return the doubly normalized audio signal.
|
90 |
-
return x
|
91 |
|
92 |
class DataReader(object):
|
93 |
"""
|
@@ -155,13 +155,15 @@ class DataReader(object):
|
|
155 |
utt_id = path.split('/')[-1]
|
156 |
|
157 |
# Read and normalize the audio data, converting it to float32 for processing.
|
158 |
-
data = audioread(path, self.sampling_rate).astype(np.float32)
|
|
|
|
|
159 |
|
160 |
# Reshape the data to ensure it's in the format [1, data_length].
|
161 |
inputs = np.reshape(data, [1, data.shape[0]])
|
162 |
|
163 |
# Return the reshaped audio data, utterance ID, and the length of the original data.
|
164 |
-
return inputs, utt_id, data.shape[0]
|
165 |
|
166 |
class Wave_Processor(object):
|
167 |
"""
|
|
|
36 |
data, fs = sf.read(path)
|
37 |
|
38 |
# Normalize the audio data.
|
39 |
+
data, scalar = audio_norm(data)
|
40 |
|
41 |
# Resample the audio if the sample rate is different from the target sampling rate.
|
42 |
if fs != sampling_rate:
|
|
|
47 |
data = data[:, 0]
|
48 |
|
49 |
# Return the processed audio data together with the scalar produced by audio_norm.
|
50 |
+
return data, scalar
|
51 |
|
52 |
def audio_norm(x):
|
53 |
"""
|
|
|
87 |
x = x * scalarx
|
88 |
|
89 |
# Return the doubly normalized audio signal and the reciprocal of the combined scale (scalar * scalarx).
|
90 |
+
return x, 1/(scalar * scalarx + EPS)
|
91 |
|
92 |
class DataReader(object):
|
93 |
"""
|
|
|
155 |
utt_id = path.split('/')[-1]
|
156 |
|
157 |
# Read and normalize the audio data, converting it to float32 for processing.
|
158 |
+
#data = audioread(path, self.sampling_rate).astype(np.float32)
|
159 |
+
data, scalar = audioread(path, self.sampling_rate)
|
160 |
+
data = data.astype(np.float32)
|
161 |
|
162 |
# Reshape the data to ensure it's in the format [1, data_length].
|
163 |
inputs = np.reshape(data, [1, data.shape[0]])
|
164 |
|
165 |
# Return the reshaped audio data, utterance ID, the length of the original data, and the normalization scalar.
|
166 |
+
return inputs, utt_id, data.shape[0], scalar
|
167 |
|
168 |
class Wave_Processor(object):
|
169 |
"""
|