alibabasglab committed on
Commit
1690585
·
verified ·
1 Parent(s): 8b8ef61

Update dataloader/dataloader.py

Browse files
Files changed (1) hide show
  1. dataloader/dataloader.py +7 -5
dataloader/dataloader.py CHANGED
@@ -36,7 +36,7 @@ def audioread(path, sampling_rate):
36
  data, fs = sf.read(path)
37
 
38
  # Normalize the audio data.
39
- data = audio_norm(data)
40
 
41
  # Resample the audio if the sample rate is different from the target sampling rate.
42
  if fs != sampling_rate:
@@ -47,7 +47,7 @@ def audioread(path, sampling_rate):
47
  data = data[:, 0]
48
 
49
  # Return the processed audio data.
50
- return data
51
 
52
  def audio_norm(x):
53
  """
@@ -87,7 +87,7 @@ def audio_norm(x):
87
  x = x * scalarx
88
 
89
  # Return the doubly normalized audio signal.
90
- return x
91
 
92
  class DataReader(object):
93
  """
@@ -155,13 +155,15 @@ class DataReader(object):
155
  utt_id = path.split('/')[-1]
156
 
157
  # Read and normalize the audio data, converting it to float32 for processing.
158
- data = audioread(path, self.sampling_rate).astype(np.float32)
 
 
159
 
160
  # Reshape the data to ensure it's in the format [1, data_length].
161
  inputs = np.reshape(data, [1, data.shape[0]])
162
 
163
  # Return the reshaped audio data, utterance ID, and the length of the original data.
164
- return inputs, utt_id, data.shape[0]
165
 
166
  class Wave_Processor(object):
167
  """
 
36
  data, fs = sf.read(path)
37
 
38
  # Normalize the audio data.
39
+ data, scalar = audio_norm(data)
40
 
41
  # Resample the audio if the sample rate is different from the target sampling rate.
42
  if fs != sampling_rate:
 
47
  data = data[:, 0]
48
 
49
  # Return the processed audio data.
50
+ return data, scalar
51
 
52
  def audio_norm(x):
53
  """
 
87
  x = x * scalarx
88
 
89
  # Return the doubly normalized audio signal.
90
+ return x, , 1/(scalar * scalarx + EPS)
91
 
92
  class DataReader(object):
93
  """
 
155
  utt_id = path.split('/')[-1]
156
 
157
  # Read and normalize the audio data, converting it to float32 for processing.
158
+ #data = audioread(path, self.sampling_rate).astype(np.float32)
159
+ data, scalar = audioread(path, self.sampling_rate)
160
+ data = data.astype(np.float32)
161
 
162
  # Reshape the data to ensure it's in the format [1, data_length].
163
  inputs = np.reshape(data, [1, data.shape[0]])
164
 
165
  # Return the reshaped audio data, utterance ID, and the length of the original data.
166
+ return inputs, utt_id, data.shape[0], scalar
167
 
168
  class Wave_Processor(object):
169
  """