Prasada committed on
Commit 34fabd3 · 1 Parent(s): 871287c

Update app.py

Files changed (1)
  1. app.py +6 -1
app.py CHANGED
@@ -4,6 +4,8 @@ import numpy as np
 import torch
 import os
 import torch
+from scipy.io import wavfile
+import scipy.signal as sps
 from speechbrain.pretrained import EncoderClassifier
 from transformers import AutoProcessor, AutoModelForTextToSpectrogram, SpeechT5HifiGan
 
@@ -29,10 +31,13 @@ def create_speaker_embedding(waveform):
 
 def prepare_data(temp_text, temp_audio):
     rate, audio_data = temp_audio
+    new_rate = 16000
+    number_of_samples = round(len(audio_data) * float(new_rate) / rate)
+    audio_data = sps.resample(audio_data, number_of_samples)
     example = processor(
         text=temp_text,
         audio_target=audio_data,
-        sampling_rate=rate,
+        sampling_rate=16000,
         return_attention_mask=False,)
     example["speaker_embeddings"] = create_speaker_embedding(audio_data)
     example_embeddings = torch.tensor(example["speaker_embeddings"]).unsqueeze(0)
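
For context, a minimal sketch of how the resampling step introduced in this commit fits together, assuming a Gradio-style (rate, ndarray) audio tuple and the "microsoft/speecht5_tts" processor; the checkpoint name and the resample_to_16k helper are illustrative assumptions and not part of this commit, and the speaker-embedding step from app.py is omitted here.

import numpy as np
import scipy.signal as sps
from transformers import AutoProcessor

# Assumed checkpoint; app.py's actual processor setup is outside this diff.
processor = AutoProcessor.from_pretrained("microsoft/speecht5_tts")

def resample_to_16k(rate, audio_data):
    """Resample raw audio to the 16 kHz rate the SpeechT5 feature extractor expects."""
    new_rate = 16000
    # Same length calculation as the commit: scale the sample count by the rate ratio.
    number_of_samples = round(len(audio_data) * float(new_rate) / rate)
    return sps.resample(audio_data, number_of_samples), new_rate

# Example: a 1-second 440 Hz tone at 44.1 kHz becomes 16000 samples after resampling.
rate = 44100
audio_data = np.sin(2 * np.pi * 440 * np.arange(rate) / rate).astype(np.float32)
resampled, new_rate = resample_to_16k(rate, audio_data)

example = processor(
    text="hello world",
    audio_target=resampled,
    sampling_rate=new_rate,
    return_attention_mask=False,
)

The point of the change is that the processor's feature extractor is fed audio at a fixed 16 kHz, so passing the incoming microphone rate straight through (the removed sampling_rate=rate) would fail or mislabel the audio whenever the source rate differs.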