yasserrmd committed on
Commit
720869b
·
verified ·
1 Parent(s): 7fd5b72

Update generate_audio.py

Browse files
Files changed (1) hide show
  1. generate_audio.py +4 -14
generate_audio.py CHANGED
@@ -43,8 +43,8 @@ class TTSGenerator:
43
  """
44
 
45
  # Load Bark model and processor for Speaker 2
46
- self.bark_processor = AutoProcessor.from_pretrained("suno/bark")
47
- self.bark_model = BarkModel.from_pretrained("suno/bark", torch_dtype=torch.float16).to(self.device)
48
  self.bark_sampling_rate = 24000
49
  self.voice_preset = "v2/en_speaker_6"
50
 
@@ -116,18 +116,8 @@ class TTSGenerator:
116
  # audio_arr = speech_output[0].cpu().numpy()
117
  # return audio_arr, self.bark_sampling_rate
118
  # Tokenize input text and obtain input IDs and attention mask
119
- inputs = self.bark_processor(text, voice_preset=self.voice_preset, return_tensors="pt", padding=True).to(self.device)
120
- input_ids = inputs.input_ids.to(self.device)
121
- attention_mask = inputs.attention_mask.to(self.device)
122
-
123
- # Generate speech output with both input IDs and attention mask
124
- speech_output = self.bark_model.generate(
125
- input_ids=input_ids,
126
- attention_mask=attention_mask,
127
- temperature=0.9,
128
- semantic_temperature=0.8
129
- )
130
- # Convert the generated audio to numpy array
131
  audio_arr = speech_output[0].cpu().numpy()
132
  return audio_arr, self.bark_sampling_rate
133
 
 
43
  """
44
 
45
  # Load Bark model and processor for Speaker 2
46
+ self.bark_processor = AutoProcessor.from_pretrained("suno/bark-small")
47
+ self.bark_model = BarkModel.from_pretrained("suno/bark-small", torch_dtype=torch.float16).to(self.device)
48
  self.bark_sampling_rate = 24000
49
  self.voice_preset = "v2/en_speaker_6"
50
 
 
116
  # audio_arr = speech_output[0].cpu().numpy()
117
  # return audio_arr, self.bark_sampling_rate
118
  # Tokenize input text and obtain input IDs and attention mask
119
+ inputs = bark_processor(text, voice_preset="v2/en_speaker_6").to(device)
120
+ speech_output = bark_model.generate(**inputs, temperature=0.9, semantic_temperature=0.8)
 
 
 
 
 
 
 
 
 
 
121
  audio_arr = speech_output[0].cpu().numpy()
122
  return audio_arr, self.bark_sampling_rate
123