NotebookLlamaGroq

Running

yasserrmd committed on Oct 30, 2024

Commit

720869b

verified ·

1 Parent(s): 7fd5b72

Update generate_audio.py

Files changed (1) hide show

generate_audio.py CHANGED Viewed

@@ -43,8 +43,8 @@ class TTSGenerator:
         """
         # Load Bark model and processor for Speaker 2
-        self.bark_processor = AutoProcessor.from_pretrained("suno/bark")
-        self.bark_model = BarkModel.from_pretrained("suno/bark", torch_dtype=torch.float16).to(self.device)
         self.bark_sampling_rate = 24000
         self.voice_preset = "v2/en_speaker_6"
@@ -116,18 +116,8 @@ class TTSGenerator:
         # audio_arr = speech_output[0].cpu().numpy()
         # return audio_arr, self.bark_sampling_rate
         # Tokenize input text and obtain input IDs and attention mask
-        inputs = self.bark_processor(text, voice_preset=self.voice_preset, return_tensors="pt", padding=True).to(self.device)
-        input_ids = inputs.input_ids.to(self.device)
-        attention_mask = inputs.attention_mask.to(self.device)
-        # Generate speech output with both input IDs and attention mask
-        speech_output = self.bark_model.generate(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            temperature=0.9,
-            semantic_temperature=0.8
-        )
-        # Convert the generated audio to numpy array
         audio_arr = speech_output[0].cpu().numpy()
         return audio_arr, self.bark_sampling_rate

         """
         # Load Bark model and processor for Speaker 2
+        self.bark_processor = AutoProcessor.from_pretrained("suno/bark-small")
+        self.bark_model = BarkModel.from_pretrained("suno/bark-small", torch_dtype=torch.float16).to(self.device)
         self.bark_sampling_rate = 24000
         self.voice_preset = "v2/en_speaker_6"
         # audio_arr = speech_output[0].cpu().numpy()
         # return audio_arr, self.bark_sampling_rate
         # Tokenize input text and obtain input IDs and attention mask
+        inputs = bark_processor(text, voice_preset="v2/en_speaker_6").to(device)
+        speech_output = bark_model.generate(**inputs, temperature=0.9, semantic_temperature=0.8)
         audio_arr = speech_output[0].cpu().numpy()
         return audio_arr, self.bark_sampling_rate