Spaces:
Running
Running
Update generate_audio.py
Browse files - generate_audio.py +4 -14
generate_audio.py
CHANGED
@@ -43,8 +43,8 @@ class TTSGenerator:
|
|
43 |
"""
|
44 |
|
45 |
# Load Bark model and processor for Speaker 2
|
46 |
-
self.bark_processor = AutoProcessor.from_pretrained("suno/bark")
|
47 |
-
self.bark_model = BarkModel.from_pretrained("suno/bark", torch_dtype=torch.float16).to(self.device)
|
48 |
self.bark_sampling_rate = 24000
|
49 |
self.voice_preset = "v2/en_speaker_6"
|
50 |
|
@@ -116,18 +116,8 @@ class TTSGenerator:
|
|
116 |
# audio_arr = speech_output[0].cpu().numpy()
|
117 |
# return audio_arr, self.bark_sampling_rate
|
118 |
# Tokenize input text and obtain input IDs and attention mask
|
119 |
-
inputs =
|
120 |
-
|
121 |
-
attention_mask = inputs.attention_mask.to(self.device)
|
122 |
-
|
123 |
-
# Generate speech output with both input IDs and attention mask
|
124 |
-
speech_output = self.bark_model.generate(
|
125 |
-
input_ids=input_ids,
|
126 |
-
attention_mask=attention_mask,
|
127 |
-
temperature=0.9,
|
128 |
-
semantic_temperature=0.8
|
129 |
-
)
|
130 |
-
# Convert the generated audio to numpy array
|
131 |
audio_arr = speech_output[0].cpu().numpy()
|
132 |
return audio_arr, self.bark_sampling_rate
|
133 |
|
|
|
43 |
"""
|
44 |
|
45 |
# Load Bark model and processor for Speaker 2
|
46 |
+
self.bark_processor = AutoProcessor.from_pretrained("suno/bark-small")
|
47 |
+
self.bark_model = BarkModel.from_pretrained("suno/bark-small", torch_dtype=torch.float16).to(self.device)
|
48 |
self.bark_sampling_rate = 24000
|
49 |
self.voice_preset = "v2/en_speaker_6"
|
50 |
|
|
|
116 |
# audio_arr = speech_output[0].cpu().numpy()
|
117 |
# return audio_arr, self.bark_sampling_rate
|
118 |
# Tokenize input text and obtain input IDs and attention mask
|
119 |
+
inputs = bark_processor(text, voice_preset="v2/en_speaker_6").to(device)
|
120 |
+
speech_output = bark_model.generate(**inputs, temperature=0.9, semantic_temperature=0.8)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
audio_arr = speech_output[0].cpu().numpy()
|
122 |
return audio_arr, self.bark_sampling_rate
|
123 |
|