File size: 1,074 Bytes
9659078
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import gradio as gr
import torch
# Consolidated: the original split `from transformers import ...` across
# two statements in the middle of the setup code.
from transformers import AutoProcessor, BarkModel
from optimum.bettertransformer import BetterTransformer

# Load suno/bark in half precision to roughly halve GPU memory usage.
bark_model = BarkModel.from_pretrained("suno/bark", torch_dtype=torch.float16)
device = "cuda:0" if torch.cuda.is_available() else "cpu"
bark_model = bark_model.to(device)

# Matching processor for turning text prompts into Bark model inputs.
processor = AutoProcessor.from_pretrained("suno/bark")

# BetterTransformer swaps in fused attention kernels for faster inference.
bark_model = BetterTransformer.transform(bark_model, keep_original_model=False)

# Offload idle Bark sub-models to CPU so only the active one sits on the GPU.
bark_model.enable_cpu_offload()

from TTS.tts.configs.bark_config import BarkConfig
from TTS.tts.models.bark import Bark

# Coqui-TTS Bark pipeline; this is the model actually used by infer().
config = BarkConfig()
model = Bark.init_from_config(config)

# BUG FIX: Coqui's load_checkpoint() expects `checkpoint_dir` to be a
# filesystem path to the Bark checkpoint files. The original passed
# `bark_model` (a transformers BarkModel object), which would fail inside
# the loader.
# TODO(review): confirm the real checkpoint directory for this deployment.
BARK_CHECKPOINT_DIR = "bark/"
model.load_checkpoint(config, checkpoint_dir=BARK_CHECKPOINT_DIR, eval=True)

def infer(prompt):
    """Synthesize speech for *prompt* with the Coqui-TTS Bark model.

    Args:
        prompt: Text from the Gradio input textbox. When empty, a demo
            sentence is used instead.

    Returns:
        The status string ``"done"`` (the Gradio output is a plain
        textbox; the generated audio is not surfaced to the UI).
    """
    # BUG FIX: the original ignored `prompt` entirely and always spoke a
    # hard-coded sentence. Use the user's text, keeping the original
    # sentence as the empty-input fallback.
    text = prompt or "Hello, my name is Manmay , how are you?"

    # speaker_id="random" selects a random speaker embedding; output_dict
    # holds the generated waveform, which this demo discards.
    output_dict = model.synthesize(text, config, speaker_id="random", voice_dirs=None)

    return "done"

gr.Interface(fn=infer, inputs=[gr.Textbox()], outputs=[gr.Textbox()]).launch()