File size: 1,640 Bytes
fe62fb4
 
d912185
 
 
 
 
 
 
 
 
 
 
 
e70ad00
d912185
 
 
 
 
 
e70ad00
d912185
e70ad00
 
 
d912185
e70ad00
 
 
 
 
 
 
 
 
 
 
 
d912185
 
 
 
 
 
e70ad00
 
 
d912185
 
 
 
 
 
 
 
c5e1f80
 
 
731cb10
c5e1f80
e70ad00
c5e1f80
 
e366cd5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import audiofile
import numpy as np
import torch
from audiocraft.loaders import load_compression_model, load_lm_model
from audiocraft.conditioners import ConditioningAttributes




class AudioGen:
    """Text-to-audio wrapper that pairs a token language model with a codec.

    The language model produces discrete tokens from text descriptions and
    the compression model decodes those tokens into a waveform.

    Args:
        compression_model: Object exposing ``frame_rate`` and
            ``decode(tokens, scale)``.
        lm: Object exposing ``generate(conditions=..., max_gen_len=...)``.
        duration: Multiplied by ``frame_rate`` to obtain the number of token
            frames to generate (presumably seconds of audio per description).
    """

    def __init__(self, compression_model=None, lm=None, duration=.74):
        self.compression_model = compression_model
        self.lm = lm
        self.duration = duration

    @property
    def frame_rate(self):
        """Frame rate reported by the compression model."""
        return self.compression_model.frame_rate

    def generate(self, descriptions):
        """Generate audio for each description and join the clips end to end.

        Args:
            descriptions: Iterable of text prompts, one clip per prompt.

        Returns:
            A tensor of shape ``(1, n_draw * n_time_samples)`` in which the
            decoded clips are laid out one after another.
        """
        with torch.no_grad():
            conditions = []
            for prompt in descriptions:
                conditions.append(
                    ConditioningAttributes(text={'description': prompt}))

            # Look up the LM method before computing the length so that a
            # missing model fails in the same place as a direct call would.
            sample_tokens = self.lm.generate
            n_frames = int(self.duration * self.frame_rate)
            codes = sample_tokens(conditions=conditions,
                                  max_gen_len=n_frames)  # e.g. [n_draw, 4, 37]

            # No scale is passed to the decoder.
            waveform = self.compression_model.decode(codes, None)  # e.g. [n_draw, 1, 11840]

            # The middle (channel) axis is dropped by the reshape below, which
            # only succeeds when that axis has size 1.
            n_clips, _, clip_len = waveform.shape
            flat = waveform.reshape(1, n_clips * clip_len)  # clips back to back
        return flat




# Use the first GPU when CUDA is usable; otherwise fall back to the CPU so the
# script still runs (more slowly) on machines without a GPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Pretrained checkpoint: https://huggingface.co/facebook/audiogen-medium
sound_generator = AudioGen(
    compression_model=load_compression_model('facebook/audiogen-medium', device=device).eval(),
    # The language model is cast to float32 and put in eval mode before use.
    lm=load_lm_model('facebook/audiogen-medium', device=device).to(torch.float).eval(),
    duration=.74)

print('\n\n\n\n___________________')

# NOTE(review): 'barging' may be a typo for 'barking'; left unchanged because
# this string is the prompt fed to the model.
txt = 'dogs barging in the street'

# generate() returns a (1, n_samples) tensor; row 0 is the 1-D signal.
x = sound_generator.generate([txt])[0].detach().cpu().numpy()
# Peak-normalise in place; the epsilon avoids dividing by zero on silence.
x /= np.abs(x).max() + 1e-7

# Written at 16 kHz (presumably the checkpoint's native rate; confirm if the
# model is swapped).
audiofile.write('del_seane.wav', x, 16000)