poonehmousavi committed on
Commit d2efa38 · verified · 1 Parent(s): 04455e3

Delete custom_model.py

Files changed (1)
  1. custom_model.py +0 -100
custom_model.py DELETED
@@ -1,100 +0,0 @@
- import torch
-
-
- class AttentionMLP(torch.nn.Module):
-     def __init__(self, input_dim, hidden_dim):
-         super(AttentionMLP, self).__init__()
-         self.layers = torch.nn.Sequential(
-             torch.nn.Linear(input_dim, hidden_dim),
-             torch.nn.ReLU(),
-             torch.nn.Linear(hidden_dim, 1, bias=False),
-         )
-
-     def forward(self, x):
-         # Score each position, then normalize the scores into attention
-         # weights along dim=2 (the codebook axis when the input is
-         # Batch x Time x num_codebooks x emb_dim).
-         x = self.layers(x)
-         att_w = torch.nn.functional.softmax(x, dim=2)
-         return att_w
-
-
- class Discrete_EmbeddingLayer(torch.nn.Module):
-     """This class handles embedding layers for discrete tokens.
-
-     Arguments
-     ---------
-     num_codebooks: int
-         Number of codebooks of the tokenizer.
-     vocab_size: int
-         Size of the dictionary of embeddings.
-     emb_dim: int
-         The size of each embedding vector.
-     pad_index: int (default: 0)
-         If specified, the entries at padding_idx do not contribute to the gradient.
-     init: boolean (default: False)
-         If set to True, initialize the embedding with the tokenizer embedding; otherwise initialize it randomly.
-     freeze: boolean (default: False)
-         If True, the embedding is frozen. If False, the embedding is trained
-         alongside the rest of the pipeline.
-
-     Example
-     -------
-     >>> from speechbrain.lobes.models.huggingface_transformers.encodec import Encodec
-     >>> model_hub = "facebook/encodec_24khz"
-     >>> save_path = "savedir"
-     >>> model = Encodec(model_hub, save_path)
-     >>> audio = torch.randn(4, 1000)
-     >>> length = torch.tensor([1.0, .5, .75, 1.0])
-     >>> tokens, emb = model.encode(audio, length)
-     >>> print(tokens.shape)
-     torch.Size([4, 4, 2])
-     >>> emb = Discrete_EmbeddingLayer(2, 1024, 1024)
-     >>> in_emb = emb(tokens)
-     >>> print(in_emb.shape)
-     torch.Size([4, 4, 2, 1024])
-     """
-
-     def __init__(
-         self,
-         num_codebooks,
-         vocab_size,
-         emb_dim,
-         pad_index=0,
-         init=False,
-         freeze=False,
-     ):
-         super(Discrete_EmbeddingLayer, self).__init__()
-         self.vocab_size = vocab_size
-         self.num_codebooks = num_codebooks
-         self.freeze = freeze
-         self.embedding = torch.nn.Embedding(
-             num_codebooks * vocab_size, emb_dim
-         ).requires_grad_(not self.freeze)
-         self.init = init
-
-     def init_embedding(self, weights):
-         # Overwrite the randomly initialized table with pretrained weights
-         # (e.g. the tokenizer's own codebook embeddings).
-         with torch.no_grad():
-             self.embedding.weight = torch.nn.Parameter(weights)
-
-     def forward(self, in_tokens):
-         """Computes the embedding for discrete tokens.
-
-         Arguments
-         ---------
-         in_tokens : torch.Tensor
-             A (Batch x Time x num_codebooks) tensor of discrete token indices.
-
-         Returns
-         -------
-         in_embs : torch.Tensor
-             A (Batch x Time x num_codebooks x emb_dim) tensor of token embeddings.
-         """
-         with torch.set_grad_enabled(not self.freeze):
-             # Offset each codebook's token IDs by codebook_index * vocab_size so
-             # that every codebook maps to its own block of the shared embedding
-             # table. The addition is kept out-of-place to avoid mutating the
-             # caller's token tensor.
-             in_tokens = in_tokens + torch.arange(
-                 0,
-                 self.num_codebooks * self.vocab_size,
-                 self.vocab_size,
-                 device=in_tokens.device,
-             )
-             # Look up the embeddings for the offset token IDs.
-             in_embs = self.embedding(in_tokens)
-             return in_embs
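
For reference, a minimal, hypothetical sketch of how the two deleted modules are typically combined: Discrete_EmbeddingLayer turns multi-codebook token IDs into embeddings, and AttentionMLP produces per-codebook weights that can pool the codebook axis. The tensor shapes, the hidden size, and the weighted-sum pooling step are assumptions for illustration, not part of this file.

import torch

# Assumes the AttentionMLP and Discrete_EmbeddingLayer classes from the
# (deleted) custom_model.py above are in scope.
batch, time, num_codebooks, vocab_size, emb_dim = 4, 10, 2, 1024, 1024

# Hypothetical tokens standing in for a tokenizer's output (Batch x Time x num_codebooks).
tokens = torch.randint(0, vocab_size, (batch, time, num_codebooks))

emb_layer = Discrete_EmbeddingLayer(num_codebooks, vocab_size, emb_dim)
attn = AttentionMLP(input_dim=emb_dim, hidden_dim=64)  # hidden_dim=64 is an arbitrary choice

in_embs = emb_layer(tokens)            # (batch, time, num_codebooks, emb_dim)
att_w = attn(in_embs)                  # (batch, time, num_codebooks, 1), softmax over dim=2
pooled = (att_w * in_embs).sum(dim=2)  # (batch, time, emb_dim): weighted sum over codebooks

print(in_embs.shape, att_w.shape, pooled.shape)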