amaye15 committed on
Commit 5b9ffda · 1 Parent(s): 41dba9a

Upload AutoEncoder

Files changed (3):
  1. config.json +4 -1
  2. model.safetensors +1 -1
  3. modeling_autoencoder.py +205 -59
config.json CHANGED
@@ -9,11 +9,14 @@
   "bidirectional": false,
   "compression_rate": 0.5,
   "dropout_rate": 0.1,
+  "embed": false,
   "input_dim": 128,
   "latent_dim": 64,
   "layer_types": "linear",
+  "max_position": false,
   "model_type": "autoencoder",
   "num_layers": 3,
   "torch_dtype": "float32",
-  "transformers_version": "4.35.2"
+  "transformers_version": "4.35.2",
+  "vocab_size": false
 }
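For reference, a minimal sketch of how the three added keys get populated, assuming the AutoEncoderConfig class from the modeling_autoencoder.py diff below is importable; the 10_000/512 values mirror the ones used in the config's own error message and are illustrative:

    from modeling_autoencoder import AutoEncoderConfig

    # Defaults, matching the left side of this diff: embeddings disabled.
    config = AutoEncoderConfig(input_dim=128, latent_dim=64, num_layers=3)

    # Token-level setup: embed=True requires integer vocab_size and max_position.
    config = AutoEncoderConfig(embed=True, vocab_size=10_000, max_position=512)
    print(config.to_json_string())  # now contains "embed", "max_position", "vocab_size"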
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f689c83a1c9d45dd2745cefa4b1180db30864ad397bf4df16142b117a8531d6
+oid sha256:214c2fd1c62f2246bbf01150156b1eae89b22a382f5e0f8d026e86f759f2367f
 size 133840
modeling_autoencoder.py CHANGED
@@ -1,7 +1,10 @@
 
 from typing import Optional, Sequence
+import torch
+from dataclasses import dataclass
 from torch import nn, Tensor
 from transformers import PretrainedConfig, PreTrainedModel, AutoConfig, AutoModel
+from transformers.utils import ModelOutput
 
 # from huggingface_hub import notebook_login
 
@@ -21,18 +24,62 @@ from transformers import PretrainedConfig, PreTrainedModel, AutoConfig, AutoModel
 # autoencoder = AutoModel.from_config(config, trust_remote_code = True)
 
 
+# Structure
+# Example
+# Model Outputs
+# Model Configuration
+# Model Layers
+# Model
+
+
+##########################################################################################
+#################################### Outputs #############################################
+##########################################################################################
+
+@dataclass
+class AutoencoderModelOutput(ModelOutput):
+    """
+    Represents the output of an autoencoder model. This class holds various
+    important tensors that are the result of passing data through an autoencoder.
+
+    Attributes:
+        logits (torch.FloatTensor, optional): The reconstructed output from the autoencoder.
+            This is typically the direct output of the decoder part of the model.
+        labels (torch.FloatTensor, optional): The true labels associated with the input data,
+            if available. Useful for supervised training scenarios or evaluation.
+        hidden_state (torch.FloatTensor, optional): The encoded representation of the input data.
+            This is the output of the encoder part of the model and serves as a compressed
+            representation of the input data.
+        loss (torch.FloatTensor, optional): The computed loss value when comparing the reconstructed
+            output to the original input data. This is essential for training and evaluating the model's performance.
+    """
+    logits: torch.FloatTensor = None
+    labels: torch.FloatTensor = None
+    hidden_state: torch.FloatTensor = None
+    loss: torch.FloatTensor = None
+
+##########################################################################################
+################################# Configuration ##########################################
+##########################################################################################
+
 class AutoEncoderConfig(PretrainedConfig):
     """
     Configuration class for AutoEncoder. This class stores the parameters for the autoencoder model.
 
     Attributes:
-        input_dim (int): The dimensionality of the input data (default: 128).
-        latent_dim (int): The dimensionality of the latent representation (default: 64).
-        layer_types (str): The type of layers used, e.g., 'linear', 'lstm', 'gru', 'rnn' (default: 'linear').
-        dropout_rate (float): The dropout rate applied after each layer (except for the last layer) (default: 0.1).
-        num_layers (int): The number of layers in the encoder/decoder (default: 3).
-        compression_rate (float): Factor by which to compress the dimensions through layers (default: 0.5).
-        bidirectional (bool): Whether the sequence layers should be bidirectional (default: False).
+        input_dim (int): The dimensionality of the input data. Default is 128.
+        latent_dim (int): The dimensionality of the latent representation. Default is 64.
+        layer_types (str): The type of layers used, e.g., 'linear', 'lstm', 'gru', 'rnn'. Default is 'linear'.
+        dropout_rate (float): The dropout rate applied after each layer (except for the last layer). Default is 0.1.
+        num_layers (int): The number of layers in the encoder/decoder. Default is 3.
+        compression_rate (float): Factor by which to compress the dimensions through layers. Default is 0.5.
+        bidirectional (bool): Whether the sequence layers should be bidirectional. Default is False.
+        embed (bool): Whether to use embedding for input data. If True, `vocab_size` and `max_position` must be specified. Default is False.
+        vocab_size (int): The size of the vocabulary. Required if `embed` is True.
+        max_position (int): The maximum position for positional encoding. Required if `embed` is True.
+
+    Raises:
+        ValueError: If `embed` is True and either `vocab_size` or `max_position` is not defined as an integer.
    """
     model_type = "autoencoder"
 
@@ -45,6 +92,9 @@ class AutoEncoderConfig(PretrainedConfig):
         num_layers: int = 3,
         compression_rate: float = 0.5,
         bidirectional: bool = False,
+        embed: bool = False,
+        vocab_size: int|bool = False,
+        max_position: int|bool = False,
         **kwargs
     ):
         super().__init__(**kwargs)
@@ -55,6 +105,19 @@ class AutoEncoderConfig(PretrainedConfig):
         self.num_layers = num_layers
         self.compression_rate = compression_rate
         self.bidirectional = bidirectional
+        self.embed = embed
+        self.vocab_size = vocab_size
+        self.max_position = max_position
+
+        if self.embed:
+            if not (isinstance(self.vocab_size, int) and not isinstance(self.vocab_size, bool)):
+                raise ValueError("vocab_size needs to be defined when embed is True - AutoEncoderConfig(embed=True, vocab_size=10_000, max_position=512)")
+            if not (isinstance(self.max_position, int) and not isinstance(self.max_position, bool)):
+                raise ValueError("max_position needs to be defined when embed is True - AutoEncoderConfig(embed=True, vocab_size=10_000, max_position=512)")
+
+##########################################################################################
+############################# Block/Encoder/Decoder ######################################
+##########################################################################################
 
 def create_layers(
     model_section: str,
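The new validation makes a misconfigured embedding setup fail fast at construction time. Assuming the class is importable, this is the behaviour the two raise statements above encode:

    from modeling_autoencoder import AutoEncoderConfig

    try:
        AutoEncoderConfig(embed=True)   # vocab_size and max_position left at False
    except ValueError as err:
        print(err)                      # "vocab_size needs to be defined when embed is True ..."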
@@ -64,7 +127,8 @@ def create_layers(
     num_layers: int,
     dropout_rate: float,
     compression_rate: float,
-    bidirectional: bool
+    bidirectional: bool,
+    classes: bool|int = False
 ) -> nn.Sequential:
     """
     Creates a sequence of layers for the encoder or decoder part of the autoencoder.
@@ -78,45 +142,69 @@ def create_layers(
         dropout_rate (float): The dropout rate to apply between layers.
         compression_rate (float): The compression rate for reducing dimensions through layers.
         bidirectional (bool): Whether the RNN layers should be bidirectional.
-
+        classes (bool|int): If an integer is provided, it defines the output dimension of the last layer in the decoder.
+            It's ignored for the encoder or if the value is False.
+
     Returns:
-        A nn.Sequential module containing the created layers.
+        A nn.Sequential module containing the created layers. The configuration of these layers is determined by the arguments provided.
+
+    Raises:
+        ValueError: If certain layer type conditions are not met or if required parameters for specific configurations are missing.
     """
-    layers = []
-    current_dim = input_dim
+
+    layers = []  # Initialize an empty list to store the layers.
+    current_dim = input_dim  # Start with the initial input dimension.
 
+    # Lists to store input and output dimensions for each layer.
     input_dimensions = []
     output_dimensions = []
 
+    # Calculate input and output dimensions for each layer.
     for _ in range(num_layers):
-        input_dimensions.append(current_dim)
-        next_dim = max(int(current_dim * compression_rate), latent_dim)
-        current_dim = next_dim
-        output_dimensions.append(current_dim)
+        input_dimensions.append(current_dim)  # Store current dimension.
+        next_dim = max(int(current_dim * compression_rate), latent_dim)  # Calculate next dimension with compression.
+        current_dim = next_dim  # Update current dimension.
+        output_dimensions.append(current_dim)  # Store output dimension.
 
+    # Ensure the last layer's output dimension is the latent dimension.
     output_dimensions[num_layers - 1] = latent_dim
 
+    # Adjust dimensions for decoder configuration.
     if model_section == "decoder":
+        # Swap input and output dimensions for decoder.
         input_dimensions, output_dimensions = output_dimensions, input_dimensions
-        input_dimensions.reverse()
+        input_dimensions.reverse()  # Reverse the order for decoder stack.
         output_dimensions.reverse()
 
+        # Set the final layer's dimension to classes if specified and valid.
+        if isinstance(classes, int) and not isinstance(classes, bool):
+            output_dimensions[-1] = classes
+
+    # Adjust dimensions for bidirectional RNN layers.
     if bidirectional and (layer_types in ['lstm', 'rnn', 'gru']):
         output_dimensions = [2 * value for value in output_dimensions]
 
+    # Construct layers based on the specified layer type.
     for idx, (input_dim, output_dim) in enumerate(zip(input_dimensions, output_dimensions)):
+        # Add layers according to the specified type.
         if layer_types == 'linear':
             layers.append(nn.Linear(input_dim, output_dim))
-        elif layer_types == 'lstm':
-            layers.append(nn.LSTM(input_dim, output_dim // (2 if bidirectional else 1), batch_first=True, bidirectional=bidirectional))
-        elif layer_types == 'rnn':
-            layers.append(nn.RNN(input_dim, output_dim // (2 if bidirectional else 1), batch_first=True, bidirectional=bidirectional))
-        elif layer_types == 'gru':
-            layers.append(nn.GRU(input_dim, output_dim // (2 if bidirectional else 1), batch_first=True, bidirectional=bidirectional))
+        elif layer_types in ['lstm', 'rnn', 'gru']:
+            rnn_layer = getattr(nn, layer_types.upper())  # Dynamically get the RNN layer class.
+            half_output_dim = output_dim // (2 if bidirectional else 1)
+            layers.append(rnn_layer(input_dim, half_output_dim, batch_first=True, bidirectional=bidirectional))
+
+        # Add dropout layer between layers, except for the last layer.
         if (idx != num_layers - 1) and (dropout_rate is not None):
             layers.append(nn.Dropout(dropout_rate))
+
+    # Return the sequence of layers as an nn.Sequential module.
     return nn.Sequential(*layers)
 
+##########################################################################################
+##################################### Model ##############################################
+##########################################################################################
+
 class AutoEncoder(PreTrainedModel):
     """
     AutoEncoder model for creating an encoder-decoder architecture.
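To see what the dimension bookkeeping above produces, here is a sketch using the repo's default sizes (input 128, latent 64, compression 0.5, three layers); the classes=10 value is illustrative, not from this commit:

    from modeling_autoencoder import create_layers

    enc = create_layers("encoder", "linear", 128, 64, 3, 0.1, 0.5, False)
    # Sequential: Linear(128->64), Dropout, Linear(64->64), Dropout, Linear(64->64)

    dec = create_layers("decoder", "linear", 128, 64, 3, 0.1, 0.5, False, classes=10)
    # Mirrors the encoder, except the final Linear maps to 10 classes instead of back to 128.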
@@ -130,52 +218,110 @@ class AutoEncoder(PreTrainedModel):
 
     def __init__(self, config: AutoEncoderConfig):
         super(AutoEncoder, self).__init__(config)
+
+        # Embeddings
+        if config.embed:
+            # Word Embeddings
+            self.word_embeddings = nn.Embedding(config.vocab_size,
+                                                config.input_dim,)
+            # Positional Embeddings
+            self.position_embeddings = nn.Embedding(config.max_position,
+                                                    config.input_dim,)
+        # Encoder
+        self.encoder = create_layers("encoder",
+                                     config.layer_types,
+                                     config.input_dim,
+                                     config.latent_dim,
+                                     config.num_layers,
+                                     config.dropout_rate,
+                                     config.compression_rate,
+                                     config.bidirectional,)
+        # Decoder
+        if config.embed:
+            # Assuming symmetry between encoder and decoder
+            self.decoder = create_layers("decoder",
+                                         config.layer_types,
+                                         config.input_dim,
+                                         config.latent_dim,
+                                         config.num_layers,
+                                         config.dropout_rate,
+                                         config.compression_rate,
+                                         config.bidirectional,
+                                         config.vocab_size,)
+        else:
+            # Assuming symmetry between encoder and decoder
+            self.decoder = create_layers("decoder",
+                                         config.layer_types,
+                                         config.input_dim,
+                                         config.latent_dim,
+                                         config.num_layers,
+                                         config.dropout_rate,
+                                         config.compression_rate,
+                                         config.bidirectional,)
+
+
+    def forward(self, input_ids: Tensor, position_ids: Optional[Tensor] = None, labels: Optional[Tensor] = None) -> AutoencoderModelOutput:
+
+        # Define Data Class
+        outputs = AutoencoderModelOutput()
+
+        outputs.labels = labels if labels is not None else input_ids
 
-        self.encoder = create_layers(
-            "encoder",
-            config.layer_types, config.input_dim, config.latent_dim,
-            config.num_layers, config.dropout_rate, config.compression_rate,
-            config.bidirectional
-        )
-        # Assuming symmetry between encoder and decoder
-        self.decoder = create_layers(
-            "decoder",
-            config.layer_types, config.input_dim, config.latent_dim,
-            config.num_layers, config.dropout_rate, config.compression_rate,
-            config.bidirectional
-        )
-
-    def forward(self, x: Tensor) -> Tensor:
-        """
-        Forward pass through the autoencoder.
-
-        Args:
-            x (Tensor): The input tensor to encode and decode.
-
-        Returns:
-            A Tensor that is the output of the decoder.
-        """
-        # Assuming self.config.layer_types contains only a single layer type as a string.
-        # If using sequence models, handle each layer's outputs
+        # Embeddings
+        if self.config.embed:
+            # Word Embeddings
+            input_embeddings = self.word_embeddings(input_ids)
+
+            # Positional Embeddings
+            seq_length = input_ids.size(1)
+            position_ids = position_ids if position_ids is not None else torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
+            position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
+            position_embeddings = self.position_embeddings(position_ids)
+
+            # Combine Embeddings
+            input_ids = input_embeddings + position_embeddings
+
+        # Non-Linear Encoding & Decoding
         if self.config.layer_types in ['lstm', 'rnn', 'gru']:
+            # Encoding
             for layer in self.encoder:
                 if isinstance(layer, nn.LSTM):
-                    x, (h_n, c_n) = layer(x)
+                    input_ids, (h_n, c_n) = layer(input_ids)
                 elif isinstance(layer, nn.RNN) or isinstance(layer, nn.GRU):
-                    x, h_o = layer(x)
+                    input_ids, h_o = layer(input_ids)
                 else:
-                    x = layer(x)
-
+                    input_ids = layer(input_ids)
+            # Hidden Vector
+            outputs.hidden_state = input_ids
+            # Decoding
             for layer in self.decoder:
                 if isinstance(layer, nn.LSTM):
-                    x, (h_n, c_n) = layer(x)
+                    input_ids, (h_n, c_n) = layer(input_ids)
                 elif isinstance(layer, nn.RNN) or isinstance(layer, nn.GRU):
-                    x, h_o = layer(x)
+                    input_ids, h_o = layer(input_ids)
                 else:
-                    x = layer(x)
+                    input_ids = layer(input_ids)
+
+        # Linear Encoding & Decoding
         else:
-            x = self.encoder(x)
-            x = self.decoder(x)
+            # Encoding
+            input_ids = self.encoder(input_ids)
+            # Hidden Vector
+            outputs.hidden_state = input_ids
+            # Decoding
+            input_ids = self.decoder(input_ids)
+
+        outputs.logits = input_ids
+
+        # Choose loss function based on dtype
+        if torch.is_floating_point(outputs.labels):
+            loss_fn = nn.MSELoss()
+            outputs.loss = loss_fn(outputs.logits.view(-1), outputs.labels.view(-1))
+        elif not torch.is_floating_point(outputs.labels) and not torch.is_complex(outputs.labels):
+            loss_fn = nn.CrossEntropyLoss()
+            outputs.loss = loss_fn(outputs.logits.view(-1, self.config.vocab_size), outputs.labels.view(-1))
+        else:
+            raise ValueError("Unsupported tensor dtype for these loss functions")
 
-        return x
+        return outputs
 
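Putting the pieces together, an end-to-end sketch of both forward paths, assuming the module above is importable as modeling_autoencoder; batch sizes and sequence lengths are illustrative:

    import torch
    from modeling_autoencoder import AutoEncoder, AutoEncoderConfig

    # Continuous features: float inputs fall through to the MSE branch.
    model = AutoEncoder(AutoEncoderConfig(input_dim=128, latent_dim=64))
    out = model(torch.randn(4, 128))
    print(out.hidden_state.shape, out.loss)   # torch.Size([4, 64]) and a scalar MSE

    # Token ids: integer inputs go through the embeddings and the CrossEntropy branch,
    # with the decoder's final layer widened to vocab_size via the classes argument.
    model = AutoEncoder(AutoEncoderConfig(embed=True, vocab_size=10_000, max_position=512))
    out = model(torch.randint(0, 10_000, (4, 32)))
    print(out.logits.shape)                   # torch.Size([4, 32, 10000])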