amaye15 committed on
Commit 5b9ffda · 1 Parent(s): 41dba9a

Upload AutoEncoder

Files changed (3):
  1. config.json +4 -1
  2. model.safetensors +1 -1
  3. modeling_autoencoder.py +205 -59
config.json CHANGED
@@ -9,11 +9,14 @@
   "bidirectional": false,
   "compression_rate": 0.5,
   "dropout_rate": 0.1,
+  "embed": false,
   "input_dim": 128,
   "latent_dim": 64,
   "layer_types": "linear",
+  "max_position": false,
   "model_type": "autoencoder",
   "num_layers": 3,
   "torch_dtype": "float32",
-  "transformers_version": "4.35.2"
+  "transformers_version": "4.35.2",
+  "vocab_size": false
 }
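For reference, a minimal sketch of how the three added keys get populated, assuming the AutoEncoderConfig class from the modeling_autoencoder.py diff below is importable; the 10_000/512 values mirror the ones used in the config's own error message and are illustrative:

    from modeling_autoencoder import AutoEncoderConfig

    # Defaults, matching the left side of this diff: embeddings disabled.
    config = AutoEncoderConfig(input_dim=128, latent_dim=64, num_layers=3)

    # Token-level setup: embed=True requires integer vocab_size and max_position.
    config = AutoEncoderConfig(embed=True, vocab_size=10_000, max_position=512)
    print(config.to_json_string())  # now contains "embed", "max_position", "vocab_size"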
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f689c83a1c9d45dd2745cefa4b1180db30864ad397bf4df16142b117a8531d6
+oid sha256:214c2fd1c62f2246bbf01150156b1eae89b22a382f5e0f8d026e86f759f2367f
 size 133840
modeling_autoencoder.py CHANGED
@@ -1,7 +1,10 @@
 
 from typing import Optional, Sequence
+import torch
+from dataclasses import dataclass
 from torch import nn, Tensor
 from transformers import PretrainedConfig, PreTrainedModel, AutoConfig, AutoModel
+from transformers.utils import ModelOutput
 
 # from huggingface_hub import notebook_login
 
@@ -21,18 +24,62 @@ from transformers import PretrainedConfig, PreTrainedModel, AutoConfig, AutoModel
 # autoencoder = AutoModel.from_config(config, trust_remote_code = True)
 
 
+# Structure
+# Example
+# Model Outputs
+# Model Configuration
+# Model Layers
+# Model
+
+
+##########################################################################################
+#################################### Outputs #############################################
+##########################################################################################
+
+@dataclass
+class AutoencoderModelOutput(ModelOutput):
+    """
+    Represents the output of an autoencoder model. This class holds various
+    important tensors that are the result of passing data through an autoencoder.
+
+    Attributes:
+        logits (torch.FloatTensor, optional): The reconstructed output from the autoencoder.
+            This is typically the direct output of the decoder part of the model.
+        labels (torch.FloatTensor, optional): The true labels associated with the input data,
+            if available. Useful for supervised training scenarios or evaluation.
+        hidden_state (torch.FloatTensor, optional): The encoded representation of the input data.
+            This is the output of the encoder part of the model and serves as a compressed
+            representation of the input data.
+        loss (torch.FloatTensor, optional): The computed loss value when comparing the reconstructed
+            output to the original input data. This is essential for training and evaluating the model's performance.
+    """
+    logits: torch.FloatTensor = None
+    labels: torch.FloatTensor = None
+    hidden_state: torch.FloatTensor = None
+    loss: torch.FloatTensor = None
+
+##########################################################################################
+################################# Configuration ##########################################
+##########################################################################################
+
 class AutoEncoderConfig(PretrainedConfig):
     """
     Configuration class for AutoEncoder. This class stores the parameters for the autoencoder model.
 
     Attributes:
-        input_dim (int): The dimensionality of the input data (default: 128).
-        latent_dim (int): The dimensionality of the latent representation (default: 64).
-        layer_types (str): The type of layers used, e.g., 'linear', 'lstm', 'gru', 'rnn' (default: 'linear').
-        dropout_rate (float): The dropout rate applied after each layer (except for the last layer) (default: 0.1).
-        num_layers (int): The number of layers in the encoder/decoder (default: 3).
-        compression_rate (float): Factor by which to compress the dimensions through layers (default: 0.5).
-        bidirectional (bool): Whether the sequence layers should be bidirectional (default: False).
+        input_dim (int): The dimensionality of the input data. Default is 128.
+        latent_dim (int): The dimensionality of the latent representation. Default is 64.
+        layer_types (str): The type of layers used, e.g., 'linear', 'lstm', 'gru', 'rnn'. Default is 'linear'.
+        dropout_rate (float): The dropout rate applied after each layer (except for the last layer). Default is 0.1.
+        num_layers (int): The number of layers in the encoder/decoder. Default is 3.
+        compression_rate (float): Factor by which to compress the dimensions through layers. Default is 0.5.
+        bidirectional (bool): Whether the sequence layers should be bidirectional. Default is False.
+        embed (bool): Whether to use embedding for input data. If True, `vocab_size` and `max_position` must be specified. Default is False.
+        vocab_size (int): The size of the vocabulary. Required if `embed` is True.
+        max_position (int): The maximum position for positional encoding. Required if `embed` is True.
+
+    Raises:
+        ValueError: If `embed` is True and either `vocab_size` or `max_position` is not defined as an integer.
    """
     model_type = "autoencoder"
 
@@ -45,6 +92,9 @@ class AutoEncoderConfig(PretrainedConfig):
         num_layers: int = 3,
         compression_rate: float = 0.5,
         bidirectional: bool = False,
+        embed: bool = False,
+        vocab_size: int|bool = False,
+        max_position: int|bool = False,
         **kwargs
     ):
         super().__init__(**kwargs)
@@ -55,6 +105,19 @@ class AutoEncoderConfig(PretrainedConfig):
         self.num_layers = num_layers
         self.compression_rate = compression_rate
         self.bidirectional = bidirectional
+        self.embed = embed
+        self.vocab_size = vocab_size
+        self.max_position = max_position
+
+        if self.embed:
+            if not (isinstance(self.vocab_size, int) and not isinstance(self.vocab_size, bool)):
+                raise ValueError("vocab_size needs to be defined when embed is True - AutoEncoderConfig(embed=True, vocab_size=10_000, max_position=512)")
+            if not (isinstance(self.max_position, int) and not isinstance(self.max_position, bool)):
+                raise ValueError("max_position needs to be defined when embed is True - AutoEncoderConfig(embed=True, vocab_size=10_000, max_position=512)")
+
+##########################################################################################
+############################# Block/Encoder/Decoder ######################################
+##########################################################################################
 
 def create_layers(
     model_section: str,
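The new validation makes a misconfigured embedding setup fail fast at construction time. Assuming the class is importable, this is the behaviour the two raise statements above encode:

    from modeling_autoencoder import AutoEncoderConfig

    try:
        AutoEncoderConfig(embed=True)   # vocab_size and max_position left at False
    except ValueError as err:
        print(err)                      # "vocab_size needs to be defined when embed is True ..."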
@@ -64,7 +127,8 @@ def create_layers(
     num_layers: int,
     dropout_rate: float,
     compression_rate: float,
-    bidirectional: bool
+    bidirectional: bool,
+    classes: bool|int = False
 ) -> nn.Sequential:
     """
     Creates a sequence of layers for the encoder or decoder part of the autoencoder.
@@ -78,45 +142,69 @@ def create_layers(
         dropout_rate (float): The dropout rate to apply between layers.
         compression_rate (float): The compression rate for reducing dimensions through layers.
         bidirectional (bool): Whether the RNN layers should be bidirectional.
-
+        classes (bool|int): If an integer is provided, it defines the output dimension of the last layer in the decoder.
+            It's ignored for the encoder or if the value is False.
+
     Returns:
-        A nn.Sequential module containing the created layers.
+        A nn.Sequential module containing the created layers. The configuration of these layers is determined by the arguments provided.
+
+    Raises:
+        ValueError: If certain layer type conditions are not met or if required parameters for specific configurations are missing.
     """
-    layers = []
-    current_dim = input_dim
+
+    layers = []  # Initialize an empty list to store the layers.
+    current_dim = input_dim  # Start with the initial input dimension.
 
+    # Lists to store input and output dimensions for each layer.
     input_dimensions = []
     output_dimensions = []
 
+    # Calculate input and output dimensions for each layer.
     for _ in range(num_layers):
-        input_dimensions.append(current_dim)
-        next_dim = max(int(current_dim * compression_rate), latent_dim)
-        current_dim = next_dim
-        output_dimensions.append(current_dim)
+        input_dimensions.append(current_dim)  # Store current dimension.
+        next_dim = max(int(current_dim * compression_rate), latent_dim)  # Calculate next dimension with compression.
+        current_dim = next_dim  # Update current dimension.
+        output_dimensions.append(current_dim)  # Store output dimension.
 
+    # Ensure the last layer's output dimension is the latent dimension.
     output_dimensions[num_layers - 1] = latent_dim
 
+    # Adjust dimensions for decoder configuration.
     if model_section == "decoder":
+        # Swap input and output dimensions for decoder.
         input_dimensions, output_dimensions = output_dimensions, input_dimensions
-        input_dimensions.reverse()
+        input_dimensions.reverse()  # Reverse the order for decoder stack.
         output_dimensions.reverse()
 
+        # Set the final layer's dimension to classes if specified and valid.
+        if isinstance(classes, int) and not isinstance(classes, bool):
+            output_dimensions[-1] = classes
+
+    # Adjust dimensions for bidirectional RNN layers.
     if bidirectional and (layer_types in ['lstm', 'rnn', 'gru']):
         output_dimensions = [2 * value for value in output_dimensions]
 
+    # Construct layers based on the specified layer type.
     for idx, (input_dim, output_dim) in enumerate(zip(input_dimensions, output_dimensions)):
+        # Add layers according to the specified type.
         if layer_types == 'linear':
             layers.append(nn.Linear(input_dim, output_dim))
-        elif layer_types == 'lstm':
-            layers.append(nn.LSTM(input_dim, output_dim // (2 if bidirectional else 1), batch_first=True, bidirectional=bidirectional))
-        elif layer_types == 'rnn':
-            layers.append(nn.RNN(input_dim, output_dim // (2 if bidirectional else 1), batch_first=True, bidirectional=bidirectional))
-        elif layer_types == 'gru':
-            layers.append(nn.GRU(input_dim, output_dim // (2 if bidirectional else 1), batch_first=True, bidirectional=bidirectional))
+        elif layer_types in ['lstm', 'rnn', 'gru']:
+            rnn_layer = getattr(nn, layer_types.upper())  # Dynamically get the RNN layer class.
+            half_output_dim = output_dim // (2 if bidirectional else 1)
+            layers.append(rnn_layer(input_dim, half_output_dim, batch_first=True, bidirectional=bidirectional))
+
+        # Add dropout layer between layers, except for the last layer.
         if (idx != num_layers - 1) and (dropout_rate is not None):
             layers.append(nn.Dropout(dropout_rate))
+
+    # Return the sequence of layers as an nn.Sequential module.
     return nn.Sequential(*layers)
 
+##########################################################################################
+##################################### Model ##############################################
+##########################################################################################
+
 class AutoEncoder(PreTrainedModel):
     """
     AutoEncoder model for creating an encoder-decoder architecture.
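To see what the dimension bookkeeping above produces, here is a sketch using the repo's default sizes (input 128, latent 64, compression 0.5, three layers); the classes=10 value is illustrative, not from this commit:

    from modeling_autoencoder import create_layers

    enc = create_layers("encoder", "linear", 128, 64, 3, 0.1, 0.5, False)
    # Sequential: Linear(128->64), Dropout, Linear(64->64), Dropout, Linear(64->64)

    dec = create_layers("decoder", "linear", 128, 64, 3, 0.1, 0.5, False, classes=10)
    # Mirrors the encoder, except the final Linear maps to 10 classes instead of back to 128.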
@@ -130,52 +218,110 @@ class AutoEncoder(PreTrainedModel):
 
     def __init__(self, config: AutoEncoderConfig):
         super(AutoEncoder, self).__init__(config)
+
+        # Embeddings
+        if config.embed:
+            # Word Embeddings
+            self.word_embeddings = nn.Embedding(config.vocab_size,
+                                                config.input_dim,)
+            # Positional Embeddings
+            self.position_embeddings = nn.Embedding(config.max_position,
+                                                    config.input_dim,)
+        # Encoder
+        self.encoder = create_layers("encoder",
+                                     config.layer_types,
+                                     config.input_dim,
+                                     config.latent_dim,
+                                     config.num_layers,
+                                     config.dropout_rate,
+                                     config.compression_rate,
+                                     config.bidirectional,)
+        # Decoder
+        if config.embed:
+            # Assuming symmetry between encoder and decoder
+            self.decoder = create_layers("decoder",
+                                         config.layer_types,
+                                         config.input_dim,
+                                         config.latent_dim,
+                                         config.num_layers,
+                                         config.dropout_rate,
+                                         config.compression_rate,
+                                         config.bidirectional,
+                                         config.vocab_size,)
+        else:
+            # Assuming symmetry between encoder and decoder
+            self.decoder = create_layers("decoder",
+                                         config.layer_types,
+                                         config.input_dim,
+                                         config.latent_dim,
+                                         config.num_layers,
+                                         config.dropout_rate,
+                                         config.compression_rate,
+                                         config.bidirectional,)
+
+
+    def forward(self, input_ids: Tensor, position_ids: Optional[Tensor] = None, labels: Optional[Tensor] = None) -> AutoencoderModelOutput:
+
+        # Define Data Class
+        outputs = AutoencoderModelOutput()
+
+        outputs.labels = labels if labels is not None else input_ids
 
-        self.encoder = create_layers(
-            "encoder",
-            config.layer_types, config.input_dim, config.latent_dim,
-            config.num_layers, config.dropout_rate, config.compression_rate,
-            config.bidirectional
-        )
-        # Assuming symmetry between encoder and decoder
-        self.decoder = create_layers(
-            "decoder",
-            config.layer_types, config.input_dim, config.latent_dim,
-            config.num_layers, config.dropout_rate, config.compression_rate,
-            config.bidirectional
-        )
-
-    def forward(self, x: Tensor) -> Tensor:
-        """
-        Forward pass through the autoencoder.
-
-        Args:
-            x (Tensor): The input tensor to encode and decode.
-
-        Returns:
-            A Tensor that is the output of the decoder.
-        """
-        # Assuming self.config.layer_types contains only a single layer type as a string.
-        # If using sequence models, handle each layer's outputs
+        # Embeddings
+        if self.config.embed:
+            # Word Embeddings
+            input_embeddings = self.word_embeddings(input_ids)
+
+            # Positional Embeddings
+            seq_length = input_ids.size(1)
+            position_ids = position_ids if position_ids is not None else torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
+            position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
+            position_embeddings = self.position_embeddings(position_ids)
+
+            # Combine Embeddings
+            input_ids = input_embeddings + position_embeddings
+
+        # Non-Linear Encoding & Decoding
         if self.config.layer_types in ['lstm', 'rnn', 'gru']:
+            # Encoding
             for layer in self.encoder:
                 if isinstance(layer, nn.LSTM):
-                    x, (h_n, c_n) = layer(x)
+                    input_ids, (h_n, c_n) = layer(input_ids)
                 elif isinstance(layer, nn.RNN) or isinstance(layer, nn.GRU):
-                    x, h_o = layer(x)
+                    input_ids, h_o = layer(input_ids)
                 else:
-                    x = layer(x)
-
+                    input_ids = layer(input_ids)
+            # Hidden Vector
+            outputs.hidden_state = input_ids
+            # Decoding
             for layer in self.decoder:
                 if isinstance(layer, nn.LSTM):
-                    x, (h_n, c_n) = layer(x)
+                    input_ids, (h_n, c_n) = layer(input_ids)
                 elif isinstance(layer, nn.RNN) or isinstance(layer, nn.GRU):
-                    x, h_o = layer(x)
+                    input_ids, h_o = layer(input_ids)
                 else:
-                    x = layer(x)
+                    input_ids = layer(input_ids)
+
+        # Linear Encoding & Decoding
         else:
-            x = self.encoder(x)
-            x = self.decoder(x)
+            # Encoding
+            input_ids = self.encoder(input_ids)
+            # Hidden Vector
+            outputs.hidden_state = input_ids
+            # Decoding
+            input_ids = self.decoder(input_ids)
+
+        outputs.logits = input_ids
+
+        # Choose loss function based on dtype
+        if torch.is_floating_point(outputs.labels):
+            loss_fn = nn.MSELoss()
+            outputs.loss = loss_fn(outputs.logits.view(-1), outputs.labels.view(-1))
+        elif not torch.is_floating_point(outputs.labels) and not torch.is_complex(outputs.labels):
+            loss_fn = nn.CrossEntropyLoss()
+            outputs.loss = loss_fn(outputs.logits.view(-1, self.config.vocab_size), outputs.labels.view(-1))
+        else:
+            raise ValueError("Unsupported tensor dtype for these loss functions")
 
-        return x
+        return outputs
 
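Putting the pieces together, an end-to-end sketch of both forward paths, assuming the module above is importable as modeling_autoencoder; batch sizes and sequence lengths are illustrative:

    import torch
    from modeling_autoencoder import AutoEncoder, AutoEncoderConfig

    # Continuous features: float inputs fall through to the MSE branch.
    model = AutoEncoder(AutoEncoderConfig(input_dim=128, latent_dim=64))
    out = model(torch.randn(4, 128))
    print(out.hidden_state.shape, out.loss)   # torch.Size([4, 64]) and a scalar MSE

    # Token ids: integer inputs go through the embeddings and the CrossEntropy branch,
    # with the decoder's final layer widened to vocab_size via the classes argument.
    model = AutoEncoder(AutoEncoderConfig(embed=True, vocab_size=10_000, max_position=512))
    out = model(torch.randint(0, 10_000, (4, 32)))
    print(out.logits.shape)                   # torch.Size([4, 32, 10000])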