Commit · 6afc4db
1 Parent(s): f298750
- .gitignore +1 -0
- __init__.py +0 -0
- config.json +81 -0
- configuration_mlp.py +37 -0
- model.safetensors +3 -0
- modeling_mlp.py +85 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+__pycache__
__init__.py
ADDED
File without changes
config.json
ADDED
@@ -0,0 +1,81 @@
+{
+  "architectures": [
+    "MLP"
+  ],
+  "dropout": 0.1,
+  "embedding_size": 8,
+  "hidden_size": 2048,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8",
+    "9": "LABEL_9",
+    "10": "LABEL_10",
+    "11": "LABEL_11",
+    "12": "LABEL_12",
+    "13": "LABEL_13",
+    "14": "LABEL_14",
+    "15": "LABEL_15",
+    "16": "LABEL_16",
+    "17": "LABEL_17",
+    "18": "LABEL_18",
+    "19": "LABEL_19",
+    "20": "LABEL_20",
+    "21": "LABEL_21",
+    "22": "LABEL_22",
+    "23": "LABEL_23",
+    "24": "LABEL_24",
+    "25": "LABEL_25",
+    "26": "LABEL_26",
+    "27": "LABEL_27",
+    "28": "LABEL_28",
+    "29": "LABEL_29",
+    "30": "LABEL_30",
+    "31": "LABEL_31"
+  },
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_10": 10,
+    "LABEL_11": 11,
+    "LABEL_12": 12,
+    "LABEL_13": 13,
+    "LABEL_14": 14,
+    "LABEL_15": 15,
+    "LABEL_16": 16,
+    "LABEL_17": 17,
+    "LABEL_18": 18,
+    "LABEL_19": 19,
+    "LABEL_2": 2,
+    "LABEL_20": 20,
+    "LABEL_21": 21,
+    "LABEL_22": 22,
+    "LABEL_23": 23,
+    "LABEL_24": 24,
+    "LABEL_25": 25,
+    "LABEL_26": 26,
+    "LABEL_27": 27,
+    "LABEL_28": 28,
+    "LABEL_29": 29,
+    "LABEL_3": 3,
+    "LABEL_30": 30,
+    "LABEL_31": 31,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8,
+    "LABEL_9": 9
+  },
+  "model_type": "mlp",
+  "num_hidden_layers": 3,
+  "sequence_length": 8,
+  "torch_dtype": "float32",
+  "transformers_version": "4.47.1"
+}
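
Note (not part of the commit): once configuration_mlp.py below is imported, its AutoConfig.register("mlp", MLPConfig) call lets this config.json round-trip through AutoConfig. A minimal sketch, assuming it is run from a checkout of this repo (the local path "./" is illustrative):

import configuration_mlp  # noqa: F401 -- runs AutoConfig.register("mlp", MLPConfig)
from transformers import AutoConfig

config = AutoConfig.from_pretrained("./")      # reads the config.json in this repo
print(type(config).__name__)                   # MLPConfig
print(config.num_labels, config.hidden_size)   # 32 2048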
configuration_mlp.py
ADDED
@@ -0,0 +1,37 @@
+from transformers import PretrainedConfig, AutoConfig
+
+class MLPConfig(PretrainedConfig):
+    r"""
+    Config for the MLP model.
+
+    Args:
+        embedding_size (int, 8): Size of the input embeddings (last dimension of the 3D input).
+        sequence_length (int, 8): Number of tokens in input sequence (middle dimension; must be fixed).
+        num_labels (int, 32): Number of output labels (for multi-label classification).
+        hidden_size (int, 2048): Size of each hidden layer.
+        num_hidden_layers (int, 3): How many hidden layers to stack.
+        dropout (float, 0.1): Dropout probability for hidden layers.
+    """
+
+    model_type = "mlp"
+
+    def __init__(
+        self,
+        embedding_size=8,
+        sequence_length=8,
+        num_labels=32,
+        hidden_size=2048,
+        num_hidden_layers=3,
+        dropout=0.1,
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.embedding_size = embedding_size
+        self.sequence_length = sequence_length
+        self.num_labels = num_labels
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.dropout = dropout
+
+
+AutoConfig.register("mlp", MLPConfig)
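
Note (not part of the commit): a minimal sketch of how a config like the config.json above can be produced from these defaults; the output directory name is an illustrative assumption.

from configuration_mlp import MLPConfig

config = MLPConfig()               # defaults: 8x8 inputs, 32 labels, 3 hidden layers of 2048
config.save_pretrained("mlp-out")  # writes mlp-out/config.json
# PretrainedConfig derives the 32 id2label/label2id entries from num_labels=32;
# "architectures" and "torch_dtype" are added when the model itself is saved.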
model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39a75b7029436cf1d029a3f782c1e2205797b2ebd0425ef60ca0993999c7e392
+size 34366240
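
Note (not part of the commit): a quick sanity check that the 34,366,240-byte weight file matches the default architecture stored in float32.

emb, seq, hidden, layers, labels = 8, 8, 2048, 3, 32

params = 0
in_dim = emb * seq                      # inputs are flattened to 64 features
for _ in range(layers):
    params += in_dim * hidden + hidden  # Linear weight + bias
    in_dim = hidden
params += in_dim * labels + labels      # final classification layer

print(params)      # 8,591,392 parameters
print(params * 4)  # 34,365,568 bytes of float32 weights; the remaining ~672 bytes
                   # of model.safetensors are the safetensors header/metadata.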
modeling_mlp.py
ADDED
@@ -0,0 +1,85 @@
+import torch.nn as nn
+from transformers import PreTrainedModel, AutoModelForSequenceClassification
+from transformers.modeling_outputs import SequenceClassifierOutput
+from .configuration_mlp import MLPConfig
+
+
+class MLP(PreTrainedModel):
+    r"""
+    A simple MLP model that takes a 3D input [batch_size, seq_length, embedding_size]
+    and performs multi-label classification using BCE loss.
+    """
+    config_class = MLPConfig
+
+    def __init__(self, config: MLPConfig):
+        super().__init__(config)
+        self.config = config
+
+        # Define an MLP stack
+        layers = []
+        input_dim = config.embedding_size * config.sequence_length
+        for _ in range(config.num_hidden_layers):
+            layers.append(nn.Linear(input_dim, config.hidden_size))
+            layers.append(nn.ReLU())
+            layers.append(nn.Dropout(config.dropout))
+            input_dim = config.hidden_size
+        # Final layer: hidden -> num_labels
+        layers.append(nn.Linear(input_dim, config.num_labels))
+
+        self.mlp = nn.Sequential(*layers)
+
+        # Initialize weights using standard HF utility
+        self.post_init()
+
+    def forward(
+        self,
+        inputs_embeds=None,
+        labels=None,
+        **kwargs
+    ):
+        """
+        Forward pass of the MLP.
+
+        Args:
+            inputs_embeds (torch.FloatTensor):
+                A 3D tensor of shape [batch_size, seq_length, embedding_size].
+            labels (torch.FloatTensor):
+                Multi-hot labels for multi-label classification, shape [batch_size, num_labels].
+
+        Returns:
+            SequenceClassifierOutput with fields:
+                - loss (optional)
+                - logits
+                - hidden_states (None)
+                - attentions (None)
+        """
+        # inputs_embeds is [B, L, E]
+        # Flatten over seq_length if desired, or do a pooling:
+        # Option A: Flatten everything: B x (L*E)
+        B, L, E = inputs_embeds.shape
+        assert L == self.config.sequence_length and E == self.config.embedding_size
+        x = inputs_embeds.reshape(B, L * E)
+
+        # Option B: Mean-pool across tokens (comment out if you prefer flattening)
+        # x = inputs_embeds.mean(dim=1)  # shape: B x E
+        # (If you do mean-pooling, remember to adjust 'input_dim' in the __init__ to E, not L*E)
+
+        # Pass through MLP
+        logits = self.mlp(x)  # shape: [B, num_labels]
+
+        loss = None
+        if labels is not None:
+            # For multi-label classification, use BCEWithLogitsLoss
+            loss_fct = nn.BCEWithLogitsLoss()
+            # Ensure labels is float
+            loss = loss_fct(logits, labels.float())
+
+        return SequenceClassifierOutput(
+            loss=loss,
+            logits=logits,
+            hidden_states=None,
+            attentions=None
+        )
+
+
+AutoModelForSequenceClassification.register(MLPConfig, MLP)
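
Note (not part of the commit): a minimal usage sketch that builds the model from the default config and runs one forward pass on random data. It assumes this repo directory is importable as a package named mlp_model (it ships an __init__.py); the package name and the dummy tensors are illustrative assumptions, not part of the repo.

import torch
from mlp_model.configuration_mlp import MLPConfig
from mlp_model.modeling_mlp import MLP

config = MLPConfig()          # defaults: embedding_size=8, sequence_length=8, num_labels=32
model = MLP(config).eval()

inputs = torch.randn(4, config.sequence_length, config.embedding_size)  # [B, L, E]
labels = torch.randint(0, 2, (4, config.num_labels)).float()            # multi-hot targets

with torch.no_grad():
    out = model(inputs_embeds=inputs, labels=labels)

print(out.logits.shape)  # torch.Size([4, 32])
print(out.loss)          # BCEWithLogitsLoss averaged over batch and labels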