denizyuret committed on
Commit 6afc4db · 1 Parent(s): f298750
Files changed (6)
  1. .gitignore +1 -0
  2. __init__.py +0 -0
  3. config.json +81 -0
  4. configuration_mlp.py +37 -0
  5. model.safetensors +3 -0
  6. modeling_mlp.py +85 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ __pycache__
__init__.py ADDED
File without changes
config.json ADDED
@@ -0,0 +1,81 @@
+ {
+   "architectures": [
+     "MLP"
+   ],
+   "dropout": 0.1,
+   "embedding_size": 8,
+   "hidden_size": 2048,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2",
+     "3": "LABEL_3",
+     "4": "LABEL_4",
+     "5": "LABEL_5",
+     "6": "LABEL_6",
+     "7": "LABEL_7",
+     "8": "LABEL_8",
+     "9": "LABEL_9",
+     "10": "LABEL_10",
+     "11": "LABEL_11",
+     "12": "LABEL_12",
+     "13": "LABEL_13",
+     "14": "LABEL_14",
+     "15": "LABEL_15",
+     "16": "LABEL_16",
+     "17": "LABEL_17",
+     "18": "LABEL_18",
+     "19": "LABEL_19",
+     "20": "LABEL_20",
+     "21": "LABEL_21",
+     "22": "LABEL_22",
+     "23": "LABEL_23",
+     "24": "LABEL_24",
+     "25": "LABEL_25",
+     "26": "LABEL_26",
+     "27": "LABEL_27",
+     "28": "LABEL_28",
+     "29": "LABEL_29",
+     "30": "LABEL_30",
+     "31": "LABEL_31"
+   },
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_10": 10,
+     "LABEL_11": 11,
+     "LABEL_12": 12,
+     "LABEL_13": 13,
+     "LABEL_14": 14,
+     "LABEL_15": 15,
+     "LABEL_16": 16,
+     "LABEL_17": 17,
+     "LABEL_18": 18,
+     "LABEL_19": 19,
+     "LABEL_2": 2,
+     "LABEL_20": 20,
+     "LABEL_21": 21,
+     "LABEL_22": 22,
+     "LABEL_23": 23,
+     "LABEL_24": 24,
+     "LABEL_25": 25,
+     "LABEL_26": 26,
+     "LABEL_27": 27,
+     "LABEL_28": 28,
+     "LABEL_29": 29,
+     "LABEL_3": 3,
+     "LABEL_30": 30,
+     "LABEL_31": 31,
+     "LABEL_4": 4,
+     "LABEL_5": 5,
+     "LABEL_6": 6,
+     "LABEL_7": 7,
+     "LABEL_8": 8,
+     "LABEL_9": 9
+   },
+   "model_type": "mlp",
+   "num_hidden_layers": 3,
+   "sequence_length": 8,
+   "torch_dtype": "float32",
+   "transformers_version": "4.47.1"
+ }
configuration_mlp.py ADDED
@@ -0,0 +1,37 @@
+ from transformers import PretrainedConfig, AutoConfig
+
+ class MLPConfig(PretrainedConfig):
+     r"""
+     Config for the MLP model.
+
+     Args:
+         embedding_size (int, 8): Size of the input embeddings (last dimension of the 3D input).
+         sequence_length (int, 8): Number of tokens in input sequence (middle dimension; must be fixed).
+         num_labels (int, 32): Number of output labels (for multi-label classification).
+         hidden_size (int, 2048): Size of each hidden layer.
+         num_hidden_layers (int, 3): How many hidden layers to stack.
+         dropout (float, 0.1): Dropout probability for hidden layers.
+     """
+
+     model_type = "mlp"
+
+     def __init__(
+         self,
+         embedding_size=8,
+         sequence_length=8,
+         num_labels=32,
+         hidden_size=2048,
+         num_hidden_layers=3,
+         dropout=0.1,
+         **kwargs
+     ):
+         super().__init__(**kwargs)
+         self.embedding_size = embedding_size
+         self.sequence_length = sequence_length
+         self.num_labels = num_labels
+         self.hidden_size = hidden_size
+         self.num_hidden_layers = num_hidden_layers
+         self.dropout = dropout
+
+
+ AutoConfig.register("mlp", MLPConfig)
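
Not part of the commit, but as a quick sanity check: the defaults above line up with the committed config.json. A minimal sketch, assuming configuration_mlp.py is on the import path and the script runs from a scratch directory:

import json
from configuration_mlp import MLPConfig

config = MLPConfig()                      # defaults: embedding_size=8, sequence_length=8, num_labels=32, ...
config.save_pretrained("mlp-checkpoint")  # writes mlp-checkpoint/config.json
with open("mlp-checkpoint/config.json") as f:
    saved = json.load(f)
print(saved["model_type"], saved["hidden_size"])  # mlp 2048
print(len(saved["id2label"]))                     # 32 labels, as in the committed file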
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:39a75b7029436cf1d029a3f782c1e2205797b2ebd0425ef60ca0993999c7e392
+ size 34366240
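
Not part of the commit: the 34,366,240-byte weight file is consistent with the float32 parameter count implied by config.json, with the small remainder attributable to the safetensors header. A rough check:

in_dim = 8 * 8                         # embedding_size * sequence_length (inputs are flattened)
hidden, n_layers, n_labels = 2048, 3, 32

params, d = 0, in_dim
for _ in range(n_layers):
    params += d * hidden + hidden      # nn.Linear weight + bias
    d = hidden
params += d * n_labels + n_labels      # final classifier layer

print(params)       # 8591392 parameters
print(params * 4)   # 34365568 bytes of float32 weights; model.safetensors is
                    # 34366240 bytes, the ~672-byte difference being the header.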
modeling_mlp.py ADDED
@@ -0,0 +1,85 @@
+ import torch.nn as nn
+ from transformers import PreTrainedModel, AutoModelForSequenceClassification
+ from transformers.modeling_outputs import SequenceClassifierOutput
+ from .configuration_mlp import MLPConfig
+
+
+ class MLP(PreTrainedModel):
+     r"""
+     A simple MLP model that takes a 3D input [batch_size, seq_length, embedding_size]
+     and performs multi-label classification using BCE loss.
+     """
+     config_class = MLPConfig
+
+     def __init__(self, config: MLPConfig):
+         super().__init__(config)
+         self.config = config
+
+         # Define an MLP stack
+         layers = []
+         input_dim = config.embedding_size * config.sequence_length
+         for _ in range(config.num_hidden_layers):
+             layers.append(nn.Linear(input_dim, config.hidden_size))
+             layers.append(nn.ReLU())
+             layers.append(nn.Dropout(config.dropout))
+             input_dim = config.hidden_size
+         # Final layer: hidden -> num_labels
+         layers.append(nn.Linear(input_dim, config.num_labels))
+
+         self.mlp = nn.Sequential(*layers)
+
+         # Initialize weights using standard HF utility
+         self.post_init()
+
+     def forward(
+         self,
+         inputs_embeds=None,
+         labels=None,
+         **kwargs
+     ):
+         """
+         Forward pass of the MLP.
+
+         Args:
+             inputs_embeds (torch.FloatTensor):
+                 A 3D tensor of shape [batch_size, seq_length, embedding_size].
+             labels (torch.FloatTensor):
+                 Multi-hot labels for multi-label classification, shape [batch_size, num_labels].
+
+         Returns:
+             SequenceClassifierOutput with fields:
+                 - loss (optional)
+                 - logits
+                 - hidden_states (None)
+                 - attentions (None)
+         """
+         # inputs_embeds is [B, L, E]
+         # Flatten over seq_length if desired, or do a pooling:
+         # Option A: Flatten everything: B x (L*E)
+         B, L, E = inputs_embeds.shape
+         assert L == self.config.sequence_length and E == self.config.embedding_size
+         x = inputs_embeds.reshape(B, L * E)
+
+         # Option B: Mean-pool across tokens (comment out if you prefer flattening)
+         # x = inputs_embeds.mean(dim=1)  # shape: B x E
+         # (If you do mean-pooling, remember to adjust 'input_dim' in the __init__ to E, not L*E)
+
+         # Pass through MLP
+         logits = self.mlp(x)  # shape: [B, num_labels]
+
+         loss = None
+         if labels is not None:
+             # For multi-label classification, use BCEWithLogitsLoss
+             loss_fct = nn.BCEWithLogitsLoss()
+             # Ensure labels is float
+             loss = loss_fct(logits, labels.float())
+
+         return SequenceClassifierOutput(
+             loss=loss,
+             logits=logits,
+             hidden_states=None,
+             attentions=None
+         )
+
+
+ AutoModelForSequenceClassification.register(MLPConfig, MLP)
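
Also not part of the commit: a minimal usage sketch, pushing one random batch through the model. Because modeling_mlp.py imports its config relatively, the two modules must sit in a package; the directory name mlp_model below is a placeholder for wherever this repo is checked out.

import torch
from mlp_model.configuration_mlp import MLPConfig   # "mlp_model" is a placeholder package name
from mlp_model.modeling_mlp import MLP

config = MLPConfig()                                 # 8 x 8 inputs, 32 labels, 3 hidden layers
model = MLP(config)
model.eval()

batch = 4
inputs_embeds = torch.randn(batch, config.sequence_length, config.embedding_size)
labels = torch.randint(0, 2, (batch, config.num_labels)).float()   # multi-hot targets

with torch.no_grad():
    out = model(inputs_embeds=inputs_embeds, labels=labels)

print(out.logits.shape)   # torch.Size([4, 32])
print(out.loss)           # BCEWithLogitsLoss over the multi-hot labels

Loading the committed weights instead of a random initialization would go through MLP.from_pretrained pointed at the local checkout directory, which reads config.json and model.safetensors from this commit.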