grez72 committed
Commit cc10c23 · 1 parent: b8a70cd
Files changed (4)
  1. config.json +41 -0
  2. configuration.py +38 -0
  3. model.py +74 -0
  4. pytorch_model.bin +3 -0
config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "multilabel_bert_base_uncased",
+   "architectures": [
+     "MultiLabelClassifierModel"
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration.MultiLabelClassifierConfig",
+     "AutoModelForSequenceClassification": "model.MultiLabelClassifierModel"
+   },
+   "bidirectional": true,
+   "dropout": 0.3,
+   "embedding_dim": 768,
+   "hidden_dim": 256,
+   "id2label": {
+     "0": "Amusing",
+     "1": "Emotional",
+     "2": "Suspenseful",
+     "3": "Dark",
+     "4": "Thrilling"
+   },
+   "label2id": {
+     "Amusing": 0,
+     "Dark": 3,
+     "Emotional": 1,
+     "Suspenseful": 2,
+     "Thrilling": 4
+   },
+   "labels": [
+     "Amusing",
+     "Emotional",
+     "Suspenseful",
+     "Dark",
+     "Thrilling"
+   ],
+   "model_type": "multi_label_classification",
+   "num_classes": 5,
+   "num_layers": 2,
+   "torch_dtype": "float32",
+   "transformer_name": "bert-base-uncased",
+   "transformers_version": "4.21.1"
+ }
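With config.json in place, auto_map routes AutoConfig and AutoModelForSequenceClassification to the classes added in this commit, so the checkpoint can be loaded with trust_remote_code=True. A minimal loading sketch (the repo id below is a placeholder, not the actual repository path):

from transformers import AutoConfig, AutoModelForSequenceClassification

# Placeholder repo id; substitute the real hub path of this repository.
repo_id = "your-username/multilabel_bert_base_uncased"

# trust_remote_code=True lets transformers import configuration.py and
# model.py from the repo to build the classes named in auto_map.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForSequenceClassification.from_pretrained(repo_id, trust_remote_code=True)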
configuration.py ADDED
@@ -0,0 +1,38 @@
+ from typing import List, Optional
+
+ from transformers import PretrainedConfig
+
+
+ class MultiLabelClassifierConfig(PretrainedConfig):
+     model_type = "multi_label_classification"
+     problem_type = "multi_label_classification"
+
+     def __init__(
+         self,
+         embedding_dim: int = 768,
+         labels: Optional[List[str]] = None,
+         transformer_name: str = "bert-base-uncased",
+         hidden_dim: int = 256,
+         num_layers: int = 2,
+         bidirectional: bool = True,
+         dropout: float = 0.3,
+         **kwargs,
+     ):
+         # Avoid a mutable default argument; fall back to an empty label list.
+         labels = labels if labels is not None else []
+
+         self.transformer_name = transformer_name
+         self.hidden_dim = hidden_dim
+         self.labels = labels
+         self.num_layers = num_layers
+         self.bidirectional = bidirectional
+         self.dropout = dropout
+         self.num_classes = len(labels)
+         self.embedding_dim = embedding_dim
+
+         # Derive the label maps from `labels` unless the caller supplied them.
+         if "id2label" not in kwargs:
+             kwargs["id2label"] = {idx: label for idx, label in enumerate(labels)}
+         if "label2id" not in kwargs:
+             kwargs["label2id"] = {label: idx for idx, label in enumerate(labels)}
+         super().__init__(**kwargs)
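Since the config derives num_classes, id2label, and label2id from the label list, constructing it from the five labels above reproduces the values committed in config.json. A quick instantiation sketch:

from configuration import MultiLabelClassifierConfig

# Same label order as the committed config.json.
config = MultiLabelClassifierConfig(
    labels=["Amusing", "Emotional", "Suspenseful", "Dark", "Thrilling"]
)
assert config.num_classes == 5
assert config.id2label[0] == "Amusing"
assert config.label2id["Dark"] == 3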
model.py ADDED
@@ -0,0 +1,74 @@
+ import torch
+ import torch.nn as nn
+ from typing import Optional, Tuple, Union
+
+ from transformers import AutoModel, PreTrainedModel
+ from transformers.modeling_outputs import SequenceClassifierOutput
+
+ from .configuration import MultiLabelClassifierConfig
+
+
+ class MultiLabelClassifierModel(PreTrainedModel):
+     config_class = MultiLabelClassifierConfig
+
+     def __init__(self, config):
+         super().__init__(config)
+
+         # Pretrained transformer encoder (e.g. bert-base-uncased); AutoModel
+         # is the idiomatic equivalent of loading via torch.hub.
+         self.nlp_model = AutoModel.from_pretrained(config.transformer_name)
+         # GRU over the encoder's token embeddings; inter-layer dropout only
+         # applies when the GRU has more than one layer.
+         self.rnn = nn.GRU(
+             config.embedding_dim,
+             config.hidden_dim,
+             num_layers=config.num_layers,
+             bidirectional=config.bidirectional,
+             batch_first=True,
+             dropout=0 if config.num_layers < 2 else config.dropout,
+         )
+         self.dropout = nn.Dropout(config.dropout)
+         # Classification head; a bidirectional GRU doubles the feature size.
+         self.out = nn.Linear(
+             config.hidden_dim * 2 if config.bidirectional else config.hidden_dim,
+             config.num_classes,
+         )
+
+     def forward(
+         self,
+         input_ids: Optional[torch.Tensor] = None,
+         attention_mask: Optional[torch.Tensor] = None,
+         token_type_ids: Optional[torch.Tensor] = None,
+         position_ids: Optional[torch.Tensor] = None,
+         head_mask: Optional[torch.Tensor] = None,
+         inputs_embeds: Optional[torch.Tensor] = None,
+         output_attentions: Optional[bool] = None,
+         output_hidden_states: Optional[bool] = None,
+         return_dict: Optional[bool] = None,
+     ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:
+         output = self.nlp_model(
+             input_ids=input_ids,
+             attention_mask=attention_mask,
+             token_type_ids=token_type_ids,
+             position_ids=position_ids,
+             head_mask=head_mask,
+             inputs_embeds=inputs_embeds,
+             output_attentions=output_attentions,
+             output_hidden_states=output_hidden_states,
+             return_dict=return_dict,
+         )
+         # hidden has shape (num_layers * num_directions, batch, hidden_dim);
+         # keep the final layer's state(s) as the sequence representation.
+         _, hidden = self.rnn(output.last_hidden_state)
+         if self.rnn.bidirectional:
+             # Concatenate the last forward and backward hidden states.
+             hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))
+         else:
+             hidden = self.dropout(hidden[-1, :, :])
+
+         logits = self.out(hidden)
+         return SequenceClassifierOutput(
+             logits=logits,
+             hidden_states=output.hidden_states,
+             attentions=output.attentions,
+         )
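Because the head is multi-label, each logit is an independent per-label score: inference applies a sigmoid and a threshold rather than a softmax. An end-to-end sketch, assuming `model` was loaded as shown after config.json; the 0.5 threshold and the sample sentence are illustrative assumptions:

import torch
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
inputs = tokenizer("A tense, twist-filled heist story.", return_tensors="pt")

model.eval()
with torch.no_grad():
    logits = model(**inputs).logits  # shape: (1, num_classes)

# Independent sigmoid per label; the 0.5 cutoff is an assumed default.
probs = torch.sigmoid(logits)[0]
predicted = [model.config.id2label[i] for i, p in enumerate(probs) if p > 0.5]
print(predicted)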
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c79c3c4a98b6685e775eaf1f6dadd5cc50a98aae9d7bab28f7b005046b464e15
+ size 449047757