sunileman committed
Commit c247077 · 1 Parent(s): 5f37d53

Add new SentenceTransformer model.

README.md CHANGED
@@ -5,9 +5,7 @@ tags:
 - sentence-transformers
 - feature-extraction
 - sentence-similarity
-- transformers
-datasets:
-- snli
+
 ---
 
 # sunileman/nli-distilroberta-base-v2
@@ -37,44 +35,6 @@ print(embeddings)
 
 
 
-
-## Usage (HuggingFace Transformers)
-Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling-operation on-top of the contextualized word embeddings.
-
-```python
-from transformers import AutoTokenizer, AutoModel
-import torch
-
-
-#Mean Pooling - Take attention mask into account for correct averaging
-def mean_pooling(model_output, attention_mask):
-    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
-    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
-    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
-
-
-# Sentences we want sentence embeddings for
-sentences = ['This is an example sentence', 'Each sentence is converted']
-
-# Load model from HuggingFace Hub
-tokenizer = AutoTokenizer.from_pretrained('sunileman/nli-distilroberta-base-v2')
-model = AutoModel.from_pretrained('sunileman/nli-distilroberta-base-v2')
-
-# Tokenize sentences
-encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
-
-# Compute token embeddings
-with torch.no_grad():
-    model_output = model(**encoded_input)
-
-# Perform pooling. In this case, mean pooling.
-sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
-
-print("Sentence embeddings:")
-print(sentence_embeddings)
-```
-
-
 ## Evaluation Results
 
 <!--- Describe how your model was evaluated -->
@@ -87,7 +47,7 @@ The model was trained with the parameters:
 
 **DataLoader**:
 
-`torch.utils.data.dataloader.DataLoader` of length 1 with parameters:
+`torch.utils.data.dataloader.DataLoader` of length 531 with parameters:
 ```
 {'batch_size': 16, 'sampler': 'torch.utils.data.sampler.RandomSampler', 'batch_sampler': 'torch.utils.data.sampler.BatchSampler'}
 ```
@@ -99,7 +59,7 @@ The model was trained with the parameters:
 Parameters of the fit()-Method:
 ```
 {
-    "epochs": 4,
+    "epochs": 1,
     "evaluation_steps": 0,
     "evaluator": "NoneType",
     "max_grad_norm": 1,
@@ -109,7 +69,7 @@ Parameters of the fit()-Method:
     },
     "scheduler": "WarmupLinear",
     "steps_per_epoch": null,
-    "warmup_steps": 100,
+    "warmup_steps": 1,
    "weight_decay": 0.01
 }
 ```
@@ -118,8 +78,9 @@ Parameters of the fit()-Method:
 ## Full Model Architecture
 ```
 SentenceTransformer(
-  (0): Transformer({'max_seq_length': 75, 'do_lower_case': False}) with Transformer model: RobertaModel
+  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: RobertaModel
   (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False})
+  (2): Normalize()
 )
 ```
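The commit drops the manual HuggingFace Transformers recipe from the card, leaving only the sentence-transformers path (the `print(embeddings)` hunk context above). Since the architecture now ends in a Normalize module, pooling and normalization run inside the pipeline. A minimal sketch of the remaining usage, assuming the updated weights are published under the same repo id:

```python
from sentence_transformers import SentenceTransformer

# Pooling (module 1) and Normalize (module 2) are applied automatically,
# so no manual mean_pooling helper is needed anymore.
model = SentenceTransformer('sunileman/nli-distilroberta-base-v2')

sentences = ['This is an example sentence', 'Each sentence is converted']
embeddings = model.encode(sentences)
print(embeddings)
```
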
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "sentence-transformers/nli-distilroberta-base-v2",
+  "_name_or_path": "sentence-transformers/all-distilroberta-v1",
   "architectures": [
     "RobertaModel"
   ],
config_sentence_transformers.json CHANGED
@@ -1,7 +1,7 @@
 {
   "__version__": {
     "sentence_transformers": "2.0.0",
-    "transformers": "4.7.0",
-    "pytorch": "1.9.0+cu102"
+    "transformers": "4.6.1",
+    "pytorch": "1.8.1"
   }
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7e1ea94c966662a6508f42dee9ccbbf6e053f08180fd0b70a6af1dc87d9af88
+oid sha256:0e1327aaa9c8f67fe465c1339e58b5feb760e811b3ec5e9582a0143009deb767
 size 328485128
modules.json CHANGED
@@ -10,5 +10,11 @@
     "name": "1",
     "path": "1_Pooling",
     "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
   }
 ]
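The new `2_Normalize` entry appends `sentence_transformers.models.Normalize` as a third module, so encoded vectors come out L2-normalized and dot product coincides with cosine similarity. A quick check, as a sketch assuming the committed model loads from the Hub:

```python
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('sunileman/nli-distilroberta-base-v2')
emb = model.encode(['This is an example sentence'])

# With the 2_Normalize module in place, each embedding should have unit L2 norm.
print(np.linalg.norm(emb[0]))  # expected ~1.0
```
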
sentence_bert_config.json CHANGED
@@ -1,4 +1,4 @@
 {
-  "max_seq_length": 75,
+  "max_seq_length": 512,
   "do_lower_case": false
 }
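The sequence limit moves from 75 to 512 tokens, so far longer inputs are embedded before truncation kicks in. The limit is exposed through the standard `max_seq_length` attribute of SentenceTransformer; a small sketch, assuming the model id above:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('sunileman/nli-distilroberta-base-v2')

# After this commit, inputs up to 512 tokens are encoded in full;
# anything longer is truncated.
print(model.max_seq_length)  # 512
```
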
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 75,
+    "max_length": 512,
     "strategy": "LongestFirst",
     "stride": 0
   },
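tokenizer.json mirrors the same change: right-side truncation now caps at 512 tokens rather than 75. A sketch of the effect when truncation is requested explicitly (the repeated-word input is only an illustration):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained('sunileman/nli-distilroberta-base-v2')

# A deliberately long input is now cut at 512 tokens instead of 75.
ids = tok('word ' * 1000, truncation=True, max_length=512)['input_ids']
print(len(ids))  # 512
```
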
tokenizer_config.json CHANGED
@@ -48,10 +48,17 @@
   "eos_token": "</s>",
   "errors": "replace",
   "mask_token": "<mask>",
+  "max_length": 128,
   "model_max_length": 512,
+  "pad_to_multiple_of": null,
   "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "</s>",
+  "stride": 0,
   "tokenizer_class": "RobertaTokenizer",
   "trim_offsets": true,
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "<unk>"
 }
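The expanded tokenizer_config.json pins the padding and truncation defaults explicitly; they surface as standard tokenizer attributes. A sketch, assuming a transformers version recent enough to expose `truncation_side`:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained('sunileman/nli-distilroberta-base-v2')

# Values recorded in tokenizer_config.json after this commit:
print(tok.model_max_length)  # 512
print(tok.padding_side)      # 'right'
print(tok.truncation_side)   # 'right'
```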