hmm404/triplet_728
- 1_Pooling/config.json +10 -0
- README.md +393 -0
- config.json +24 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- runs/Feb18_06-37-18_9d18770862ef/events.out.tfevents.1739860645.9d18770862ef.7189.0 +3 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +51 -0
- tokenizer.json +0 -0
- tokenizer_config.json +73 -0
- training_args.bin +3 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
  "word_embedding_dimension": 768,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
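For reference, a minimal PyTorch sketch of what this configuration computes — mean pooling over the token embeddings, masked so padding does not contribute. This is an illustration of the technique, not the library's internal code:

```python
import torch

def mean_pooling(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Average each sequence's 768-dim token embeddings over its real (non-padding) tokens."""
    mask = attention_mask.unsqueeze(-1).float()      # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)    # (batch, 768): sum of unmasked token vectors
    counts = mask.sum(dim=1).clamp(min=1e-9)         # (batch, 1): number of real tokens
    return summed / counts                           # (batch, 768): mean-pooled sentence embedding
```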
README.md
ADDED
@@ -0,0 +1,393 @@
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:25880
- loss:TripletLoss
base_model: sentence-transformers/all-mpnet-base-v2
widget:
- source_sentence: Die Verabschiedung des NetzDG hat wichtige Symbolkraft.
  sentences:
  - Der Symbolwert von Gesetzen sollte nicht maßgeblich für ihre Erstellung, Umsetzung
    und Bewertung sein. Gesetzte sollten einen praktischen Mehrwert haben.
  - Politicians should not take limited job opportunities that would have otherwise
    been available to other people in need of work.
  - Symbole wirken als Kommunikationsmittel \(Dirk Hülst, S. 207\); das NetzDG beinhaltet
    zentrale Botschaften.
- source_sentence: Capital punishment protects public safety by ensuring that convicted
    criminals do not ever offend again.
  sentences:
  - That is a statement of fact, but is not an argument for why capital punishment
    is just. Technically, capital punishment can also be used to against people who
    drive over the speed limit, but we understand that there are more humane methods
    of punishment.
  - Society has an absolute right to protect itself with the execution of killers
    who have committed premeditated murder and so are apt to kill again. In such
    instances of premeditated murder, since the penalty suits the crime, society should
    not be obligated to incur the extraordinary expense as well as the hazard of a
    life sentence in lieu of the death penalty. Such obligations -- the extraordinary
    cost and hazard -- in effect penalize society rather than the murderer who committed
    the crime.
  - 2 Nephi 3:6-22 in the Book of Mormon describes Joseph Smith as a "choice seer."
- source_sentence: Daenerys has proven that she is incapable of ruling even a single
    city.
  sentences:
  - She now has Tyrion Lannister to do that for her, she only has to provide the looks.
  - Hemp is known for having effective bioremediation/phyto-remediation qualities,
    meaning it can be used to cleanse the soil.
  - Daenerys takes the moral high ground to a fault. Her tendency to choose what is
    "right" over what is a better strategy could be her undoing.
- source_sentence: Consumers within public health systems should be empowered to make
    their own choices about their treatment.
  sentences:
  - People have a right to choose what kind of treatment they want regarding something
    as important as their own health.
  - The money available to public health systems is limited. One patient's decisions
    to use homeopathy limits another's access to a more effective treatment.
  - Wizard healing can cure any "normal" illness \(any illness Muggles might contract\).
- source_sentence: 'Nobody should have to take the responsibility for another couple''s
    bad decisions: to put a child up for adoption is selfish because you are deciding
    to have a better life for yourself rather than raise a kid.'
  sentences:
  - It is not selfish to have children as long as you love them and care for them.
    Putting a child up for adoption is selfish \(depending on the reason\) because
    the child is always going to wonder why they were left.
  - In order to go from "is" to "ought," one must introduce something from outside
    the realm of the "is."
  - Putting a child up for adoption can be considered a selfless act if you do it
    because someone else can give the child a better life than you can.
pipeline_tag: sentence-similarity
library_name: sentence-transformers
---

# SentenceTransformer based on sentence-transformers/all-mpnet-base-v2

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) <!-- at revision 9a3225965996d404b775526de6dbfe85d3368642 -->
- **Maximum Sequence Length:** 384 tokens
- **Output Dimensionality:** 768 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    "Nobody should have to take the responsibility for another couple's bad decisions: to put a child up for adoption is selfish because you are deciding to have a better life for yourself rather than raise a kid.",
    'It is not selfish to have children as long as you love them and care for them. Putting a child up for adoption is selfish \\(depending on the reason\\) because the child is always going to wonder why they were left.',
    'Putting a child up for adoption can be considered a selfless act if you do it because someone else can give the child a better life than you can.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
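
Beyond pairwise similarity, the same embeddings support semantic search. A short sketch (the query and corpus strings here are illustrative placeholders, as is the model id above):

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("sentence_transformers_model_id")  # placeholder id

corpus = [
    "Capital punishment protects public safety.",
    "Hemp can be used to cleanse contaminated soil.",
]
corpus_embeddings = model.encode(corpus)
query_embedding = model.encode(["Is the death penalty good for society?"])

# Rank corpus entries by cosine similarity to the query
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=1)
best = hits[0][0]
print(corpus[best["corpus_id"]], best["score"])
```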

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 25,880 training samples
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
* Approximate statistics based on the first 1000 samples:
  |         | anchor | positive | negative |
  |:--------|:-------|:---------|:---------|
  | type    | string | string   | string   |
  | details | <ul><li>min: 3 tokens</li><li>mean: 27.9 tokens</li><li>max: 186 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 34.46 tokens</li><li>max: 140 tokens</li></ul> | <ul><li>min: 8 tokens</li><li>mean: 35.42 tokens</li><li>max: 185 tokens</li></ul> |
* Samples:
  | anchor | positive | negative |
  |:-------|:---------|:---------|
  | <code>Security services such as the USA's NSA, or UK's GCHQ require secrecy about what actions they are taking. Open sourcing all code would reveal those actions.</code> | <code>Writing implants/malware would not be possible in open source.</code> | <code>This could be a good thing if you side with whistleblowers \(eg Snowden/Wikileaks\), as it would have revealed the governments actions sooner.</code> |
  | <code>Monarchs are still extremely vulnerable to the effects of public opinion, meaning that the use of their power is implicitly determined by the will of the people.</code> | <code>In Liechtenstein, according to the Constitution, the Prince is subject to a popular vote of no confidence \(Art. 13ter\). Furthermore, the Constitution itself provides a procedure to abolish the monarchy upon popular request \(Art. 113\).</code> | <code>Being subject to the pressure of public opinion is very different from representing the will of the people.</code> |
  | <code>The Church of Jesus Christ of Latter-day Saints \(The "Mormon" Church\) teaches that free will, or "Agency," is essential the God's plan of Happiness for mankind and our freedom to choose is one of mankind's greatest blessings.</code> | <code>The Church of Jesus Christ of Latter-day Saints also teaches that one of Satan's greatest sins was that he ["sought to destroy the agency \[i.e. free will\] of man, which I, the Lord God, had given him."](https://www.lds.org/scriptures/pgp/moses/4.3-4)</code> | <code>There's no logic or rational thinking into your claim other than implicit authority. It'd be the same as claiming that Harry Potter teaches us that free will or "Agency" is an illusion on page 137. If you don't take Harry Potter as an authority this claim means nothing to you. That's why we have science and logic to support any explanation without implicit authority.</code> |
* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
  ```json
  {
      "distance_metric": "TripletDistanceMetric.COSINE",
      "triplet_margin": 0.3
  }
  ```
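
As a hedged sketch of how a comparable fine-tuning run could be set up with these parameters (the triplets below are placeholders, not the actual 25,880-sample dataset):

```python
from datasets import Dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses

model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Placeholder triplets with the same anchor/positive/negative columns
train_dataset = Dataset.from_dict({
    "anchor":   ["Capital punishment protects public safety."],
    "positive": ["Society has a right to protect itself from convicted murderers."],
    "negative": ["Hemp is known for effective bioremediation qualities."],
})

# TripletLoss with the cosine distance metric and 0.3 margin reported above
loss = losses.TripletLoss(
    model=model,
    distance_metric=losses.TripletDistanceMetric.COSINE,
    triplet_margin=0.3,
)

trainer = SentenceTransformerTrainer(model=model, train_dataset=train_dataset, loss=loss)
trainer.train()
```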

### Training Hyperparameters

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: no
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 8
- `per_device_eval_batch_size`: 8
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 5e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 3.0
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional

</details>

### Training Logs
| Epoch  | Step | Training Loss |
|:------:|:----:|:-------------:|
| 0.1546 | 500  | 0.2287        |
| 0.3091 | 1000 | 0.2057        |
| 0.4637 | 1500 | 0.1884        |
| 0.6182 | 2000 | 0.1873        |
| 0.7728 | 2500 | 0.2153        |
| 0.9274 | 3000 | 0.2972        |
| 1.0819 | 3500 | 0.2991        |
| 1.2365 | 4000 | 0.2994        |
| 1.3910 | 4500 | 0.3           |
| 1.5456 | 5000 | 0.3           |
| 1.7002 | 5500 | 0.3           |
| 1.8547 | 6000 | 0.3001        |
| 2.0093 | 6500 | 0.3           |
| 2.1638 | 7000 | 0.3001        |
| 2.3184 | 7500 | 0.3           |
| 2.4730 | 8000 | 0.3           |
| 2.6275 | 8500 | 0.3001        |
| 2.7821 | 9000 | 0.3           |
| 2.9366 | 9500 | 0.3001        |


### Framework Versions
- Python: 3.11.11
- Sentence Transformers: 3.4.1
- Transformers: 4.48.3
- PyTorch: 2.5.1+cu124
- Accelerate: 1.3.0
- Datasets: 3.3.1
- Tokenizers: 0.21.0

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### TripletLoss
```bibtex
@misc{hermans2017defense,
    title={In Defense of the Triplet Loss for Person Re-Identification},
    author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
    year={2017},
    eprint={1703.07737},
    archivePrefix={arXiv},
    primaryClass={cs.CV}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
@@ -0,0 +1,24 @@
{
  "_name_or_path": "sentence-transformers/all-mpnet-base-v2",
  "architectures": [
    "MPNetModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "mpnet",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "relative_attention_num_buckets": 32,
  "torch_dtype": "float32",
  "transformers_version": "4.48.3",
  "vocab_size": 30527
}
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "3.4.1",
    "transformers": "4.48.3",
    "pytorch": "2.5.1+cu124"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
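Since `similarity_fn_name` is `"cosine"` and the pipeline ends in a `Normalize()` module, `model.similarity` is equivalent to a plain dot product of the unit-length embeddings. A quick sketch, using the placeholder model id from the README:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence_transformers_model_id")  # placeholder id
emb = model.encode(["first sentence", "second sentence"])

# Embeddings are L2-normalized, so the dot product equals cosine similarity
print(float((emb[0] * emb[1]).sum()))
print(model.similarity(emb, emb))  # 2x2 cosine-similarity matrix
```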
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bb28db92f7b339893f9f69c4a76f93fdb8d75f28a028c83fae207b010ca87df7
size 437967672
modules.json
ADDED
@@ -0,0 +1,20 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
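These three entries describe the module pipeline that `SentenceTransformer` assembles on load. For illustration, a sketch of building the equivalent architecture by hand from the base model (parameter values taken from the configs in this commit):

```python
from sentence_transformers import SentenceTransformer, models

# Module 0: MPNet encoder, truncating inputs at 384 tokens
word_embedding = models.Transformer("sentence-transformers/all-mpnet-base-v2", max_seq_length=384)
# Module 1: mean pooling of the 768-dim token embeddings (matches 1_Pooling/config.json)
pooling = models.Pooling(word_embedding.get_word_embedding_dimension(), pooling_mode="mean")
# Module 2: L2 normalization of the pooled sentence embedding
normalize = models.Normalize()

model = SentenceTransformer(modules=[word_embedding, pooling, normalize])
```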
runs/Feb18_06-37-18_9d18770862ef/events.out.tfevents.1739860645.9d18770862ef.7189.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fa92fa41bc8db747d7aa9f08ba34a5ea2b626bd938be267b3ab8dbe9e498e142
size 8738
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 384,
  "do_lower_case": false
}
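`max_seq_length: 384` means inputs longer than 384 tokens are truncated before encoding. The limit is exposed (and adjustable) on the loaded model; the model id is again a placeholder:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence_transformers_model_id")  # placeholder id
print(model.max_seq_length)  # 384

# Longer inputs are silently truncated; the limit can be lowered for speed
model.max_seq_length = 256
```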
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,73 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "104": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "30526": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "cls_token": "<s>",
  "do_lower_case": true,
  "eos_token": "</s>",
  "extra_special_tokens": {},
  "mask_token": "<mask>",
  "max_length": 128,
  "model_max_length": 384,
  "pad_to_multiple_of": null,
  "pad_token": "<pad>",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "</s>",
  "stride": 0,
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "MPNetTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "[UNK]"
}
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1f03dc66115a20cc231388424ba4857549fdc8b3487964815848347220bc248d
size 5560
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff