mpnet-base-all-mqp-binary
Browse files
README.md
CHANGED
@@ -211,9 +211,10 @@ You can finetune this model on your own dataset.
|
|
211 |
- `eval_strategy`: steps
|
212 |
- `per_device_train_batch_size`: 16
|
213 |
- `per_device_eval_batch_size`: 16
|
214 |
-
- `num_train_epochs`: 10
|
215 |
- `warmup_ratio`: 0.1
|
216 |
- `fp16`: True
|
|
|
217 |
- `batch_sampler`: no_duplicates
|
218 |
|
219 |
#### All Hyperparameters
|
@@ -236,7 +237,7 @@ You can finetune this model on your own dataset.
|
|
236 |
- `adam_beta2`: 0.999
|
237 |
- `adam_epsilon`: 1e-08
|
238 |
- `max_grad_norm`: 1.0
|
239 |
-
- `num_train_epochs`: 10
|
240 |
- `max_steps`: -1
|
241 |
- `lr_scheduler_type`: linear
|
242 |
- `lr_scheduler_kwargs`: {}
|
@@ -297,7 +298,7 @@ You can finetune this model on your own dataset.
|
|
297 |
- `dataloader_persistent_workers`: False
|
298 |
- `skip_memory_metrics`: True
|
299 |
- `use_legacy_prediction_loop`: False
|
300 |
-
- `push_to_hub`: False
|
301 |
- `resume_from_checkpoint`: None
|
302 |
- `hub_model_id`: None
|
303 |
- `hub_strategy`: every_save
|
@@ -340,21 +341,7 @@ You can finetune this model on your own dataset.
|
|
340 |
### Training Logs
|
341 |
| Epoch | Step | Training Loss | Validation Loss |
|
342 |
|:------:|:----:|:-------------:|:---------------:|
|
343 |
-
| 0.6536 | 100 | 2.
|
344 |
-
| 1.3072 | 200 | 2.4602 | 2.8035 |
|
345 |
-
| 1.9608 | 300 | 0.9681 | 3.4420 |
|
346 |
-
| 2.6144 | 400 | 0.4578 | 4.1960 |
|
347 |
-
| 3.2680 | 500 | 0.1123 | 4.3254 |
|
348 |
-
| 3.9216 | 600 | 0.0155 | 4.8884 |
|
349 |
-
| 4.5752 | 700 | 0.0026 | 5.0455 |
|
350 |
-
| 5.2288 | 800 | 0.0022 | 5.0907 |
|
351 |
-
| 5.8824 | 900 | 0.0003 | 5.0952 |
|
352 |
-
| 6.5359 | 1000 | 0.0001 | 5.1793 |
|
353 |
-
| 7.1895 | 1100 | 0.0001 | 5.2393 |
|
354 |
-
| 7.8431 | 1200 | 0.0001 | 5.2619 |
|
355 |
-
| 8.4967 | 1300 | 0.0001 | 5.2712 |
|
356 |
-
| 9.1503 | 1400 | 0.0001 | 5.2953 |
|
357 |
-
| 9.8039 | 1500 | 0.0001 | 5.3024 |
|
358 |
|
359 |
|
360 |
### Framework Versions
|
|
|
211 |
- `eval_strategy`: steps
|
212 |
- `per_device_train_batch_size`: 16
|
213 |
- `per_device_eval_batch_size`: 16
|
214 |
+
- `num_train_epochs`: 1
|
215 |
- `warmup_ratio`: 0.1
|
216 |
- `fp16`: True
|
217 |
+
- `push_to_hub`: True
|
218 |
- `batch_sampler`: no_duplicates
|
219 |
|
220 |
#### All Hyperparameters
|
|
|
237 |
- `adam_beta2`: 0.999
|
238 |
- `adam_epsilon`: 1e-08
|
239 |
- `max_grad_norm`: 1.0
|
240 |
+
- `num_train_epochs`: 1
|
241 |
- `max_steps`: -1
|
242 |
- `lr_scheduler_type`: linear
|
243 |
- `lr_scheduler_kwargs`: {}
|
|
|
298 |
- `dataloader_persistent_workers`: False
|
299 |
- `skip_memory_metrics`: True
|
300 |
- `use_legacy_prediction_loop`: False
|
301 |
+
- `push_to_hub`: True
|
302 |
- `resume_from_checkpoint`: None
|
303 |
- `hub_model_id`: None
|
304 |
- `hub_strategy`: every_save
|
|
|
341 |
### Training Logs
|
342 |
| Epoch | Step | Training Loss | Validation Loss |
|
343 |
|:------:|:----:|:-------------:|:---------------:|
|
344 |
+
| 0.6536 | 100 | 2.7722 | 2.8215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
345 |
|
346 |
|
347 |
### Framework Versions
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 437967672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06c9fc017185df78269a8c8628dd77893dca26eed157fbe3923e5e455085ecd2
|
3 |
size 437967672
|
runs/Feb06_08-23-57_90c6fdffb148/events.out.tfevents.1738830343.90c6fdffb148.17069.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ab46833711dbb594ff00deef2d2cbfd942305b4f6d6d553c7c6aa7fd94971de
|
3 |
+
size 5254
|