Training in progress, step 280
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:abb006299516fc0ae808037a652e46dedb7eb19af599c01dac67689022b14def
 size 174655536
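Both binary files in this commit are stored as Git LFS pointers: the repository tracks only the `version` line, a sha256 `oid`, and a byte `size`, while the blob itself lives in LFS storage. As a minimal sketch (not part of this repo) of how a downloaded blob can be checked against such a pointer, assuming a hypothetical helper name `lfs_pointer_matches`:

    import hashlib
    from pathlib import Path

    # Hypothetical helper: verify a local blob against the sha256 oid and
    # byte size recorded in a Git LFS pointer (the two fields in the diff above).
    def lfs_pointer_matches(pointer_text: str, blob_path: str) -> bool:
        fields = dict(
            line.split(" ", 1)
            for line in pointer_text.strip().splitlines()
            if " " in line
        )
        expected_oid = fields["oid"].removeprefix("sha256:")
        expected_size = int(fields["size"])
        blob = Path(blob_path).read_bytes()  # fine for a sketch; stream in chunks for huge files
        return len(blob) == expected_size and hashlib.sha256(blob).hexdigest() == expected_oid

    pointer = (
        "version https://git-lfs.github.com/spec/v1\n"
        "oid sha256:abb006299516fc0ae808037a652e46dedb7eb19af599c01dac67689022b14def\n"
        "size 174655536\n"
    )
    # lfs_pointer_matches(pointer, "adapter_model.safetensors")  # True if the blob is intact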
wandb/run-20250202_235451-rfjfhgaw/files/output.log
CHANGED
@@ -214,3 +214,16 @@ The model is not an instance of PreTrainedModel. No liger kernels will be applied.
 {'loss': 0.126, 'grad_norm': 1.300278663635254, 'learning_rate': 8.687862490269232e-06, 'epoch': 0.72}
 with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
 {'eval_loss': 0.15854938328266144, 'eval_runtime': 23.1914, 'eval_samples_per_second': 21.56, 'eval_steps_per_second': 0.906, 'epoch': 0.72}
+ 78%|██████████████████████████████████████████████▊             | 280/360 [2:19:08<36:48, 27.61s/it]/usr/local/lib/python3.11/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.
+{'loss': 0.1235, 'grad_norm': 1.0385386943817139, 'learning_rate': 8.370158574239466e-06, 'epoch': 0.73}
+{'loss': 0.138, 'grad_norm': 1.8568216562271118, 'learning_rate': 8.056828165944282e-06, 'epoch': 0.73}
+{'loss': 0.1387, 'grad_norm': 1.05785071849823, 'learning_rate': 7.747989096135943e-06, 'epoch': 0.74}
+{'loss': 0.1348, 'grad_norm': 1.2209707498550415, 'learning_rate': 7.443757506558033e-06, 'epoch': 0.74}
+{'loss': 0.1436, 'grad_norm': 2.2911832332611084, 'learning_rate': 7.1442478062692135e-06, 'epoch': 0.75}
+{'loss': 0.1276, 'grad_norm': 1.4366707801818848, 'learning_rate': 6.84957262861873e-06, 'epoch': 0.76}
+{'loss': 0.132, 'grad_norm': 1.3975704908370972, 'learning_rate': 6.559842788889552e-06, 'epoch': 0.76}
+{'loss': 0.121, 'grad_norm': 1.098695993423462, 'learning_rate': 6.275167242625331e-06, 'epoch': 0.77}
+{'loss': 0.1347, 'grad_norm': 2.457047939300537, 'learning_rate': 5.9956530446566305e-06, 'epoch': 0.77}
+{'loss': 0.1298, 'grad_norm': 0.8969805240631104, 'learning_rate': 5.721405308842023e-06, 'epoch': 0.78}
+with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
+{'eval_loss': 0.15774241089820862, 'eval_runtime': 23.2641, 'eval_samples_per_second': 21.492, 'eval_steps_per_second': 0.903, 'epoch': 0.78}
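The FutureWarning captured at step 280 comes from torch/utils/checkpoint.py calling the deprecated CPU autocast entry point; it is emitted by the library, not by this training script. For user code, the migration named in the warning is mechanical; a minimal sketch, assuming bfloat16 as the CPU autocast dtype:

    import torch

    # Deprecated spelling (triggers the FutureWarning seen in the log above):
    #   with torch.cpu.amp.autocast(dtype=torch.bfloat16):
    #       ...

    # Replacement recommended by the warning itself:
    with torch.amp.autocast("cpu", dtype=torch.bfloat16):
        x = torch.randn(8, 8)
        y = x @ x  # matmul runs under CPU autocast in bfloat16

The logged metrics are internally consistent: eval_samples_per_second * eval_runtime is about 500 in both eval blocks (21.56 * 23.19 and 21.492 * 23.26 both come to roughly 500), so the eval split appears to hold about 500 samples, and the progress bar's ETA matches 27.61 s/it * (360 - 280) remaining steps ≈ 36.8 minutes.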
wandb/run-20250202_235451-rfjfhgaw/run-rfjfhgaw.wandb
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:dbe02a8ca2c36e8605f62acdb0cf92b568b0b38b835147a73473da21c6d33ab0
+size 3244032