Training in progress, epoch 0
Browse files
logs/training_log.txt
CHANGED
@@ -1,58 +1,120 @@
|
|
1 |
-
2025-01-08
|
2 |
-
2025-01-08
|
3 |
-
2025-01-08
|
4 |
-
2025-01-08
|
5 |
-
2025-01-08
|
6 |
-
2025-01-08
|
7 |
-
2025-01-08
|
8 |
-
2025-01-08
|
9 |
-
2025-01-08
|
10 |
-
2025-01-08
|
11 |
-
2025-01-08
|
12 |
-
2025-01-08
|
13 |
-
2025-01-08
|
14 |
-
2025-01-08
|
15 |
-
2025-01-08
|
16 |
-
2025-01-08
|
17 |
-
2025-01-08
|
18 |
-
2025-01-08
|
19 |
-
2025-01-08
|
20 |
-
2025-01-08
|
21 |
-
2025-01-08
|
22 |
-
2025-01-08
|
23 |
-
2025-01-08
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
2025-01-08
|
32 |
-
2025-01-08
|
33 |
-
2025-01-08
|
34 |
-
2025-01-08
|
35 |
-
2025-01-08
|
36 |
-
2025-01-08
|
37 |
-
2025-01-08
|
38 |
-
2025-01-08
|
39 |
-
2025-01-08
|
40 |
-
2025-01-08
|
41 |
-
2025-01-08
|
42 |
-
2025-01-08
|
43 |
-
2025-01-08
|
44 |
-
2025-01-08
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
eval_steps_per_second: 0.0790
|
49 |
-
epoch:
|
50 |
-
elapsed_time:
|
51 |
-
step_time:
|
52 |
-
2025-01-
|
53 |
-
2025-01-
|
54 |
-
2025-01-
|
55 |
-
2025-01-
|
56 |
-
2025-01-
|
57 |
-
2025-01-08 18:49:15,799 - INFO - Step 2/2 (100.0%), epoch: 2.0000, step_time: 219.27s, elapsed_time: 1292.32s
|
58 |
-
2025-01-08 18:49:15,801 - INFO - Training completed in 1292.32 seconds
|
|
|
1 |
+
2025-01-08 19:17:21,692 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp2ck6dpv_/test.c -o /tmp/tmp2ck6dpv_/test.o
|
2 |
+
2025-01-08 19:17:21,723 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp2ck6dpv_/test.o -laio -o /tmp/tmp2ck6dpv_/a.out
|
3 |
+
2025-01-08 19:17:22,159 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmppfomhgow/test.c -o /tmp/tmppfomhgow/test.o
|
4 |
+
2025-01-08 19:17:22,204 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmppfomhgow/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmppfomhgow/a.out
|
5 |
+
2025-01-08 19:17:24,497 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp0g0jxvh5/test.c -o /tmp/tmp0g0jxvh5/test.o
|
6 |
+
2025-01-08 19:17:24,525 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp0g0jxvh5/test.o -laio -o /tmp/tmp0g0jxvh5/a.out
|
7 |
+
2025-01-08 19:17:24,555 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp6e8myn4y/test.c -o /tmp/tmp6e8myn4y/test.o
|
8 |
+
2025-01-08 19:17:24,557 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp7vyq5nz_/test.c -o /tmp/tmp7vyq5nz_/test.o
|
9 |
+
2025-01-08 19:17:24,582 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp6e8myn4y/test.o -laio -o /tmp/tmp6e8myn4y/a.out
|
10 |
+
2025-01-08 19:17:24,583 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp7vyq5nz_/test.o -laio -o /tmp/tmp7vyq5nz_/a.out
|
11 |
+
2025-01-08 19:17:24,960 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp2rrm1y3q/test.c -o /tmp/tmp2rrm1y3q/test.o
|
12 |
+
2025-01-08 19:17:24,983 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpbg_v2wps/test.c -o /tmp/tmpbg_v2wps/test.o
|
13 |
+
2025-01-08 19:17:24,986 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp2rrm1y3q/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp2rrm1y3q/a.out
|
14 |
+
2025-01-08 19:17:25,007 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpbg_v2wps/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpbg_v2wps/a.out
|
15 |
+
2025-01-08 19:17:25,049 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp8gt5q4f1/test.c -o /tmp/tmp8gt5q4f1/test.o
|
16 |
+
2025-01-08 19:17:25,071 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp8gt5q4f1/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp8gt5q4f1/a.out
|
17 |
+
2025-01-08 19:22:03,063 - INFO - Training started
|
18 |
+
2025-01-08 19:22:03,063 - INFO - Total steps: 1281
|
19 |
+
2025-01-08 19:34:20,509 - INFO - Step 5/1281 (0.4%), loss: 1.4755, learning_rate: 1.00e-04, epoch: 0.0117, step_time: 1120.55s, elapsed_time: 1120.55s, grad_norm: 0.8934
|
20 |
+
2025-01-08 19:44:33,351 - INFO - Step 10/1281 (0.8%), loss: 1.0298, learning_rate: 1.00e-04, epoch: 0.0234, step_time: 612.84s, elapsed_time: 1733.39s, grad_norm: 0.7723
|
21 |
+
2025-01-08 19:54:45,845 - INFO - Step 15/1281 (1.2%), loss: 0.8797, learning_rate: 1.00e-04, epoch: 0.0351, step_time: 612.49s, elapsed_time: 2345.88s, grad_norm: 1.5860
|
22 |
+
2025-01-08 20:04:57,722 - INFO - Step 20/1281 (1.6%), loss: 0.7675, learning_rate: 9.99e-05, epoch: 0.0468, step_time: 611.88s, elapsed_time: 2957.76s, grad_norm: 1.6310
|
23 |
+
2025-01-08 20:15:05,543 - INFO - Step 25/1281 (2.0%), loss: 0.7195, learning_rate: 9.99e-05, epoch: 0.0585, step_time: 607.82s, elapsed_time: 3565.58s, grad_norm: 1.2201
|
24 |
+
2025-01-08 20:25:08,368 - INFO - Step 30/1281 (2.3%), loss: 0.6904, learning_rate: 9.99e-05, epoch: 0.0702, step_time: 602.82s, elapsed_time: 4168.40s, grad_norm: 0.6741
|
25 |
+
2025-01-08 20:35:09,287 - INFO - Step 35/1281 (2.7%), loss: 0.6628, learning_rate: 9.98e-05, epoch: 0.0819, step_time: 600.92s, elapsed_time: 4769.32s, grad_norm: 0.5197
|
26 |
+
2025-01-08 20:45:12,080 - INFO - Step 40/1281 (3.1%), loss: 0.6241, learning_rate: 9.98e-05, epoch: 0.0936, step_time: 602.79s, elapsed_time: 5372.12s, grad_norm: 0.4424
|
27 |
+
2025-01-08 20:55:15,658 - INFO - Step 45/1281 (3.5%), loss: 0.6229, learning_rate: 9.97e-05, epoch: 0.1053, step_time: 603.58s, elapsed_time: 5975.69s, grad_norm: 0.4752
|
28 |
+
2025-01-08 21:05:17,810 - INFO - Step 50/1281 (3.9%), loss: 0.5978, learning_rate: 9.96e-05, epoch: 0.1170, step_time: 602.15s, elapsed_time: 6577.85s, grad_norm: 0.3438
|
29 |
+
2025-01-08 21:15:19,753 - INFO - Step 55/1281 (4.3%), loss: 0.5847, learning_rate: 9.95e-05, epoch: 0.1287, step_time: 601.94s, elapsed_time: 7179.79s, grad_norm: 0.3600
|
30 |
+
2025-01-08 21:25:21,775 - INFO - Step 60/1281 (4.7%), loss: 0.5686, learning_rate: 9.95e-05, epoch: 0.1404, step_time: 602.02s, elapsed_time: 7781.81s, grad_norm: 0.3749
|
31 |
+
2025-01-08 21:35:25,945 - INFO - Step 65/1281 (5.1%), loss: 0.5787, learning_rate: 9.94e-05, epoch: 0.1520, step_time: 604.17s, elapsed_time: 8385.98s, grad_norm: 0.3729
|
32 |
+
2025-01-08 21:45:26,630 - INFO - Step 70/1281 (5.5%), loss: 0.5608, learning_rate: 9.93e-05, epoch: 0.1637, step_time: 600.69s, elapsed_time: 8986.67s, grad_norm: 0.3449
|
33 |
+
2025-01-08 21:55:29,457 - INFO - Step 75/1281 (5.9%), loss: 0.5192, learning_rate: 9.92e-05, epoch: 0.1754, step_time: 602.83s, elapsed_time: 9589.49s, grad_norm: 0.3919
|
34 |
+
2025-01-08 22:05:33,796 - INFO - Step 80/1281 (6.2%), loss: 0.5120, learning_rate: 9.90e-05, epoch: 0.1871, step_time: 604.34s, elapsed_time: 10193.83s, grad_norm: 0.3015
|
35 |
+
2025-01-08 22:15:37,562 - INFO - Step 85/1281 (6.6%), loss: 0.4869, learning_rate: 9.89e-05, epoch: 0.1988, step_time: 603.77s, elapsed_time: 10797.60s, grad_norm: 0.2931
|
36 |
+
2025-01-08 22:25:41,155 - INFO - Step 90/1281 (7.0%), loss: 0.4632, learning_rate: 9.88e-05, epoch: 0.2105, step_time: 603.59s, elapsed_time: 11401.19s, grad_norm: 0.3108
|
37 |
+
2025-01-08 22:35:45,900 - INFO - Step 95/1281 (7.4%), loss: 0.4794, learning_rate: 9.86e-05, epoch: 0.2222, step_time: 604.74s, elapsed_time: 12005.94s, grad_norm: 0.3473
|
38 |
+
2025-01-08 22:45:48,393 - INFO - Step 100/1281 (7.8%), loss: 0.4609, learning_rate: 9.85e-05, epoch: 0.2339, step_time: 602.49s, elapsed_time: 12608.43s, grad_norm: 0.2963
|
39 |
+
2025-01-08 22:55:51,306 - INFO - Step 105/1281 (8.2%), loss: 0.4842, learning_rate: 9.84e-05, epoch: 0.2456, step_time: 602.91s, elapsed_time: 13211.34s, grad_norm: 0.2883
|
40 |
+
2025-01-08 23:05:52,794 - INFO - Step 110/1281 (8.6%), loss: 0.4557, learning_rate: 9.82e-05, epoch: 0.2573, step_time: 601.49s, elapsed_time: 13812.83s, grad_norm: 0.2928
|
41 |
+
2025-01-08 23:15:55,888 - INFO - Step 115/1281 (9.0%), loss: 0.4644, learning_rate: 9.80e-05, epoch: 0.2690, step_time: 603.09s, elapsed_time: 14415.92s, grad_norm: 0.2669
|
42 |
+
2025-01-08 23:25:57,881 - INFO - Step 120/1281 (9.4%), loss: 0.4490, learning_rate: 9.79e-05, epoch: 0.2807, step_time: 601.99s, elapsed_time: 15017.92s, grad_norm: 0.3591
|
43 |
+
2025-01-08 23:35:58,659 - INFO - Step 125/1281 (9.8%), loss: 0.4663, learning_rate: 9.77e-05, epoch: 0.2924, step_time: 600.78s, elapsed_time: 15618.70s, grad_norm: 0.2833
|
44 |
+
2025-01-08 23:46:00,173 - INFO - Step 130/1281 (10.1%), loss: 0.4461, learning_rate: 9.75e-05, epoch: 0.3041, step_time: 601.51s, elapsed_time: 16220.21s, grad_norm: 0.2706
|
45 |
+
2025-01-08 23:56:01,734 - INFO - Step 135/1281 (10.5%), loss: 0.4481, learning_rate: 9.73e-05, epoch: 0.3158, step_time: 601.56s, elapsed_time: 16821.77s, grad_norm: 0.2958
|
46 |
+
2025-01-09 00:06:06,317 - INFO - Step 140/1281 (10.9%), loss: 0.4631, learning_rate: 9.71e-05, epoch: 0.3275, step_time: 604.58s, elapsed_time: 17426.35s, grad_norm: 0.2749
|
47 |
+
2025-01-09 00:16:09,098 - INFO - Step 145/1281 (11.3%), loss: 0.4503, learning_rate: 9.69e-05, epoch: 0.3392, step_time: 602.78s, elapsed_time: 18029.13s, grad_norm: 0.3135
|
48 |
+
2025-01-09 00:26:11,816 - INFO - Step 150/1281 (11.7%), loss: 0.4389, learning_rate: 9.67e-05, epoch: 0.3509, step_time: 602.72s, elapsed_time: 18631.85s, grad_norm: 0.2961
|
49 |
+
2025-01-09 00:36:12,847 - INFO - Step 155/1281 (12.1%), loss: 0.4391, learning_rate: 9.64e-05, epoch: 0.3626, step_time: 601.03s, elapsed_time: 19232.88s, grad_norm: 0.2587
|
50 |
+
2025-01-09 00:46:15,889 - INFO - Step 160/1281 (12.5%), loss: 0.4372, learning_rate: 9.62e-05, epoch: 0.3743, step_time: 603.04s, elapsed_time: 19835.93s, grad_norm: 0.2949
|
51 |
+
2025-01-09 00:56:18,225 - INFO - Step 165/1281 (12.9%), loss: 0.4333, learning_rate: 9.60e-05, epoch: 0.3860, step_time: 602.34s, elapsed_time: 20438.26s, grad_norm: 0.2650
|
52 |
+
2025-01-09 01:06:21,912 - INFO - Step 170/1281 (13.3%), loss: 0.4352, learning_rate: 9.57e-05, epoch: 0.3977, step_time: 603.69s, elapsed_time: 21041.95s, grad_norm: 0.2787
|
53 |
+
2025-01-09 01:16:26,354 - INFO - Step 175/1281 (13.7%), loss: 0.4215, learning_rate: 9.55e-05, epoch: 0.4094, step_time: 604.44s, elapsed_time: 21646.39s, grad_norm: 0.2737
|
54 |
+
2025-01-09 01:26:27,417 - INFO - Step 180/1281 (14.1%), loss: 0.4382, learning_rate: 9.52e-05, epoch: 0.4211, step_time: 601.06s, elapsed_time: 22247.45s, grad_norm: 0.2691
|
55 |
+
2025-01-09 01:36:29,557 - INFO - Step 185/1281 (14.4%), loss: 0.4456, learning_rate: 9.49e-05, epoch: 0.4327, step_time: 602.14s, elapsed_time: 22849.59s, grad_norm: 0.2718
|
56 |
+
2025-01-09 01:46:30,681 - INFO - Step 190/1281 (14.8%), loss: 0.4134, learning_rate: 9.47e-05, epoch: 0.4444, step_time: 601.12s, elapsed_time: 23450.72s, grad_norm: 0.2703
|
57 |
+
2025-01-09 01:56:32,016 - INFO - Step 195/1281 (15.2%), loss: 0.4200, learning_rate: 9.44e-05, epoch: 0.4561, step_time: 601.33s, elapsed_time: 24052.05s, grad_norm: 0.2519
|
58 |
+
2025-01-09 02:06:34,240 - INFO - Step 200/1281 (15.6%), loss: 0.4261, learning_rate: 9.41e-05, epoch: 0.4678, step_time: 602.22s, elapsed_time: 24654.28s, grad_norm: 0.3421
|
59 |
+
2025-01-09 02:16:36,844 - INFO - Step 205/1281 (16.0%), loss: 0.3964, learning_rate: 9.38e-05, epoch: 0.4795, step_time: 602.60s, elapsed_time: 25256.88s, grad_norm: 0.2663
|
60 |
+
2025-01-09 02:26:39,776 - INFO - Step 210/1281 (16.4%), loss: 0.4266, learning_rate: 9.35e-05, epoch: 0.4912, step_time: 602.93s, elapsed_time: 25859.81s, grad_norm: 0.2692
|
61 |
+
2025-01-09 02:36:42,346 - INFO - Step 215/1281 (16.8%), loss: 0.4340, learning_rate: 9.32e-05, epoch: 0.5029, step_time: 602.57s, elapsed_time: 26462.38s, grad_norm: 0.2842
|
62 |
+
2025-01-09 02:46:44,912 - INFO - Step 220/1281 (17.2%), loss: 0.4246, learning_rate: 9.29e-05, epoch: 0.5146, step_time: 602.57s, elapsed_time: 27064.95s, grad_norm: 0.4175
|
63 |
+
2025-01-09 02:56:48,074 - INFO - Step 225/1281 (17.6%), loss: 0.4436, learning_rate: 9.26e-05, epoch: 0.5263, step_time: 603.16s, elapsed_time: 27668.11s, grad_norm: 0.2852
|
64 |
+
2025-01-09 03:06:49,000 - INFO - Step 230/1281 (18.0%), loss: 0.4152, learning_rate: 9.23e-05, epoch: 0.5380, step_time: 600.93s, elapsed_time: 28269.04s, grad_norm: 0.2848
|
65 |
+
2025-01-09 03:16:50,893 - INFO - Step 235/1281 (18.3%), loss: 0.4013, learning_rate: 9.19e-05, epoch: 0.5497, step_time: 601.89s, elapsed_time: 28870.93s, grad_norm: 0.2704
|
66 |
+
2025-01-09 03:26:53,653 - INFO - Step 240/1281 (18.7%), loss: 0.3941, learning_rate: 9.16e-05, epoch: 0.5614, step_time: 602.76s, elapsed_time: 29473.69s, grad_norm: 0.2616
|
67 |
+
2025-01-09 03:36:53,946 - INFO - Step 245/1281 (19.1%), loss: 0.4165, learning_rate: 9.12e-05, epoch: 0.5731, step_time: 600.29s, elapsed_time: 30073.98s, grad_norm: 0.2544
|
68 |
+
2025-01-09 03:46:56,614 - INFO - Step 250/1281 (19.5%), loss: 0.4177, learning_rate: 9.09e-05, epoch: 0.5848, step_time: 602.67s, elapsed_time: 30676.65s, grad_norm: 0.2776
|
69 |
+
2025-01-09 03:56:57,796 - INFO - Step 255/1281 (19.9%), loss: 0.4018, learning_rate: 9.05e-05, epoch: 0.5965, step_time: 601.18s, elapsed_time: 31277.83s, grad_norm: 0.2499
|
70 |
+
2025-01-09 04:07:00,066 - INFO - Step 260/1281 (20.3%), loss: 0.4138, learning_rate: 9.02e-05, epoch: 0.6082, step_time: 602.27s, elapsed_time: 31880.10s, grad_norm: 0.2693
|
71 |
+
2025-01-09 04:17:02,224 - INFO - Step 265/1281 (20.7%), loss: 0.3984, learning_rate: 8.98e-05, epoch: 0.6199, step_time: 602.16s, elapsed_time: 32482.26s, grad_norm: 0.2744
|
72 |
+
2025-01-09 04:27:03,630 - INFO - Step 270/1281 (21.1%), loss: 0.4269, learning_rate: 8.94e-05, epoch: 0.6316, step_time: 601.41s, elapsed_time: 33083.67s, grad_norm: 0.2762
|
73 |
+
2025-01-09 04:37:06,555 - INFO - Step 275/1281 (21.5%), loss: 0.3986, learning_rate: 8.91e-05, epoch: 0.6433, step_time: 602.93s, elapsed_time: 33686.59s, grad_norm: 0.2647
|
74 |
+
2025-01-09 04:47:08,811 - INFO - Step 280/1281 (21.9%), loss: 0.4057, learning_rate: 8.87e-05, epoch: 0.6550, step_time: 602.26s, elapsed_time: 34288.85s, grad_norm: 0.2787
|
75 |
+
2025-01-09 04:57:11,235 - INFO - Step 285/1281 (22.2%), loss: 0.4143, learning_rate: 8.83e-05, epoch: 0.6667, step_time: 602.42s, elapsed_time: 34891.27s, grad_norm: 0.3001
|
76 |
+
2025-01-09 05:07:12,645 - INFO - Step 290/1281 (22.6%), loss: 0.4012, learning_rate: 8.79e-05, epoch: 0.6784, step_time: 601.41s, elapsed_time: 35492.68s, grad_norm: 0.2544
|
77 |
+
2025-01-09 05:17:14,293 - INFO - Step 295/1281 (23.0%), loss: 0.3942, learning_rate: 8.75e-05, epoch: 0.6901, step_time: 601.65s, elapsed_time: 36094.33s, grad_norm: 0.2604
|
78 |
+
2025-01-09 05:27:17,925 - INFO - Step 300/1281 (23.4%), loss: 0.3974, learning_rate: 8.71e-05, epoch: 0.7018, step_time: 603.63s, elapsed_time: 36697.96s, grad_norm: 0.2718
|
79 |
+
2025-01-09 05:37:19,535 - INFO - Step 305/1281 (23.8%), loss: 0.3967, learning_rate: 8.67e-05, epoch: 0.7135, step_time: 601.61s, elapsed_time: 37299.57s, grad_norm: 0.2717
|
80 |
+
2025-01-09 05:47:20,092 - INFO - Step 310/1281 (24.2%), loss: 0.3765, learning_rate: 8.62e-05, epoch: 0.7251, step_time: 600.56s, elapsed_time: 37900.13s, grad_norm: 0.2735
|
81 |
+
2025-01-09 05:57:20,851 - INFO - Step 315/1281 (24.6%), loss: 0.4131, learning_rate: 8.58e-05, epoch: 0.7368, step_time: 600.76s, elapsed_time: 38500.89s, grad_norm: 0.2609
|
82 |
+
2025-01-09 06:07:22,985 - INFO - Step 320/1281 (25.0%), loss: 0.3945, learning_rate: 8.54e-05, epoch: 0.7485, step_time: 602.13s, elapsed_time: 39103.02s, grad_norm: 0.2507
|
83 |
+
2025-01-09 06:17:24,449 - INFO - Step 325/1281 (25.4%), loss: 0.3916, learning_rate: 8.49e-05, epoch: 0.7602, step_time: 601.46s, elapsed_time: 39704.49s, grad_norm: 0.2386
|
84 |
+
2025-01-09 06:27:25,872 - INFO - Step 330/1281 (25.8%), loss: 0.3894, learning_rate: 8.45e-05, epoch: 0.7719, step_time: 601.42s, elapsed_time: 40305.91s, grad_norm: 0.2645
|
85 |
+
2025-01-09 06:37:27,281 - INFO - Step 335/1281 (26.2%), loss: 0.3955, learning_rate: 8.41e-05, epoch: 0.7836, step_time: 601.41s, elapsed_time: 40907.32s, grad_norm: 0.2722
|
86 |
+
2025-01-09 06:47:28,321 - INFO - Step 340/1281 (26.5%), loss: 0.3725, learning_rate: 8.36e-05, epoch: 0.7953, step_time: 601.04s, elapsed_time: 41508.36s, grad_norm: 0.2430
|
87 |
+
2025-01-09 06:57:30,311 - INFO - Step 345/1281 (26.9%), loss: 0.3883, learning_rate: 8.31e-05, epoch: 0.8070, step_time: 601.99s, elapsed_time: 42110.35s, grad_norm: 0.2525
|
88 |
+
2025-01-09 07:07:32,983 - INFO - Step 350/1281 (27.3%), loss: 0.3883, learning_rate: 8.27e-05, epoch: 0.8187, step_time: 602.67s, elapsed_time: 42713.02s, grad_norm: 0.2387
|
89 |
+
2025-01-09 07:17:34,098 - INFO - Step 355/1281 (27.7%), loss: 0.3906, learning_rate: 8.22e-05, epoch: 0.8304, step_time: 601.12s, elapsed_time: 43314.13s, grad_norm: 0.2725
|
90 |
+
2025-01-09 07:27:37,098 - INFO - Step 360/1281 (28.1%), loss: 0.3751, learning_rate: 8.17e-05, epoch: 0.8421, step_time: 603.00s, elapsed_time: 43917.13s, grad_norm: 0.2814
|
91 |
+
2025-01-09 07:37:37,150 - INFO - Step 365/1281 (28.5%), loss: 0.3858, learning_rate: 8.13e-05, epoch: 0.8538, step_time: 600.05s, elapsed_time: 44517.19s, grad_norm: 0.2561
|
92 |
+
2025-01-09 07:47:40,487 - INFO - Step 370/1281 (28.9%), loss: 0.3629, learning_rate: 8.08e-05, epoch: 0.8655, step_time: 603.34s, elapsed_time: 45120.52s, grad_norm: 0.2712
|
93 |
+
2025-01-09 07:57:41,870 - INFO - Step 375/1281 (29.3%), loss: 0.3733, learning_rate: 8.03e-05, epoch: 0.8772, step_time: 601.38s, elapsed_time: 45721.91s, grad_norm: 0.2457
|
94 |
+
2025-01-09 08:07:42,687 - INFO - Step 380/1281 (29.7%), loss: 0.3691, learning_rate: 7.98e-05, epoch: 0.8889, step_time: 600.82s, elapsed_time: 46322.72s, grad_norm: 0.2544
|
95 |
+
2025-01-09 08:17:46,148 - INFO - Step 385/1281 (30.1%), loss: 0.3768, learning_rate: 7.93e-05, epoch: 0.9006, step_time: 603.46s, elapsed_time: 46926.18s, grad_norm: 0.2821
|
96 |
+
2025-01-09 08:27:49,374 - INFO - Step 390/1281 (30.4%), loss: 0.3914, learning_rate: 7.88e-05, epoch: 0.9123, step_time: 603.23s, elapsed_time: 47529.41s, grad_norm: 0.2370
|
97 |
+
2025-01-09 08:37:51,424 - INFO - Step 395/1281 (30.8%), loss: 0.3796, learning_rate: 7.83e-05, epoch: 0.9240, step_time: 602.05s, elapsed_time: 48131.46s, grad_norm: 0.2675
|
98 |
+
2025-01-09 08:47:53,337 - INFO - Step 400/1281 (31.2%), loss: 0.3701, learning_rate: 7.78e-05, epoch: 0.9357, step_time: 601.91s, elapsed_time: 48733.37s, grad_norm: 0.2477
|
99 |
+
2025-01-09 08:57:57,081 - INFO - Step 405/1281 (31.6%), loss: 0.3703, learning_rate: 7.73e-05, epoch: 0.9474, step_time: 603.74s, elapsed_time: 49337.12s, grad_norm: 0.2288
|
100 |
+
2025-01-09 09:07:58,310 - INFO - Step 410/1281 (32.0%), loss: 0.3958, learning_rate: 7.68e-05, epoch: 0.9591, step_time: 601.23s, elapsed_time: 49938.35s, grad_norm: 0.2681
|
101 |
+
2025-01-09 09:17:59,916 - INFO - Step 415/1281 (32.4%), loss: 0.3704, learning_rate: 7.63e-05, epoch: 0.9708, step_time: 601.61s, elapsed_time: 50539.95s, grad_norm: 0.2619
|
102 |
+
2025-01-09 09:28:02,267 - INFO - Step 420/1281 (32.8%), loss: 0.3609, learning_rate: 7.57e-05, epoch: 0.9825, step_time: 602.35s, elapsed_time: 51142.30s, grad_norm: 0.2586
|
103 |
+
2025-01-09 09:38:04,706 - INFO - Step 425/1281 (33.2%), loss: 0.3553, learning_rate: 7.52e-05, epoch: 0.9942, step_time: 602.44s, elapsed_time: 51744.74s, grad_norm: 0.2764
|
104 |
+
2025-01-09 11:03:43,855 - INFO - Loss improved from inf to 0.37839
|
105 |
+
2025-01-09 11:03:43,855 - INFO - Loss improved from inf to 0.37839
|
106 |
+
2025-01-09 11:03:43,855 - INFO - Loss improved from inf to 0.37839
|
107 |
+
2025-01-09 11:03:43,856 - INFO - Step 427/1281 (33.3%), epoch: 0.9988, step_time: 5139.15s, elapsed_time: 56883.89s
|
108 |
+
2025-01-09 11:03:43,858 - INFO - Evaluation Results:
|
109 |
+
eval_loss: 0.3784
|
110 |
+
eval_runtime: 4839.9190
|
111 |
+
eval_samples_per_second: 0.3140
|
112 |
eval_steps_per_second: 0.0790
|
113 |
+
epoch: 0.9988
|
114 |
+
elapsed_time: 56883.89s
|
115 |
+
step_time: 5139.15s
|
116 |
+
2025-01-09 11:03:43,858 - INFO - Loss improved from inf to 0.37839
|
117 |
+
2025-01-09 11:07:38,811 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-427/pytorch_model_fsdp_0
|
118 |
+
2025-01-09 11:07:41,993 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-427/pytorch_model_fsdp_0
|
119 |
+
2025-01-09 11:07:48,542 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-427/optimizer_0
|
120 |
+
2025-01-09 11:07:54,762 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-427/optimizer_0
|
|
|
|
runs/Jan08_19-15-39_gpu-server/events.out.tfevents.1736364121.gpu-server.1095267.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:541a840766d43f95086492b9860aca0e0b96e56395216e99c70d250b0e305019
|
3 |
+
size 23713
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45369fb129012ba11d00d00653f2cc946852e10c134daf9fc8b8c017c046bdbf
|
3 |
size 5560
|