Training in progress, epoch 1
Browse files
logs/training_log.txt
CHANGED
@@ -1,23 +1,53 @@
|
|
1 |
-
2025-01-08 11:
|
2 |
-
2025-01-08 11:
|
3 |
-
2025-01-08 11:
|
4 |
-
2025-01-08 11:
|
5 |
-
2025-01-08 11:
|
6 |
-
2025-01-08 11:
|
7 |
-
2025-01-08 11:
|
8 |
-
2025-01-08 11:
|
9 |
-
2025-01-08 11:
|
10 |
-
2025-01-08 11:
|
11 |
-
2025-01-08 11:
|
12 |
-
2025-01-08 11:
|
13 |
-
2025-01-08 11:
|
14 |
-
2025-01-08 11:
|
15 |
-
2025-01-08 11:
|
16 |
-
2025-01-08 11:
|
17 |
-
2025-01-08
|
18 |
-
2025-01-08
|
19 |
-
2025-01-08
|
20 |
-
2025-01-08
|
21 |
-
2025-01-08
|
22 |
-
2025-01-08
|
23 |
-
2025-01-08
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2025-01-08 11:56:47,176 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpfjh4vmv3/test.c -o /tmp/tmpfjh4vmv3/test.o
|
2 |
+
2025-01-08 11:56:47,202 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpfjh4vmv3/test.o -laio -o /tmp/tmpfjh4vmv3/a.out
|
3 |
+
2025-01-08 11:56:47,286 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmplwmc65tr/test.c -o /tmp/tmplwmc65tr/test.o
|
4 |
+
2025-01-08 11:56:47,295 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpwle52tiy/test.c -o /tmp/tmpwle52tiy/test.o
|
5 |
+
2025-01-08 11:56:47,313 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmplwmc65tr/test.o -laio -o /tmp/tmplwmc65tr/a.out
|
6 |
+
2025-01-08 11:56:47,314 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp3oraphll/test.c -o /tmp/tmp3oraphll/test.o
|
7 |
+
2025-01-08 11:56:47,314 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpwle52tiy/test.o -laio -o /tmp/tmpwle52tiy/a.out
|
8 |
+
2025-01-08 11:56:47,334 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp3oraphll/test.o -laio -o /tmp/tmp3oraphll/a.out
|
9 |
+
2025-01-08 11:56:47,614 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpt_nlf1ik/test.c -o /tmp/tmpt_nlf1ik/test.o
|
10 |
+
2025-01-08 11:56:47,640 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpt_nlf1ik/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpt_nlf1ik/a.out
|
11 |
+
2025-01-08 11:56:47,744 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpzhxypo42/test.c -o /tmp/tmpzhxypo42/test.o
|
12 |
+
2025-01-08 11:56:47,763 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpcqbf3ea7/test.c -o /tmp/tmpcqbf3ea7/test.o
|
13 |
+
2025-01-08 11:56:47,768 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpzhxypo42/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpzhxypo42/a.out
|
14 |
+
2025-01-08 11:56:47,784 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpcqbf3ea7/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpcqbf3ea7/a.out
|
15 |
+
2025-01-08 11:56:47,801 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpg8qdjlt6/test.c -o /tmp/tmpg8qdjlt6/test.o
|
16 |
+
2025-01-08 11:56:47,826 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpg8qdjlt6/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpg8qdjlt6/a.out
|
17 |
+
2025-01-08 12:02:43,814 - INFO - Training started
|
18 |
+
2025-01-08 12:02:43,814 - INFO - Total steps: 2
|
19 |
+
2025-01-08 12:08:39,753 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp6uspoou5/test.c -o /tmp/tmp6uspoou5/test.o
|
20 |
+
2025-01-08 12:08:39,779 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp6uspoou5/test.o -laio -o /tmp/tmp6uspoou5/a.out
|
21 |
+
2025-01-08 12:08:39,923 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp2dn1gkyg/test.c -o /tmp/tmp2dn1gkyg/test.o
|
22 |
+
2025-01-08 12:08:39,941 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp2dn1gkyg/test.o -laio -o /tmp/tmp2dn1gkyg/a.out
|
23 |
+
2025-01-08 12:08:39,949 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmprht5flqq/test.c -o /tmp/tmprht5flqq/test.o
|
24 |
+
2025-01-08 12:08:39,970 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmprht5flqq/test.o -laio -o /tmp/tmprht5flqq/a.out
|
25 |
+
2025-01-08 12:08:40,051 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmprprf_vgp/test.c -o /tmp/tmprprf_vgp/test.o
|
26 |
+
2025-01-08 12:08:40,076 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmprprf_vgp/test.o -laio -o /tmp/tmprprf_vgp/a.out
|
27 |
+
2025-01-08 12:08:40,183 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpvzpvk8rt/test.c -o /tmp/tmpvzpvk8rt/test.o
|
28 |
+
2025-01-08 12:08:40,209 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpvzpvk8rt/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpvzpvk8rt/a.out
|
29 |
+
2025-01-08 12:08:40,370 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpi2rfsg_y/test.c -o /tmp/tmpi2rfsg_y/test.o
|
30 |
+
2025-01-08 12:08:40,397 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpi2rfsg_y/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpi2rfsg_y/a.out
|
31 |
+
2025-01-08 12:08:40,422 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp_jxnwkd3/test.c -o /tmp/tmp_jxnwkd3/test.o
|
32 |
+
2025-01-08 12:08:40,450 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp_jxnwkd3/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp_jxnwkd3/a.out
|
33 |
+
2025-01-08 12:08:40,637 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpwedoozg1/test.c -o /tmp/tmpwedoozg1/test.o
|
34 |
+
2025-01-08 12:08:40,662 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpwedoozg1/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpwedoozg1/a.out
|
35 |
+
2025-01-08 12:13:40,449 - INFO - Training started
|
36 |
+
2025-01-08 12:13:40,449 - INFO - Total steps: 2
|
37 |
+
2025-01-08 12:16:40,735 - INFO - Loss improved from inf to 1.98058
|
38 |
+
2025-01-08 12:16:40,735 - INFO - Loss improved from inf to 1.98058
|
39 |
+
2025-01-08 12:16:40,735 - INFO - Loss improved from inf to 1.98058
|
40 |
+
2025-01-08 12:16:40,737 - INFO - Step 1/2 (50.0%), epoch: 1.0000, step_time: 578.66s, elapsed_time: 578.66s
|
41 |
+
2025-01-08 12:16:40,738 - INFO - Evaluation Results:
|
42 |
+
eval_loss: 1.9806
|
43 |
+
eval_runtime: 24.7167
|
44 |
+
eval_samples_per_second: 0.3240
|
45 |
+
eval_steps_per_second: 0.0810
|
46 |
+
epoch: 1.0000
|
47 |
+
elapsed_time: 578.66s
|
48 |
+
step_time: 578.66s
|
49 |
+
2025-01-08 12:16:40,738 - INFO - Loss improved from inf to 1.98058
|
50 |
+
2025-01-08 12:20:41,402 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
|
51 |
+
2025-01-08 12:20:44,678 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
|
52 |
+
2025-01-08 12:20:50,772 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
|
53 |
+
2025-01-08 12:20:57,033 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
|
runs/Jan08_11-55-09_gpu-server/events.out.tfevents.1736337762.gpu-server.727342.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:594ec92bde44495543826f4e5d64032b46f63401188167f48569f1dd7154ca6a
|
3 |
+
size 5607
|
runs/Jan08_12-07-01_gpu-server/events.out.tfevents.1736338418.gpu-server.744461.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7a5857768f9a682c9c49b2c5858d340a0c66cefe23bc49315d7e7cf31163d15
|
3 |
+
size 5873
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ed3409351f95a01c5cb5714d45dfe9fdf866624fcf68827bf48c20ab52d5def
|
3 |
+
size 5560
|