Training in progress, epoch 1
Browse files
logs/training_log.txt
CHANGED
@@ -1,41 +1,35 @@
|
|
1 |
-
2025-01-08 13:
|
2 |
-
2025-01-08 13:
|
3 |
-
2025-01-08 13:
|
4 |
-
2025-01-08 13:
|
5 |
-
2025-01-08 13:
|
6 |
-
2025-01-08 13:
|
7 |
-
2025-01-08 13:
|
8 |
-
2025-01-08 13:
|
9 |
-
2025-01-08 13:
|
10 |
-
2025-01-08 13:
|
11 |
-
2025-01-08 13:
|
12 |
-
2025-01-08 13:
|
13 |
-
2025-01-08 13:
|
14 |
-
2025-01-08 13:
|
15 |
-
2025-01-08 13:
|
16 |
-
2025-01-08 13:
|
17 |
-
2025-01-08 13:
|
18 |
-
2025-01-08 13:
|
19 |
-
2025-01-08 13:
|
20 |
-
2025-01-08 13:
|
21 |
-
2025-01-08 13:
|
22 |
-
2025-01-08 13:
|
23 |
-
2025-01-08 13:
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
eval_loss: 1.9796
|
29 |
-
eval_runtime: 23.7191
|
30 |
-
eval_samples_per_second: 0.3370
|
31 |
-
eval_steps_per_second: 0.0840
|
32 |
epoch: 1.0000
|
33 |
-
elapsed_time:
|
34 |
-
step_time:
|
35 |
-
2025-01-08 13:
|
36 |
-
2025-01-08 13:
|
37 |
-
2025-01-08 13:
|
38 |
-
2025-01-08 13:
|
39 |
-
2025-01-08 13:
|
40 |
-
2025-01-08 13:24:55,351 - INFO - Step 1/1 (100.0%), epoch: 1.0000, step_time: 75.98s, elapsed_time: 569.81s
|
41 |
-
2025-01-08 13:24:55,353 - INFO - Training completed in 569.81 seconds
|
|
|
1 |
+
2025-01-08 13:49:01,305 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpejuv75wg/test.c -o /tmp/tmpejuv75wg/test.o
|
2 |
+
2025-01-08 13:49:01,329 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpejuv75wg/test.o -laio -o /tmp/tmpejuv75wg/a.out
|
3 |
+
2025-01-08 13:49:01,445 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmph1wqufet/test.c -o /tmp/tmph1wqufet/test.o
|
4 |
+
2025-01-08 13:49:01,461 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpn7ir_9h3/test.c -o /tmp/tmpn7ir_9h3/test.o
|
5 |
+
2025-01-08 13:49:01,462 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmplbt1g8ij/test.c -o /tmp/tmplbt1g8ij/test.o
|
6 |
+
2025-01-08 13:49:01,470 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmph1wqufet/test.o -laio -o /tmp/tmph1wqufet/a.out
|
7 |
+
2025-01-08 13:49:01,490 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmplbt1g8ij/test.o -laio -o /tmp/tmplbt1g8ij/a.out
|
8 |
+
2025-01-08 13:49:01,490 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpn7ir_9h3/test.o -laio -o /tmp/tmpn7ir_9h3/a.out
|
9 |
+
2025-01-08 13:49:01,791 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp387_6tzl/test.c -o /tmp/tmp387_6tzl/test.o
|
10 |
+
2025-01-08 13:49:01,819 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp387_6tzl/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp387_6tzl/a.out
|
11 |
+
2025-01-08 13:49:01,929 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpbb4cu51n/test.c -o /tmp/tmpbb4cu51n/test.o
|
12 |
+
2025-01-08 13:49:01,940 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp6me18vjb/test.c -o /tmp/tmp6me18vjb/test.o
|
13 |
+
2025-01-08 13:49:01,951 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpbb4cu51n/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpbb4cu51n/a.out
|
14 |
+
2025-01-08 13:49:01,961 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpu5nbozol/test.c -o /tmp/tmpu5nbozol/test.o
|
15 |
+
2025-01-08 13:49:01,966 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp6me18vjb/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp6me18vjb/a.out
|
16 |
+
2025-01-08 13:49:01,985 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpu5nbozol/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpu5nbozol/a.out
|
17 |
+
2025-01-08 13:51:07,759 - INFO - Training started
|
18 |
+
2025-01-08 13:51:07,759 - INFO - Total steps: 2
|
19 |
+
2025-01-08 13:53:58,210 - INFO - Loss improved from inf to 1.97976
|
20 |
+
2025-01-08 13:53:58,210 - INFO - Loss improved from inf to 1.97976
|
21 |
+
2025-01-08 13:53:58,210 - INFO - Loss improved from inf to 1.97976
|
22 |
+
2025-01-08 13:53:58,212 - INFO - Step 1/2 (50.0%), epoch: 1.0000, step_time: 394.79s, elapsed_time: 394.79s
|
23 |
+
2025-01-08 13:53:58,213 - INFO - Evaluation Results:
|
24 |
+
eval_loss: 1.9798
|
25 |
+
eval_runtime: 23.5550
|
26 |
+
eval_samples_per_second: 0.3400
|
27 |
+
eval_steps_per_second: 0.0850
|
|
|
|
|
|
|
|
|
28 |
epoch: 1.0000
|
29 |
+
elapsed_time: 394.79s
|
30 |
+
step_time: 394.79s
|
31 |
+
2025-01-08 13:53:58,213 - INFO - Loss improved from inf to 1.97976
|
32 |
+
2025-01-08 13:54:59,859 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
|
33 |
+
2025-01-08 13:55:03,136 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
|
34 |
+
2025-01-08 13:55:08,046 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
|
35 |
+
2025-01-08 13:55:14,259 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
|
|
|
|
runs/Jan08_13-47-23_gpu-server/events.out.tfevents.1736344266.gpu-server.882619.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8dc1e98fd384467f592194e00f17315e978da77c8188ec70ec9d163ed6d145e6
|
3 |
+
size 5873
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c371cb2d8df70dd7c76903b3ee3adcead61312f419be560aae3002d5638c614
|
3 |
size 5560
|