Adil1567 commited on
Commit
c84468d
·
verified ·
1 Parent(s): c85d91f

Training in progress, epoch 1

Browse files
logs/training_log.txt CHANGED
@@ -1,23 +1,53 @@
1
- 2025-01-08 11:02:41,083 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp9dziixt1/test.c -o /tmp/tmp9dziixt1/test.o
2
- 2025-01-08 11:02:41,109 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp9dziixt1/test.o -laio -o /tmp/tmp9dziixt1/a.out
3
- 2025-01-08 11:02:41,269 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpidcu768k/test.c -o /tmp/tmpidcu768k/test.o
4
- 2025-01-08 11:02:41,277 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp2y1e8qh2/test.c -o /tmp/tmp2y1e8qh2/test.o
5
- 2025-01-08 11:02:41,297 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpidcu768k/test.o -laio -o /tmp/tmpidcu768k/a.out
6
- 2025-01-08 11:02:41,302 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp2y1e8qh2/test.o -laio -o /tmp/tmp2y1e8qh2/a.out
7
- 2025-01-08 11:02:41,318 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp24cnxal2/test.c -o /tmp/tmp24cnxal2/test.o
8
- 2025-01-08 11:02:41,344 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp24cnxal2/test.o -laio -o /tmp/tmp24cnxal2/a.out
9
- 2025-01-08 11:02:41,522 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp4xmh24lc/test.c -o /tmp/tmp4xmh24lc/test.o
10
- 2025-01-08 11:02:41,543 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp4xmh24lc/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp4xmh24lc/a.out
11
- 2025-01-08 11:02:41,715 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpaqdlp9co/test.c -o /tmp/tmpaqdlp9co/test.o
12
- 2025-01-08 11:02:41,737 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp30n6krfo/test.c -o /tmp/tmp30n6krfo/test.o
13
- 2025-01-08 11:02:41,743 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpaqdlp9co/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpaqdlp9co/a.out
14
- 2025-01-08 11:02:41,765 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp30n6krfo/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp30n6krfo/a.out
15
- 2025-01-08 11:02:41,818 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmprr4qj1cu/test.c -o /tmp/tmprr4qj1cu/test.o
16
- 2025-01-08 11:02:41,846 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmprr4qj1cu/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmprr4qj1cu/a.out
17
- 2025-01-08 11:06:41,409 - INFO - Training started
18
- 2025-01-08 11:06:41,409 - INFO - Total steps: 5
19
- 2025-01-08 11:11:07,381 - INFO - Step 5/5 (100.0%), loss: 1.7330, learning_rate: 0.00e+00, epoch: 1.0000, step_time: 603.66s, elapsed_time: 603.66s, grad_norm: 1.2133
20
- 2025-01-08 11:14:43,581 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-5/pytorch_model_fsdp_0
21
- 2025-01-08 11:14:46,698 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-5/pytorch_model_fsdp_0
22
- 2025-01-08 11:14:52,142 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-5/optimizer_0
23
- 2025-01-08 11:14:58,294 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-5/optimizer_0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-01-08 11:56:47,176 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpfjh4vmv3/test.c -o /tmp/tmpfjh4vmv3/test.o
2
+ 2025-01-08 11:56:47,202 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpfjh4vmv3/test.o -laio -o /tmp/tmpfjh4vmv3/a.out
3
+ 2025-01-08 11:56:47,286 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmplwmc65tr/test.c -o /tmp/tmplwmc65tr/test.o
4
+ 2025-01-08 11:56:47,295 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpwle52tiy/test.c -o /tmp/tmpwle52tiy/test.o
5
+ 2025-01-08 11:56:47,313 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmplwmc65tr/test.o -laio -o /tmp/tmplwmc65tr/a.out
6
+ 2025-01-08 11:56:47,314 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp3oraphll/test.c -o /tmp/tmp3oraphll/test.o
7
+ 2025-01-08 11:56:47,314 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpwle52tiy/test.o -laio -o /tmp/tmpwle52tiy/a.out
8
+ 2025-01-08 11:56:47,334 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp3oraphll/test.o -laio -o /tmp/tmp3oraphll/a.out
9
+ 2025-01-08 11:56:47,614 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpt_nlf1ik/test.c -o /tmp/tmpt_nlf1ik/test.o
10
+ 2025-01-08 11:56:47,640 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpt_nlf1ik/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpt_nlf1ik/a.out
11
+ 2025-01-08 11:56:47,744 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpzhxypo42/test.c -o /tmp/tmpzhxypo42/test.o
12
+ 2025-01-08 11:56:47,763 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpcqbf3ea7/test.c -o /tmp/tmpcqbf3ea7/test.o
13
+ 2025-01-08 11:56:47,768 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpzhxypo42/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpzhxypo42/a.out
14
+ 2025-01-08 11:56:47,784 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpcqbf3ea7/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpcqbf3ea7/a.out
15
+ 2025-01-08 11:56:47,801 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpg8qdjlt6/test.c -o /tmp/tmpg8qdjlt6/test.o
16
+ 2025-01-08 11:56:47,826 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpg8qdjlt6/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpg8qdjlt6/a.out
17
+ 2025-01-08 12:02:43,814 - INFO - Training started
18
+ 2025-01-08 12:02:43,814 - INFO - Total steps: 2
19
+ 2025-01-08 12:08:39,753 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp6uspoou5/test.c -o /tmp/tmp6uspoou5/test.o
20
+ 2025-01-08 12:08:39,779 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp6uspoou5/test.o -laio -o /tmp/tmp6uspoou5/a.out
21
+ 2025-01-08 12:08:39,923 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp2dn1gkyg/test.c -o /tmp/tmp2dn1gkyg/test.o
22
+ 2025-01-08 12:08:39,941 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp2dn1gkyg/test.o -laio -o /tmp/tmp2dn1gkyg/a.out
23
+ 2025-01-08 12:08:39,949 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmprht5flqq/test.c -o /tmp/tmprht5flqq/test.o
24
+ 2025-01-08 12:08:39,970 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmprht5flqq/test.o -laio -o /tmp/tmprht5flqq/a.out
25
+ 2025-01-08 12:08:40,051 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmprprf_vgp/test.c -o /tmp/tmprprf_vgp/test.o
26
+ 2025-01-08 12:08:40,076 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmprprf_vgp/test.o -laio -o /tmp/tmprprf_vgp/a.out
27
+ 2025-01-08 12:08:40,183 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpvzpvk8rt/test.c -o /tmp/tmpvzpvk8rt/test.o
28
+ 2025-01-08 12:08:40,209 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpvzpvk8rt/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpvzpvk8rt/a.out
29
+ 2025-01-08 12:08:40,370 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpi2rfsg_y/test.c -o /tmp/tmpi2rfsg_y/test.o
30
+ 2025-01-08 12:08:40,397 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpi2rfsg_y/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpi2rfsg_y/a.out
31
+ 2025-01-08 12:08:40,422 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp_jxnwkd3/test.c -o /tmp/tmp_jxnwkd3/test.o
32
+ 2025-01-08 12:08:40,450 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp_jxnwkd3/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp_jxnwkd3/a.out
33
+ 2025-01-08 12:08:40,637 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpwedoozg1/test.c -o /tmp/tmpwedoozg1/test.o
34
+ 2025-01-08 12:08:40,662 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpwedoozg1/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpwedoozg1/a.out
35
+ 2025-01-08 12:13:40,449 - INFO - Training started
36
+ 2025-01-08 12:13:40,449 - INFO - Total steps: 2
37
+ 2025-01-08 12:16:40,735 - INFO - Loss improved from inf to 1.98058
38
+ 2025-01-08 12:16:40,735 - INFO - Loss improved from inf to 1.98058
39
+ 2025-01-08 12:16:40,735 - INFO - Loss improved from inf to 1.98058
40
+ 2025-01-08 12:16:40,737 - INFO - Step 1/2 (50.0%), epoch: 1.0000, step_time: 578.66s, elapsed_time: 578.66s
41
+ 2025-01-08 12:16:40,738 - INFO - Evaluation Results:
42
+ eval_loss: 1.9806
43
+ eval_runtime: 24.7167
44
+ eval_samples_per_second: 0.3240
45
+ eval_steps_per_second: 0.0810
46
+ epoch: 1.0000
47
+ elapsed_time: 578.66s
48
+ step_time: 578.66s
49
+ 2025-01-08 12:16:40,738 - INFO - Loss improved from inf to 1.98058
50
+ 2025-01-08 12:20:41,402 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
51
+ 2025-01-08 12:20:44,678 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
52
+ 2025-01-08 12:20:50,772 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
53
+ 2025-01-08 12:20:57,033 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
runs/Jan08_11-55-09_gpu-server/events.out.tfevents.1736337762.gpu-server.727342.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:594ec92bde44495543826f4e5d64032b46f63401188167f48569f1dd7154ca6a
3
+ size 5607
runs/Jan08_12-07-01_gpu-server/events.out.tfevents.1736338418.gpu-server.744461.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7a5857768f9a682c9c49b2c5858d340a0c66cefe23bc49315d7e7cf31163d15
3
+ size 5873
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a72e677e4df9124e54e2c969c197d922385eda98a6934940132e57f367bf2075
3
- size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ed3409351f95a01c5cb5714d45dfe9fdf866624fcf68827bf48c20ab52d5def
3
+ size 5560