Adil1567 committed on
Commit
40d29dc
·
verified ·
1 Parent(s): 41b3211

Training in progress, epoch 1

Browse files
logs/training_log.txt CHANGED
@@ -1,58 +1,35 @@
1
- 2025-01-08 13:49:01,305 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpejuv75wg/test.c -o /tmp/tmpejuv75wg/test.o
2
- 2025-01-08 13:49:01,329 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpejuv75wg/test.o -laio -o /tmp/tmpejuv75wg/a.out
3
- 2025-01-08 13:49:01,445 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmph1wqufet/test.c -o /tmp/tmph1wqufet/test.o
4
- 2025-01-08 13:49:01,461 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpn7ir_9h3/test.c -o /tmp/tmpn7ir_9h3/test.o
5
- 2025-01-08 13:49:01,462 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmplbt1g8ij/test.c -o /tmp/tmplbt1g8ij/test.o
6
- 2025-01-08 13:49:01,470 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmph1wqufet/test.o -laio -o /tmp/tmph1wqufet/a.out
7
- 2025-01-08 13:49:01,490 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmplbt1g8ij/test.o -laio -o /tmp/tmplbt1g8ij/a.out
8
- 2025-01-08 13:49:01,490 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpn7ir_9h3/test.o -laio -o /tmp/tmpn7ir_9h3/a.out
9
- 2025-01-08 13:49:01,791 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp387_6tzl/test.c -o /tmp/tmp387_6tzl/test.o
10
- 2025-01-08 13:49:01,819 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp387_6tzl/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp387_6tzl/a.out
11
- 2025-01-08 13:49:01,929 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpbb4cu51n/test.c -o /tmp/tmpbb4cu51n/test.o
12
- 2025-01-08 13:49:01,940 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmp6me18vjb/test.c -o /tmp/tmp6me18vjb/test.o
13
- 2025-01-08 13:49:01,951 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpbb4cu51n/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpbb4cu51n/a.out
14
- 2025-01-08 13:49:01,961 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpu5nbozol/test.c -o /tmp/tmpu5nbozol/test.o
15
- 2025-01-08 13:49:01,966 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmp6me18vjb/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmp6me18vjb/a.out
16
- 2025-01-08 13:49:01,985 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpu5nbozol/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpu5nbozol/a.out
17
- 2025-01-08 13:51:07,759 - INFO - Training started
18
- 2025-01-08 13:51:07,759 - INFO - Total steps: 2
19
- 2025-01-08 13:53:58,210 - INFO - Loss improved from inf to 1.97976
20
- 2025-01-08 13:53:58,210 - INFO - Loss improved from inf to 1.97976
21
- 2025-01-08 13:53:58,210 - INFO - Loss improved from inf to 1.97976
22
- 2025-01-08 13:53:58,212 - INFO - Step 1/2 (50.0%), epoch: 1.0000, step_time: 394.79s, elapsed_time: 394.79s
23
- 2025-01-08 13:53:58,213 - INFO - Evaluation Results:
24
- eval_loss: 1.9798
25
- eval_runtime: 23.5550
26
- eval_samples_per_second: 0.3400
27
- eval_steps_per_second: 0.0850
28
  epoch: 1.0000
29
- elapsed_time: 394.79s
30
- step_time: 394.79s
31
- 2025-01-08 13:53:58,213 - INFO - Loss improved from inf to 1.97976
32
- 2025-01-08 13:54:59,859 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
33
- 2025-01-08 13:55:03,136 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
34
- 2025-01-08 13:55:08,046 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
35
- 2025-01-08 13:55:14,259 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
36
- 2025-01-08 13:56:45,309 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-2/pytorch_model_fsdp_0
37
- 2025-01-08 13:56:48,398 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-2/pytorch_model_fsdp_0
38
- 2025-01-08 13:56:53,278 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-2/optimizer_0
39
- 2025-01-08 13:56:59,275 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-2/optimizer_0
40
- 2025-01-08 13:57:23,252 - INFO - Loss improved from 1.97976 to 1.83431
41
- 2025-01-08 13:57:23,252 - INFO - Loss improved from 1.97976 to 1.83431
42
- 2025-01-08 13:57:23,252 - INFO - Loss improved from 1.97976 to 1.83431
43
- 2025-01-08 13:57:23,253 - INFO - Step 2/2 (100.0%), epoch: 2.0000, step_time: 205.04s, elapsed_time: 599.83s
44
- 2025-01-08 13:57:23,254 - INFO - Evaluation Results:
45
- eval_loss: 1.8343
46
- eval_runtime: 23.7205
47
- eval_samples_per_second: 0.3370
48
- eval_steps_per_second: 0.0840
49
- epoch: 2.0000
50
- elapsed_time: 599.83s
51
- step_time: 205.04s
52
- 2025-01-08 13:57:23,255 - INFO - Loss improved from 1.97976 to 1.83431
53
- 2025-01-08 13:58:21,011 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-2/pytorch_model_fsdp_0
54
- 2025-01-08 13:58:24,262 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-2/pytorch_model_fsdp_0
55
- 2025-01-08 13:58:29,156 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-2/optimizer_0
56
- 2025-01-08 13:58:35,600 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-2/optimizer_0
57
- 2025-01-08 13:58:36,036 - INFO - Step 2/2 (100.0%), epoch: 2.0000, step_time: 72.78s, elapsed_time: 672.61s
58
- 2025-01-08 13:58:36,037 - INFO - Training completed in 672.61 seconds
 
1
+ 2025-01-08 16:51:57,031 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmprelfhj11/test.c -o /tmp/tmprelfhj11/test.o
2
+ 2025-01-08 16:51:57,055 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmprelfhj11/test.o -laio -o /tmp/tmprelfhj11/a.out
3
+ 2025-01-08 16:51:57,150 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpq0kg_ejb/test.c -o /tmp/tmpq0kg_ejb/test.o
4
+ 2025-01-08 16:51:57,152 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpcl_df4so/test.c -o /tmp/tmpcl_df4so/test.o
5
+ 2025-01-08 16:51:57,176 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpcl_df4so/test.o -laio -o /tmp/tmpcl_df4so/a.out
6
+ 2025-01-08 16:51:57,177 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpq0kg_ejb/test.o -laio -o /tmp/tmpq0kg_ejb/a.out
7
+ 2025-01-08 16:51:57,241 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpr4qfgumu/test.c -o /tmp/tmpr4qfgumu/test.o
8
+ 2025-01-08 16:51:57,270 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpr4qfgumu/test.o -laio -o /tmp/tmpr4qfgumu/a.out
9
+ 2025-01-08 16:51:57,507 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpovnjxczz/test.c -o /tmp/tmpovnjxczz/test.o
10
+ 2025-01-08 16:51:57,534 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpovnjxczz/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpovnjxczz/a.out
11
+ 2025-01-08 16:51:57,596 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpkg32zxk_/test.c -o /tmp/tmpkg32zxk_/test.o
12
+ 2025-01-08 16:51:57,611 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmphv2qqt72/test.c -o /tmp/tmphv2qqt72/test.o
13
+ 2025-01-08 16:51:57,615 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpkg32zxk_/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpkg32zxk_/a.out
14
+ 2025-01-08 16:51:57,629 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmphv2qqt72/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmphv2qqt72/a.out
15
+ 2025-01-08 16:51:57,706 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -O2 -isystem /root/anaconda3/envs/faiss_1.8.0/include -fPIC -c /tmp/tmpfcn3zdii/test.c -o /tmp/tmpfcn3zdii/test.o
16
+ 2025-01-08 16:51:57,728 - INFO - gcc -pthread -B /root/anaconda3/envs/faiss_1.8.0/compiler_compat /tmp/tmpfcn3zdii/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpfcn3zdii/a.out
17
+ 2025-01-08 16:56:02,452 - INFO - Training started
18
+ 2025-01-08 16:56:02,452 - INFO - Total steps: 2
19
+ 2025-01-08 16:59:03,388 - INFO - Loss improved from inf to 1.97822
20
+ 2025-01-08 16:59:03,388 - INFO - Loss improved from inf to 1.97822
21
+ 2025-01-08 16:59:03,388 - INFO - Loss improved from inf to 1.97822
22
+ 2025-01-08 16:59:03,389 - INFO - Step 1/2 (50.0%), epoch: 1.0000, step_time: 526.05s, elapsed_time: 526.05s
23
+ 2025-01-08 16:59:03,391 - INFO - Evaluation Results:
24
+ eval_loss: 1.9782
25
+ eval_runtime: 25.4206
26
+ eval_samples_per_second: 0.3150
27
+ eval_steps_per_second: 0.0790
28
  epoch: 1.0000
29
+ elapsed_time: 526.05s
30
+ step_time: 526.05s
31
+ 2025-01-08 16:59:03,391 - INFO - Loss improved from inf to 1.97822
32
+ 2025-01-08 17:01:50,261 - INFO - Saving model to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
33
+ 2025-01-08 17:01:53,657 - INFO - Model saved to mistral-sft-lora-fsdp2/checkpoint-1/pytorch_model_fsdp_0
34
+ 2025-01-08 17:01:59,821 - INFO - Saving Optimizer state to mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
35
+ 2025-01-08 17:02:06,458 - INFO - Optimizer state saved in mistral-sft-lora-fsdp2/checkpoint-1/optimizer_0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
runs/Jan08_16-50-16_gpu-server/events.out.tfevents.1736355360.gpu-server.922092.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d43fc185418e889196286a648d25b64e44b0bb14959b9af5987765245a4eb50
3
+ size 5873
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c371cb2d8df70dd7c76903b3ee3adcead61312f419be560aae3002d5638c614
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51fc8d24c71b3f37b8b6ee98fea4d4c8df8bae3b541e3efa59279a56399f9f0c
3
  size 5560