Training in progress, epoch 1
Browse files- config.json +30 -0
- logs/events.out.tfevents.1717557366.jagupard19.stanford.edu +0 -0
- logs/events.out.tfevents.1717557630.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717558039.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717558480.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717558972.jagupard32.stanford.edu +0 -0
- logs/events.out.tfevents.1717559070.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717559350.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717559427.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717559467.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717559511.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717559599.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717561143.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717561390.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717561496.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717561804.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717562559.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717563001.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717563490.jagupard32.stanford.edu +3 -0
- logs/events.out.tfevents.1717564216.jagupard32.stanford.edu +3 -0
- model.safetensors +3 -0
- training_args.bin +3 -0
config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "EleutherAI/pythia-70m",
|
3 |
+
"architectures": [
|
4 |
+
"GPTNeoXForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": true,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"classifier_dropout": 0.1,
|
10 |
+
"eos_token_id": 0,
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout": 0.0,
|
13 |
+
"hidden_size": 512,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 2048,
|
16 |
+
"layer_norm_eps": 1e-05,
|
17 |
+
"max_position_embeddings": 2048,
|
18 |
+
"model_type": "gpt_neox",
|
19 |
+
"num_attention_heads": 8,
|
20 |
+
"num_hidden_layers": 6,
|
21 |
+
"rope_scaling": null,
|
22 |
+
"rotary_emb_base": 10000,
|
23 |
+
"rotary_pct": 0.25,
|
24 |
+
"tie_word_embeddings": false,
|
25 |
+
"torch_dtype": "float32",
|
26 |
+
"transformers_version": "4.41.2",
|
27 |
+
"use_cache": true,
|
28 |
+
"use_parallel_residual": true,
|
29 |
+
"vocab_size": 50304
|
30 |
+
}
|
logs/events.out.tfevents.1717557366.jagupard19.stanford.edu
ADDED
File without changes
|
logs/events.out.tfevents.1717557630.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:201c1aa12fe21726ce564c9a61ae8027fd998c68ea64024f6ee78706626e1126
|
3 |
+
size 4808
|
logs/events.out.tfevents.1717558039.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:643747da8986aef2e1558a5c71c8e567f33a921ebab2ffafc9f37998d559a6aa
|
3 |
+
size 4800
|
logs/events.out.tfevents.1717558480.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e9bfef1269c6e938c33daa3bd8553fcadd9b53cfd3cb3bf54acbb17374ecea9
|
3 |
+
size 4800
|
logs/events.out.tfevents.1717558972.jagupard32.stanford.edu
ADDED
File without changes
|
logs/events.out.tfevents.1717559070.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3ecd0f48c0324af3e289dc45bc804ce517b5d9c6e857fd890e08679e82a744a
|
3 |
+
size 4800
|
logs/events.out.tfevents.1717559350.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c80a481051c9f34b76795b15f1f4336a0ea2f17a13910694402c7e94d73919a1
|
3 |
+
size 4800
|
logs/events.out.tfevents.1717559427.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:793cc68c3fc2c37071448f3417838ac2c95f19706d0aa3f84bf61ef6c82e767c
|
3 |
+
size 4800
|
logs/events.out.tfevents.1717559467.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:190bfa8d7be4fa057d690e90fb04b7ecd4595a782beb24ded8b4feebc6eeaad3
|
3 |
+
size 4800
|
logs/events.out.tfevents.1717559511.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98e65ceb6c9d49bd311e95b4a38aec935fcbf4fa47a9e4710ea77e04c9f5e0cf
|
3 |
+
size 4800
|
logs/events.out.tfevents.1717559599.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46aee3b73cdd63377671f0e0c781041f83777dc5e055e9a12c01c52c09173c37
|
3 |
+
size 13642
|
logs/events.out.tfevents.1717561143.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af5238405a1ef733c61a569da4f32b6e7b172aeca28db327fd73dcc3cf1350a5
|
3 |
+
size 5835
|
logs/events.out.tfevents.1717561390.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdaabc2c1d23f0d01c1fa4de38aba8ca96f9257c69c82ffd66e9efc535765bde
|
3 |
+
size 5629
|
logs/events.out.tfevents.1717561496.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71e0db80a50786f3328663da7c7681282ad199f186bf0df1884f247cb3454293
|
3 |
+
size 7524
|
logs/events.out.tfevents.1717561804.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c9ebe76dff6d6b9f328e1ce9627a269e7bf60fe17bdc8e2797aab1a977e4edb
|
3 |
+
size 6680
|
logs/events.out.tfevents.1717562559.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdfcc0bee81f01070bc143007868a8e0aa4fd0f605dbc585adec3dabdda5cf7b
|
3 |
+
size 6047
|
logs/events.out.tfevents.1717563001.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:299f5c3f6f7d4403fb1946737dc5f1f786ab0f6b467422b0a28f1ba8e13d0781
|
3 |
+
size 6474
|
logs/events.out.tfevents.1717563490.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fbf6e855c690270a57b43b2abb86a8ee6a53933f716ee67841f61f641ec8f50
|
3 |
+
size 11111
|
logs/events.out.tfevents.1717564216.jagupard32.stanford.edu
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:332dc0498f772a8bc4a19a7beb87ca2dd54c6daba51accd4a944d990bf0679fe
|
3 |
+
size 186130
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32480ebb74aef7716fb6d8013eadee68137de8ff6329297cd526c9c7b6468788
|
3 |
+
size 281715176
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13e0bfd76591446f76582435702778ca13cd179b988dd8a946784a4880cc3ac6
|
3 |
+
size 5112
|