cassanof committed on
Commit
6d8dea3
·
1 Parent(s): a08675c

model_starcoder_15b_r-epoch1

Browse files
config.json CHANGED
@@ -1,10 +1,11 @@
1
  {
2
- "_name_or_path": "model_starcoder15b_multiplt_jl/checkpoint-510",
3
  "activation_function": "gelu",
4
  "architectures": [
5
  "GPTBigCodeForCausalLM"
6
  ],
7
  "attention_softmax_in_fp32": true,
 
8
  "attn_pdrop": 0.1,
9
  "bos_token_id": 0,
10
  "embd_pdrop": 0.1,
@@ -24,6 +25,7 @@
24
  "pad_key_length": true,
25
  "pre_allocate_kv_cache": false,
26
  "resid_pdrop": 0.1,
 
27
  "scale_attention_softmax_in_fp32": true,
28
  "scale_attn_weights": true,
29
  "summary_activation": null,
@@ -32,8 +34,10 @@
32
  "summary_type": "cls_index",
33
  "summary_use_proj": true,
34
  "torch_dtype": "bfloat16",
35
- "transformers_version": "4.34.0",
36
  "use_cache": false,
 
 
37
  "validate_runner_input": true,
38
  "vocab_size": 49152
39
  }
 
1
  {
2
+ "_name_or_path": "model_starcoder_15b_r/checkpoint-69",
3
  "activation_function": "gelu",
4
  "architectures": [
5
  "GPTBigCodeForCausalLM"
6
  ],
7
  "attention_softmax_in_fp32": true,
8
+ "attention_window_size": null,
9
  "attn_pdrop": 0.1,
10
  "bos_token_id": 0,
11
  "embd_pdrop": 0.1,
 
25
  "pad_key_length": true,
26
  "pre_allocate_kv_cache": false,
27
  "resid_pdrop": 0.1,
28
+ "rotary_embedding_scale": -9.210340371976184,
29
  "scale_attention_softmax_in_fp32": true,
30
  "scale_attn_weights": true,
31
  "summary_activation": null,
 
34
  "summary_type": "cls_index",
35
  "summary_use_proj": true,
36
  "torch_dtype": "bfloat16",
37
+ "transformers_version": "4.34.1",
38
  "use_cache": false,
39
+ "use_position_embeddings": true,
40
+ "use_rotary_embeddings": false,
41
  "validate_runner_input": true,
42
  "vocab_size": 49152
43
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "eos_token_id": 0,
5
- "transformers_version": "4.34.0"
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "eos_token_id": 0,
5
+ "transformers_version": "4.34.1"
6
  }
pytorch_model-00001-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:245cdf270e0b70faf797cf7b9f0ee36dd384523543a3979ba7269926f365c9f2
3
  size 9957983029
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5257b1372bf34ef97db4c180a6402c2a86e2d8ef5e9812ac13e13bd3539e9db8
3
  size 9957983029
pytorch_model-00002-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d17878c8bfa80d821b3f1a6bf527f1d0a5b479691fbd7af7d04ac75fdca073a
3
  size 9857381671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbb23a3872a6962517269c0b5c5debf3237af609e5397b640d5aa9d8c7ba144f
3
  size 9857381671
pytorch_model-00003-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3403988aa2c961ec11268c0e1d55612f9fb902f25d1ceb7ecd4f79f758666c9
3
  size 9857381671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef0c270525571ddf31f0ac018a7ac11b9b54ceed5956658b522eda8401ad5496
3
  size 9857381671
pytorch_model-00004-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8da484be73e3603c158335c6aed0e287c105787be1b18e583b8a8408fe1c2c0
3
  size 1362327955
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e184a4e1eda3f2cb33f7a2027b062049a2e42b2a5db06fff6c148ebf9f97ee41
3
  size 1362327955