TroyDoesAI committed
Commit bd9f59e · verified · Parent: 23dcc36

Revealing My Secret Method: added the highest delta-SNR layers, interleaved strategically within the model, guided by intuition about how it responds to them.
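The commit ships only the re-indexed weights, so the splice itself has to be inferred from the diffs below (56 layers become 64). What follows is a minimal sketch of that kind of layer interleaving in plain transformers, assuming duplication by deep-copying decoder blocks; the checkpoint path and the indices in high_snr are hypothetical placeholders, since the commit does not say which layers had the highest delta SNR or where they were re-inserted.

    import copy

    import torch
    from transformers import AutoModelForCausalLM

    # Load the 56-layer parent checkpoint (matching the old config.json).
    model = AutoModelForCausalLM.from_pretrained(
        "path/to/56-layer-checkpoint",  # placeholder for the parent revision
        torch_dtype=torch.bfloat16,
    )

    # Hypothetical: indices of the eight highest delta-SNR layers (56 + 8 = 64).
    high_snr = [12, 19, 26, 33, 40, 46, 51, 55]

    layers = list(model.model.layers)
    # Insert duplicates from the back so earlier indices stay valid.
    for idx in sorted(high_snr, reverse=True):
        layers.insert(idx + 1, copy.deepcopy(layers[idx]))

    model.model.layers = torch.nn.ModuleList(layers)
    # Re-number the attention blocks (the KV cache is indexed by layer_idx)
    # and keep the config in sync with the new depth.
    for i, layer in enumerate(model.model.layers):
        layer.self_attn.layer_idx = i
    model.config.num_hidden_layers = len(layers)  # 64

    # save_pretrained re-shards to safetensors and regenerates the index,
    # which is what the config.json and index diffs in this commit reflect.
    model.save_pretrained("interleaved-64")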

config.json CHANGED
@@ -18,7 +18,7 @@
   "max_position_embeddings": 131072,
   "model_type": "phi3",
   "num_attention_heads": 32,
-  "num_hidden_layers": 56,
+  "num_hidden_layers": 64,
   "num_key_value_heads": 32,
   "original_max_position_embeddings": 4096,
   "pad_token_id": 32000,
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5db4e2ccd1767b13093c7cb1991349069b5a7402c95677e670bc36fddd13020
+oid sha256:5d6d5414ed5506ef306a4f0c9b4b52ef0109acc9c011ba8ba67b32caff6d2150
 size 4972489200
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f94eb540127bde035bf06bca85771d9f96286e2d58d74315a55a3a3e777c8b7f
+oid sha256:293031b0dee0e4fbcd1d77f108c68953165aac67ee1c9c3ee9179919e8d709b9
 size 4983118840
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c7cdc7d532555603488fd8fea7e326fef44b82b0d054c29bcfb56fec1d2d74a
-size 3122703288
+oid sha256:ea70b50b85c30bb47b12b66f5c175b821e86d709b8422e382ab963f8d870e4cd
+size 4934746512
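Each pointer above follows the git-lfs spec: oid is the SHA-256 of the real shard and size is its byte count, so a downloaded shard can be verified directly against this diff. A small check, assuming the shards have been fetched with git lfs pull:

    import hashlib
    import os

    def lfs_fields(path):
        # Recompute the (oid, size) pair a git-lfs pointer should carry.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
        return h.hexdigest(), os.path.getsize(path)

    oid, size = lfs_fields("model-00003-of-00003.safetensors")
    assert oid == "ea70b50b85c30bb47b12b66f5c175b821e86d709b8422e382ab963f8d870e4cd"
    assert size == 4934746512  # the third shard grew to hold the new layers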
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 13078272000
+    "total_size": 14890309632
   },
   "weight_map": {
     "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -317,12 +317,60 @@
     "model.layers.55.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.55.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.55.self_attn.qkv_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.56.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.56.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.56.mlp.gate_up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.56.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.56.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.56.self_attn.qkv_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.57.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.57.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.57.mlp.gate_up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.57.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.57.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.57.self_attn.qkv_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.58.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.58.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.58.mlp.gate_up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.58.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.58.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.58.self_attn.qkv_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.59.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.59.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.59.mlp.gate_up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.59.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.59.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.59.self_attn.qkv_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
     "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.6.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
     "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.6.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.60.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.60.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.60.mlp.gate_up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.60.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.60.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.60.self_attn.qkv_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.61.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.61.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.61.mlp.gate_up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.61.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.61.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.61.self_attn.qkv_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.62.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.62.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.62.mlp.gate_up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.62.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.62.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.62.self_attn.qkv_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.63.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.63.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.63.mlp.gate_up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.63.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.63.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.63.self_attn.qkv_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
     "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.7.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors",