Synchronizing local compiler cache.
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4c5b9cb78dab4bbd871f.json +51 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/8456948f6fde59831390.json +51 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/8c9f1fab702cddae6156.json +51 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/1fb5c6b213ce9eb31109.json +55 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/be6cf655a8b36069d883.json +55 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/bf840674318c6cdc6330.json +77 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/c59993fff4b07fbeb262.json +77 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/e4975f3e0d271e6155e4.json +55 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/fb64b85b484e3ebb9a1d.json +77 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/14f6b812ab3b13c199f1.json +56 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/1c3abae211f3e919b7f0.json +73 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/b21b3ac2511d590ac1c2.json +73 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/baf0fb4d07ac05e74df7.json +73 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/076d544cf144121b2024.json +52 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/9da282b561774776761c.json +52 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/bcf8d0c54c45b277333b.json +52 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/0dd437c3697ceec3b894.json +53 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/44c8abc3ecedc8598f8a.json +53 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/d472f81e5c657f79c2f9.json +53 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.hlo_module.pb +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.hlo_module.pb +1 -1
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4c5b9cb78dab4bbd871f.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"GraniteForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"attention_multiplier": 1.0,
|
11 |
+
"embedding_multiplier": 1.0,
|
12 |
+
"hidden_act": "silu",
|
13 |
+
"hidden_size": 32,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 64,
|
16 |
+
"logits_scaling": 1.0,
|
17 |
+
"max_position_embeddings": 2048,
|
18 |
+
"mlp_bias": false,
|
19 |
+
"model_type": "granite",
|
20 |
+
"neuron": {
|
21 |
+
"_serialized_key": "HloNeuronConfig",
|
22 |
+
"all_reduce_dtype": null,
|
23 |
+
"allow_flash_attention": true,
|
24 |
+
"attention_layout": "HSB",
|
25 |
+
"attn_output_transposed": false,
|
26 |
+
"auto_cast_type": "fp16",
|
27 |
+
"batch_size": 1,
|
28 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
29 |
+
"checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
|
30 |
+
"collectives_layout": "HSB",
|
31 |
+
"continuous_batching": false,
|
32 |
+
"fuse_qkv": true,
|
33 |
+
"group_query_attention": null,
|
34 |
+
"log_softmax_scores": false,
|
35 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
36 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
37 |
+
"output_all_logits": false,
|
38 |
+
"sequence_length": 100,
|
39 |
+
"tp_degree": 2
|
40 |
+
},
|
41 |
+
"num_attention_heads": 4,
|
42 |
+
"num_hidden_layers": 2,
|
43 |
+
"num_key_value_heads": 4,
|
44 |
+
"residual_multiplier": 1.0,
|
45 |
+
"rms_norm_eps": 1e-06,
|
46 |
+
"rope_scaling": null,
|
47 |
+
"rope_theta": 10000.0,
|
48 |
+
"tie_word_embeddings": false,
|
49 |
+
"use_cache": true,
|
50 |
+
"vocab_size": 49152
|
51 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/8456948f6fde59831390.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"GraniteForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"attention_multiplier": 1.0,
|
11 |
+
"embedding_multiplier": 1.0,
|
12 |
+
"hidden_act": "silu",
|
13 |
+
"hidden_size": 32,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 64,
|
16 |
+
"logits_scaling": 1.0,
|
17 |
+
"max_position_embeddings": 2048,
|
18 |
+
"mlp_bias": false,
|
19 |
+
"model_type": "granite",
|
20 |
+
"neuron": {
|
21 |
+
"_serialized_key": "HloNeuronConfig",
|
22 |
+
"all_reduce_dtype": null,
|
23 |
+
"allow_flash_attention": true,
|
24 |
+
"attention_layout": "HSB",
|
25 |
+
"attn_output_transposed": false,
|
26 |
+
"auto_cast_type": "fp16",
|
27 |
+
"batch_size": 2,
|
28 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
29 |
+
"checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
|
30 |
+
"collectives_layout": "HSB",
|
31 |
+
"continuous_batching": true,
|
32 |
+
"fuse_qkv": true,
|
33 |
+
"group_query_attention": null,
|
34 |
+
"log_softmax_scores": false,
|
35 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
36 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
37 |
+
"output_all_logits": false,
|
38 |
+
"sequence_length": 100,
|
39 |
+
"tp_degree": 2
|
40 |
+
},
|
41 |
+
"num_attention_heads": 4,
|
42 |
+
"num_hidden_layers": 2,
|
43 |
+
"num_key_value_heads": 4,
|
44 |
+
"residual_multiplier": 1.0,
|
45 |
+
"rms_norm_eps": 1e-06,
|
46 |
+
"rope_scaling": null,
|
47 |
+
"rope_theta": 10000.0,
|
48 |
+
"tie_word_embeddings": false,
|
49 |
+
"use_cache": true,
|
50 |
+
"vocab_size": 49152
|
51 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/8c9f1fab702cddae6156.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"GraniteForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"attention_multiplier": 1.0,
|
11 |
+
"embedding_multiplier": 1.0,
|
12 |
+
"hidden_act": "silu",
|
13 |
+
"hidden_size": 32,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 64,
|
16 |
+
"logits_scaling": 1.0,
|
17 |
+
"max_position_embeddings": 2048,
|
18 |
+
"mlp_bias": false,
|
19 |
+
"model_type": "granite",
|
20 |
+
"neuron": {
|
21 |
+
"_serialized_key": "HloNeuronConfig",
|
22 |
+
"all_reduce_dtype": null,
|
23 |
+
"allow_flash_attention": true,
|
24 |
+
"attention_layout": "HSB",
|
25 |
+
"attn_output_transposed": false,
|
26 |
+
"auto_cast_type": "bf16",
|
27 |
+
"batch_size": 1,
|
28 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
29 |
+
"checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
|
30 |
+
"collectives_layout": "HSB",
|
31 |
+
"continuous_batching": false,
|
32 |
+
"fuse_qkv": true,
|
33 |
+
"group_query_attention": null,
|
34 |
+
"log_softmax_scores": false,
|
35 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
36 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
37 |
+
"output_all_logits": false,
|
38 |
+
"sequence_length": 100,
|
39 |
+
"tp_degree": 2
|
40 |
+
},
|
41 |
+
"num_attention_heads": 4,
|
42 |
+
"num_hidden_layers": 2,
|
43 |
+
"num_key_value_heads": 4,
|
44 |
+
"residual_multiplier": 1.0,
|
45 |
+
"rms_norm_eps": 1e-06,
|
46 |
+
"rope_scaling": null,
|
47 |
+
"rope_theta": 10000.0,
|
48 |
+
"tie_word_embeddings": false,
|
49 |
+
"use_cache": true,
|
50 |
+
"vocab_size": 49152
|
51 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/1fb5c6b213ce9eb31109.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 4,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 16,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 64,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "HloNeuronConfig",
|
20 |
+
"all_reduce_dtype": null,
|
21 |
+
"allow_flash_attention": true,
|
22 |
+
"attention_layout": "BSH",
|
23 |
+
"attn_output_transposed": false,
|
24 |
+
"auto_cast_type": "bf16",
|
25 |
+
"batch_size": 1,
|
26 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
27 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
28 |
+
"collectives_layout": "HSB",
|
29 |
+
"continuous_batching": false,
|
30 |
+
"fuse_qkv": true,
|
31 |
+
"group_query_attention": null,
|
32 |
+
"log_softmax_scores": false,
|
33 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
34 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
35 |
+
"output_all_logits": false,
|
36 |
+
"sequence_length": 100,
|
37 |
+
"tp_degree": 2
|
38 |
+
},
|
39 |
+
"num_attention_heads": 4,
|
40 |
+
"num_hidden_layers": 2,
|
41 |
+
"num_key_value_heads": 4,
|
42 |
+
"pretraining_tp": 1,
|
43 |
+
"rms_norm_eps": 1e-05,
|
44 |
+
"rope_scaling": {
|
45 |
+
"factor": 8.0,
|
46 |
+
"high_freq_factor": 4.0,
|
47 |
+
"low_freq_factor": 1.0,
|
48 |
+
"original_max_position_embeddings": 8192,
|
49 |
+
"rope_type": "llama3"
|
50 |
+
},
|
51 |
+
"rope_theta": 500000.0,
|
52 |
+
"tie_word_embeddings": false,
|
53 |
+
"use_cache": true,
|
54 |
+
"vocab_size": 128256
|
55 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/be6cf655a8b36069d883.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 4,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 16,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 64,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "HloNeuronConfig",
|
20 |
+
"all_reduce_dtype": null,
|
21 |
+
"allow_flash_attention": true,
|
22 |
+
"attention_layout": "BSH",
|
23 |
+
"attn_output_transposed": false,
|
24 |
+
"auto_cast_type": "fp16",
|
25 |
+
"batch_size": 2,
|
26 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
27 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
28 |
+
"collectives_layout": "HSB",
|
29 |
+
"continuous_batching": true,
|
30 |
+
"fuse_qkv": true,
|
31 |
+
"group_query_attention": null,
|
32 |
+
"log_softmax_scores": false,
|
33 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
34 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
35 |
+
"output_all_logits": false,
|
36 |
+
"sequence_length": 100,
|
37 |
+
"tp_degree": 2
|
38 |
+
},
|
39 |
+
"num_attention_heads": 4,
|
40 |
+
"num_hidden_layers": 2,
|
41 |
+
"num_key_value_heads": 4,
|
42 |
+
"pretraining_tp": 1,
|
43 |
+
"rms_norm_eps": 1e-05,
|
44 |
+
"rope_scaling": {
|
45 |
+
"factor": 8.0,
|
46 |
+
"high_freq_factor": 4.0,
|
47 |
+
"low_freq_factor": 1.0,
|
48 |
+
"original_max_position_embeddings": 8192,
|
49 |
+
"rope_type": "llama3"
|
50 |
+
},
|
51 |
+
"rope_theta": 500000.0,
|
52 |
+
"tie_word_embeddings": false,
|
53 |
+
"use_cache": true,
|
54 |
+
"vocab_size": 128256
|
55 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/bf840674318c6cdc6330.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 4,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 16,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 64,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 1,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
26 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
27 |
+
"continuous_batching": false,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"local_ranks_size": 2,
|
35 |
+
"logical_nc_config": 1,
|
36 |
+
"max_batch_size": 1,
|
37 |
+
"max_context_length": 100,
|
38 |
+
"max_topk": 256,
|
39 |
+
"mlp_kernel_enabled": false,
|
40 |
+
"mlp_kernel_fuse_residual_add": false,
|
41 |
+
"n_active_tokens": 100,
|
42 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
43 |
+
"num_cores_per_group": 1,
|
44 |
+
"on_device_sampling": true,
|
45 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
46 |
+
"output_logits": false,
|
47 |
+
"padding_side": "right",
|
48 |
+
"pp_degree": 1,
|
49 |
+
"qk_layernorm": false,
|
50 |
+
"qkv_kernel_enabled": false,
|
51 |
+
"rpl_reduce_dtype": "float16",
|
52 |
+
"sequence_length": 100,
|
53 |
+
"sequence_parallel_enabled": false,
|
54 |
+
"speculation_length": 0,
|
55 |
+
"start_rank_id": 0,
|
56 |
+
"target": null,
|
57 |
+
"torch_dtype": "float16",
|
58 |
+
"tp_degree": 2,
|
59 |
+
"vocab_parallel": false
|
60 |
+
},
|
61 |
+
"num_attention_heads": 4,
|
62 |
+
"num_hidden_layers": 2,
|
63 |
+
"num_key_value_heads": 4,
|
64 |
+
"pretraining_tp": 1,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_scaling": {
|
67 |
+
"factor": 8.0,
|
68 |
+
"high_freq_factor": 4.0,
|
69 |
+
"low_freq_factor": 1.0,
|
70 |
+
"original_max_position_embeddings": 8192,
|
71 |
+
"rope_type": "llama3"
|
72 |
+
},
|
73 |
+
"rope_theta": 500000.0,
|
74 |
+
"tie_word_embeddings": false,
|
75 |
+
"use_cache": true,
|
76 |
+
"vocab_size": 128256
|
77 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/c59993fff4b07fbeb262.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 4,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 16,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 64,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 1,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
26 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
27 |
+
"continuous_batching": false,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"local_ranks_size": 2,
|
35 |
+
"logical_nc_config": 1,
|
36 |
+
"max_batch_size": 1,
|
37 |
+
"max_context_length": 100,
|
38 |
+
"max_topk": 256,
|
39 |
+
"mlp_kernel_enabled": false,
|
40 |
+
"mlp_kernel_fuse_residual_add": false,
|
41 |
+
"n_active_tokens": 100,
|
42 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
43 |
+
"num_cores_per_group": 1,
|
44 |
+
"on_device_sampling": true,
|
45 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
46 |
+
"output_logits": false,
|
47 |
+
"padding_side": "right",
|
48 |
+
"pp_degree": 1,
|
49 |
+
"qk_layernorm": false,
|
50 |
+
"qkv_kernel_enabled": false,
|
51 |
+
"rpl_reduce_dtype": "bfloat16",
|
52 |
+
"sequence_length": 100,
|
53 |
+
"sequence_parallel_enabled": false,
|
54 |
+
"speculation_length": 0,
|
55 |
+
"start_rank_id": 0,
|
56 |
+
"target": null,
|
57 |
+
"torch_dtype": "bfloat16",
|
58 |
+
"tp_degree": 2,
|
59 |
+
"vocab_parallel": false
|
60 |
+
},
|
61 |
+
"num_attention_heads": 4,
|
62 |
+
"num_hidden_layers": 2,
|
63 |
+
"num_key_value_heads": 4,
|
64 |
+
"pretraining_tp": 1,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_scaling": {
|
67 |
+
"factor": 8.0,
|
68 |
+
"high_freq_factor": 4.0,
|
69 |
+
"low_freq_factor": 1.0,
|
70 |
+
"original_max_position_embeddings": 8192,
|
71 |
+
"rope_type": "llama3"
|
72 |
+
},
|
73 |
+
"rope_theta": 500000.0,
|
74 |
+
"tie_word_embeddings": false,
|
75 |
+
"use_cache": true,
|
76 |
+
"vocab_size": 128256
|
77 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/e4975f3e0d271e6155e4.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 4,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 16,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 64,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "HloNeuronConfig",
|
20 |
+
"all_reduce_dtype": null,
|
21 |
+
"allow_flash_attention": true,
|
22 |
+
"attention_layout": "BSH",
|
23 |
+
"attn_output_transposed": false,
|
24 |
+
"auto_cast_type": "fp16",
|
25 |
+
"batch_size": 1,
|
26 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
27 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
28 |
+
"collectives_layout": "HSB",
|
29 |
+
"continuous_batching": false,
|
30 |
+
"fuse_qkv": true,
|
31 |
+
"group_query_attention": null,
|
32 |
+
"log_softmax_scores": false,
|
33 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
34 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
35 |
+
"output_all_logits": false,
|
36 |
+
"sequence_length": 100,
|
37 |
+
"tp_degree": 2
|
38 |
+
},
|
39 |
+
"num_attention_heads": 4,
|
40 |
+
"num_hidden_layers": 2,
|
41 |
+
"num_key_value_heads": 4,
|
42 |
+
"pretraining_tp": 1,
|
43 |
+
"rms_norm_eps": 1e-05,
|
44 |
+
"rope_scaling": {
|
45 |
+
"factor": 8.0,
|
46 |
+
"high_freq_factor": 4.0,
|
47 |
+
"low_freq_factor": 1.0,
|
48 |
+
"original_max_position_embeddings": 8192,
|
49 |
+
"rope_type": "llama3"
|
50 |
+
},
|
51 |
+
"rope_theta": 500000.0,
|
52 |
+
"tie_word_embeddings": false,
|
53 |
+
"use_cache": true,
|
54 |
+
"vocab_size": 128256
|
55 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/fb64b85b484e3ebb9a1d.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 4,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 16,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 64,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 2,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
26 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
27 |
+
"continuous_batching": true,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"local_ranks_size": 2,
|
35 |
+
"logical_nc_config": 1,
|
36 |
+
"max_batch_size": 2,
|
37 |
+
"max_context_length": 100,
|
38 |
+
"max_topk": 256,
|
39 |
+
"mlp_kernel_enabled": false,
|
40 |
+
"mlp_kernel_fuse_residual_add": false,
|
41 |
+
"n_active_tokens": 100,
|
42 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
43 |
+
"num_cores_per_group": 1,
|
44 |
+
"on_device_sampling": false,
|
45 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
46 |
+
"output_logits": false,
|
47 |
+
"padding_side": "right",
|
48 |
+
"pp_degree": 1,
|
49 |
+
"qk_layernorm": false,
|
50 |
+
"qkv_kernel_enabled": false,
|
51 |
+
"rpl_reduce_dtype": "float16",
|
52 |
+
"sequence_length": 100,
|
53 |
+
"sequence_parallel_enabled": false,
|
54 |
+
"speculation_length": 0,
|
55 |
+
"start_rank_id": 0,
|
56 |
+
"target": null,
|
57 |
+
"torch_dtype": "float16",
|
58 |
+
"tp_degree": 2,
|
59 |
+
"vocab_parallel": false
|
60 |
+
},
|
61 |
+
"num_attention_heads": 4,
|
62 |
+
"num_hidden_layers": 2,
|
63 |
+
"num_key_value_heads": 4,
|
64 |
+
"pretraining_tp": 1,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_scaling": {
|
67 |
+
"factor": 8.0,
|
68 |
+
"high_freq_factor": 4.0,
|
69 |
+
"low_freq_factor": 1.0,
|
70 |
+
"original_max_position_embeddings": 8192,
|
71 |
+
"rope_type": "llama3"
|
72 |
+
},
|
73 |
+
"rope_theta": 500000.0,
|
74 |
+
"tie_word_embeddings": false,
|
75 |
+
"use_cache": true,
|
76 |
+
"vocab_size": 128256
|
77 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/14f6b812ab3b13c199f1.json
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 64,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 2048,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 8192,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "HloNeuronConfig",
|
20 |
+
"all_reduce_dtype": null,
|
21 |
+
"allow_flash_attention": true,
|
22 |
+
"attention_layout": "BSH",
|
23 |
+
"attn_output_transposed": false,
|
24 |
+
"auto_cast_type": "fp16",
|
25 |
+
"batch_size": 4,
|
26 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
27 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
28 |
+
"collectives_layout": "HSB",
|
29 |
+
"continuous_batching": true,
|
30 |
+
"fuse_qkv": true,
|
31 |
+
"group_query_attention": "shard-over-heads",
|
32 |
+
"log_softmax_scores": false,
|
33 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
34 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
35 |
+
"output_all_logits": false,
|
36 |
+
"sequence_length": 4096,
|
37 |
+
"tp_degree": 2
|
38 |
+
},
|
39 |
+
"num_attention_heads": 32,
|
40 |
+
"num_hidden_layers": 16,
|
41 |
+
"num_key_value_heads": 8,
|
42 |
+
"pretraining_tp": 1,
|
43 |
+
"rms_norm_eps": 1e-05,
|
44 |
+
"rope_scaling": {
|
45 |
+
"factor": 32.0,
|
46 |
+
"high_freq_factor": 4.0,
|
47 |
+
"low_freq_factor": 1.0,
|
48 |
+
"original_max_position_embeddings": 8192,
|
49 |
+
"rope_type": "llama3"
|
50 |
+
},
|
51 |
+
"rope_theta": 500000.0,
|
52 |
+
"tie_word_embeddings": true,
|
53 |
+
"unsloth_fixed": true,
|
54 |
+
"use_cache": true,
|
55 |
+
"vocab_size": 128256
|
56 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/1c3abae211f3e919b7f0.json
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"MixtralForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"head_dim": 32,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3584,
|
14 |
+
"max_position_embeddings": 1024,
|
15 |
+
"model_type": "mixtral",
|
16 |
+
"neuron": {
|
17 |
+
"_serialized_key": "NxDNeuronConfig",
|
18 |
+
"async_mode": false,
|
19 |
+
"attn_kernel_enabled": false,
|
20 |
+
"batch_size": 1,
|
21 |
+
"capacity_factor": null,
|
22 |
+
"cc_pipeline_tiling_factor": 2,
|
23 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
24 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
25 |
+
"continuous_batching": false,
|
26 |
+
"enable_bucketing": false,
|
27 |
+
"ep_degree": 1,
|
28 |
+
"flash_decoding_enabled": false,
|
29 |
+
"fused_qkv": false,
|
30 |
+
"glu_mlp": true,
|
31 |
+
"is_chunked_prefill": false,
|
32 |
+
"local_ranks_size": 2,
|
33 |
+
"logical_nc_config": 1,
|
34 |
+
"max_batch_size": 1,
|
35 |
+
"max_context_length": 100,
|
36 |
+
"max_topk": 256,
|
37 |
+
"mlp_kernel_enabled": false,
|
38 |
+
"mlp_kernel_fuse_residual_add": false,
|
39 |
+
"n_active_tokens": 100,
|
40 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
41 |
+
"num_cores_per_group": 1,
|
42 |
+
"on_device_sampling": false,
|
43 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
44 |
+
"output_logits": false,
|
45 |
+
"padding_side": "right",
|
46 |
+
"pp_degree": 1,
|
47 |
+
"qk_layernorm": false,
|
48 |
+
"qkv_kernel_enabled": false,
|
49 |
+
"rpl_reduce_dtype": "float16",
|
50 |
+
"sequence_length": 100,
|
51 |
+
"sequence_parallel_enabled": false,
|
52 |
+
"speculation_length": 0,
|
53 |
+
"start_rank_id": 0,
|
54 |
+
"target": null,
|
55 |
+
"torch_dtype": "float16",
|
56 |
+
"tp_degree": 2,
|
57 |
+
"vocab_parallel": false
|
58 |
+
},
|
59 |
+
"num_attention_heads": 32,
|
60 |
+
"num_experts_per_tok": 2,
|
61 |
+
"num_hidden_layers": 2,
|
62 |
+
"num_key_value_heads": 8,
|
63 |
+
"num_local_experts": 8,
|
64 |
+
"output_router_logits": false,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_theta": 10000.0,
|
67 |
+
"router_aux_loss_coef": 0.001,
|
68 |
+
"router_jitter_noise": 0.0,
|
69 |
+
"sliding_window": 4096,
|
70 |
+
"tie_word_embeddings": false,
|
71 |
+
"use_cache": true,
|
72 |
+
"vocab_size": 32000
|
73 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/b21b3ac2511d590ac1c2.json
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"MixtralForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"head_dim": 32,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3584,
|
14 |
+
"max_position_embeddings": 1024,
|
15 |
+
"model_type": "mixtral",
|
16 |
+
"neuron": {
|
17 |
+
"_serialized_key": "NxDNeuronConfig",
|
18 |
+
"async_mode": false,
|
19 |
+
"attn_kernel_enabled": false,
|
20 |
+
"batch_size": 1,
|
21 |
+
"capacity_factor": null,
|
22 |
+
"cc_pipeline_tiling_factor": 2,
|
23 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
24 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
25 |
+
"continuous_batching": false,
|
26 |
+
"enable_bucketing": false,
|
27 |
+
"ep_degree": 1,
|
28 |
+
"flash_decoding_enabled": false,
|
29 |
+
"fused_qkv": false,
|
30 |
+
"glu_mlp": true,
|
31 |
+
"is_chunked_prefill": false,
|
32 |
+
"local_ranks_size": 2,
|
33 |
+
"logical_nc_config": 1,
|
34 |
+
"max_batch_size": 1,
|
35 |
+
"max_context_length": 100,
|
36 |
+
"max_topk": 256,
|
37 |
+
"mlp_kernel_enabled": false,
|
38 |
+
"mlp_kernel_fuse_residual_add": false,
|
39 |
+
"n_active_tokens": 100,
|
40 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
41 |
+
"num_cores_per_group": 1,
|
42 |
+
"on_device_sampling": false,
|
43 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
44 |
+
"output_logits": false,
|
45 |
+
"padding_side": "right",
|
46 |
+
"pp_degree": 1,
|
47 |
+
"qk_layernorm": false,
|
48 |
+
"qkv_kernel_enabled": false,
|
49 |
+
"rpl_reduce_dtype": "bfloat16",
|
50 |
+
"sequence_length": 100,
|
51 |
+
"sequence_parallel_enabled": false,
|
52 |
+
"speculation_length": 0,
|
53 |
+
"start_rank_id": 0,
|
54 |
+
"target": null,
|
55 |
+
"torch_dtype": "bfloat16",
|
56 |
+
"tp_degree": 2,
|
57 |
+
"vocab_parallel": false
|
58 |
+
},
|
59 |
+
"num_attention_heads": 32,
|
60 |
+
"num_experts_per_tok": 2,
|
61 |
+
"num_hidden_layers": 2,
|
62 |
+
"num_key_value_heads": 8,
|
63 |
+
"num_local_experts": 8,
|
64 |
+
"output_router_logits": false,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_theta": 10000.0,
|
67 |
+
"router_aux_loss_coef": 0.001,
|
68 |
+
"router_jitter_noise": 0.0,
|
69 |
+
"sliding_window": 4096,
|
70 |
+
"tie_word_embeddings": false,
|
71 |
+
"use_cache": true,
|
72 |
+
"vocab_size": 32000
|
73 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/baf0fb4d07ac05e74df7.json
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"MixtralForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"head_dim": 32,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3584,
|
14 |
+
"max_position_embeddings": 1024,
|
15 |
+
"model_type": "mixtral",
|
16 |
+
"neuron": {
|
17 |
+
"_serialized_key": "NxDNeuronConfig",
|
18 |
+
"async_mode": false,
|
19 |
+
"attn_kernel_enabled": false,
|
20 |
+
"batch_size": 2,
|
21 |
+
"capacity_factor": null,
|
22 |
+
"cc_pipeline_tiling_factor": 2,
|
23 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
24 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
25 |
+
"continuous_batching": false,
|
26 |
+
"enable_bucketing": false,
|
27 |
+
"ep_degree": 1,
|
28 |
+
"flash_decoding_enabled": false,
|
29 |
+
"fused_qkv": false,
|
30 |
+
"glu_mlp": true,
|
31 |
+
"is_chunked_prefill": false,
|
32 |
+
"local_ranks_size": 2,
|
33 |
+
"logical_nc_config": 1,
|
34 |
+
"max_batch_size": 2,
|
35 |
+
"max_context_length": 100,
|
36 |
+
"max_topk": 256,
|
37 |
+
"mlp_kernel_enabled": false,
|
38 |
+
"mlp_kernel_fuse_residual_add": false,
|
39 |
+
"n_active_tokens": 100,
|
40 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
41 |
+
"num_cores_per_group": 1,
|
42 |
+
"on_device_sampling": false,
|
43 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
44 |
+
"output_logits": false,
|
45 |
+
"padding_side": "right",
|
46 |
+
"pp_degree": 1,
|
47 |
+
"qk_layernorm": false,
|
48 |
+
"qkv_kernel_enabled": false,
|
49 |
+
"rpl_reduce_dtype": "float16",
|
50 |
+
"sequence_length": 100,
|
51 |
+
"sequence_parallel_enabled": false,
|
52 |
+
"speculation_length": 0,
|
53 |
+
"start_rank_id": 0,
|
54 |
+
"target": null,
|
55 |
+
"torch_dtype": "float16",
|
56 |
+
"tp_degree": 2,
|
57 |
+
"vocab_parallel": false
|
58 |
+
},
|
59 |
+
"num_attention_heads": 32,
|
60 |
+
"num_experts_per_tok": 2,
|
61 |
+
"num_hidden_layers": 2,
|
62 |
+
"num_key_value_heads": 8,
|
63 |
+
"num_local_experts": 8,
|
64 |
+
"output_router_logits": false,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_theta": 10000.0,
|
67 |
+
"router_aux_loss_coef": 0.001,
|
68 |
+
"router_jitter_noise": 0.0,
|
69 |
+
"sliding_window": 4096,
|
70 |
+
"tie_word_embeddings": false,
|
71 |
+
"use_cache": true,
|
72 |
+
"vocab_size": 32000
|
73 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/076d544cf144121b2024.json
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "yujiepan/phi-4-tiny-random",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"Phi3ForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"auto_map": {},
|
11 |
+
"embd_pdrop": 0.0,
|
12 |
+
"hidden_act": "silu",
|
13 |
+
"hidden_size": 16,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 32,
|
16 |
+
"max_position_embeddings": 16384,
|
17 |
+
"model_type": "phi3",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "HloNeuronConfig",
|
20 |
+
"all_reduce_dtype": null,
|
21 |
+
"allow_flash_attention": false,
|
22 |
+
"attention_layout": "HSB",
|
23 |
+
"attn_output_transposed": false,
|
24 |
+
"auto_cast_type": "fp16",
|
25 |
+
"batch_size": 2,
|
26 |
+
"checkpoint_id": "yujiepan/phi-4-tiny-random",
|
27 |
+
"checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
|
28 |
+
"collectives_layout": "HSB",
|
29 |
+
"continuous_batching": true,
|
30 |
+
"fuse_qkv": true,
|
31 |
+
"group_query_attention": "replicated-heads",
|
32 |
+
"log_softmax_scores": false,
|
33 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
34 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
35 |
+
"output_all_logits": false,
|
36 |
+
"sequence_length": 100,
|
37 |
+
"tp_degree": 2
|
38 |
+
},
|
39 |
+
"num_attention_heads": 2,
|
40 |
+
"num_hidden_layers": 2,
|
41 |
+
"num_key_value_heads": 1,
|
42 |
+
"original_max_position_embeddings": 16384,
|
43 |
+
"partial_rotary_factor": 1.0,
|
44 |
+
"resid_pdrop": 0.0,
|
45 |
+
"rms_norm_eps": 1e-05,
|
46 |
+
"rope_scaling": null,
|
47 |
+
"rope_theta": 250000,
|
48 |
+
"sliding_window": null,
|
49 |
+
"tie_word_embeddings": false,
|
50 |
+
"use_cache": true,
|
51 |
+
"vocab_size": 100352
|
52 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/9da282b561774776761c.json
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "yujiepan/phi-4-tiny-random",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"Phi3ForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"auto_map": {},
|
11 |
+
"embd_pdrop": 0.0,
|
12 |
+
"hidden_act": "silu",
|
13 |
+
"hidden_size": 16,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 32,
|
16 |
+
"max_position_embeddings": 16384,
|
17 |
+
"model_type": "phi3",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "HloNeuronConfig",
|
20 |
+
"all_reduce_dtype": null,
|
21 |
+
"allow_flash_attention": false,
|
22 |
+
"attention_layout": "HSB",
|
23 |
+
"attn_output_transposed": false,
|
24 |
+
"auto_cast_type": "fp16",
|
25 |
+
"batch_size": 1,
|
26 |
+
"checkpoint_id": "yujiepan/phi-4-tiny-random",
|
27 |
+
"checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
|
28 |
+
"collectives_layout": "HSB",
|
29 |
+
"continuous_batching": false,
|
30 |
+
"fuse_qkv": true,
|
31 |
+
"group_query_attention": "replicated-heads",
|
32 |
+
"log_softmax_scores": false,
|
33 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
34 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
35 |
+
"output_all_logits": false,
|
36 |
+
"sequence_length": 100,
|
37 |
+
"tp_degree": 2
|
38 |
+
},
|
39 |
+
"num_attention_heads": 2,
|
40 |
+
"num_hidden_layers": 2,
|
41 |
+
"num_key_value_heads": 1,
|
42 |
+
"original_max_position_embeddings": 16384,
|
43 |
+
"partial_rotary_factor": 1.0,
|
44 |
+
"resid_pdrop": 0.0,
|
45 |
+
"rms_norm_eps": 1e-05,
|
46 |
+
"rope_scaling": null,
|
47 |
+
"rope_theta": 250000,
|
48 |
+
"sliding_window": null,
|
49 |
+
"tie_word_embeddings": false,
|
50 |
+
"use_cache": true,
|
51 |
+
"vocab_size": 100352
|
52 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/bcf8d0c54c45b277333b.json
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "yujiepan/phi-4-tiny-random",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"Phi3ForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"auto_map": {},
|
11 |
+
"embd_pdrop": 0.0,
|
12 |
+
"hidden_act": "silu",
|
13 |
+
"hidden_size": 16,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 32,
|
16 |
+
"max_position_embeddings": 16384,
|
17 |
+
"model_type": "phi3",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "HloNeuronConfig",
|
20 |
+
"all_reduce_dtype": null,
|
21 |
+
"allow_flash_attention": false,
|
22 |
+
"attention_layout": "HSB",
|
23 |
+
"attn_output_transposed": false,
|
24 |
+
"auto_cast_type": "bf16",
|
25 |
+
"batch_size": 1,
|
26 |
+
"checkpoint_id": "yujiepan/phi-4-tiny-random",
|
27 |
+
"checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
|
28 |
+
"collectives_layout": "HSB",
|
29 |
+
"continuous_batching": false,
|
30 |
+
"fuse_qkv": true,
|
31 |
+
"group_query_attention": "replicated-heads",
|
32 |
+
"log_softmax_scores": false,
|
33 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
34 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
35 |
+
"output_all_logits": false,
|
36 |
+
"sequence_length": 100,
|
37 |
+
"tp_degree": 2
|
38 |
+
},
|
39 |
+
"num_attention_heads": 2,
|
40 |
+
"num_hidden_layers": 2,
|
41 |
+
"num_key_value_heads": 1,
|
42 |
+
"original_max_position_embeddings": 16384,
|
43 |
+
"partial_rotary_factor": 1.0,
|
44 |
+
"resid_pdrop": 0.0,
|
45 |
+
"rms_norm_eps": 1e-05,
|
46 |
+
"rope_scaling": null,
|
47 |
+
"rope_theta": 250000,
|
48 |
+
"sliding_window": null,
|
49 |
+
"tie_word_embeddings": false,
|
50 |
+
"use_cache": true,
|
51 |
+
"vocab_size": 100352
|
52 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/0dd437c3697ceec3b894.json
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "yujiepan/qwen2.5-128k-tiny-random",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"Qwen2ForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 8,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 16,
|
13 |
+
"max_position_embeddings": 32768,
|
14 |
+
"max_window_layers": 1,
|
15 |
+
"model_type": "qwen2",
|
16 |
+
"neuron": {
|
17 |
+
"_serialized_key": "HloNeuronConfig",
|
18 |
+
"all_reduce_dtype": null,
|
19 |
+
"allow_flash_attention": true,
|
20 |
+
"attention_layout": "HSB",
|
21 |
+
"attn_output_transposed": false,
|
22 |
+
"auto_cast_type": "fp16",
|
23 |
+
"batch_size": 1,
|
24 |
+
"checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
|
25 |
+
"checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
|
26 |
+
"collectives_layout": "HSB",
|
27 |
+
"continuous_batching": false,
|
28 |
+
"fuse_qkv": false,
|
29 |
+
"group_query_attention": "shard-over-heads",
|
30 |
+
"log_softmax_scores": false,
|
31 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
32 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
33 |
+
"output_all_logits": false,
|
34 |
+
"sequence_length": 100,
|
35 |
+
"tp_degree": 2
|
36 |
+
},
|
37 |
+
"num_attention_heads": 4,
|
38 |
+
"num_hidden_layers": 2,
|
39 |
+
"num_key_value_heads": 2,
|
40 |
+
"rms_norm_eps": 1e-06,
|
41 |
+
"rope_scaling": {
|
42 |
+
"factor": 4.0,
|
43 |
+
"original_max_position_embeddings": 32768,
|
44 |
+
"rope_type": "yarn",
|
45 |
+
"type": "yarn"
|
46 |
+
},
|
47 |
+
"rope_theta": 1000000.0,
|
48 |
+
"sliding_window": 131072,
|
49 |
+
"tie_word_embeddings": false,
|
50 |
+
"use_cache": true,
|
51 |
+
"use_sliding_window": false,
|
52 |
+
"vocab_size": 152064
|
53 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/44c8abc3ecedc8598f8a.json
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "yujiepan/qwen2.5-128k-tiny-random",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"Qwen2ForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 8,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 16,
|
13 |
+
"max_position_embeddings": 32768,
|
14 |
+
"max_window_layers": 1,
|
15 |
+
"model_type": "qwen2",
|
16 |
+
"neuron": {
|
17 |
+
"_serialized_key": "HloNeuronConfig",
|
18 |
+
"all_reduce_dtype": null,
|
19 |
+
"allow_flash_attention": true,
|
20 |
+
"attention_layout": "HSB",
|
21 |
+
"attn_output_transposed": false,
|
22 |
+
"auto_cast_type": "bf16",
|
23 |
+
"batch_size": 1,
|
24 |
+
"checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
|
25 |
+
"checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
|
26 |
+
"collectives_layout": "HSB",
|
27 |
+
"continuous_batching": false,
|
28 |
+
"fuse_qkv": false,
|
29 |
+
"group_query_attention": "shard-over-heads",
|
30 |
+
"log_softmax_scores": false,
|
31 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
32 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
33 |
+
"output_all_logits": false,
|
34 |
+
"sequence_length": 100,
|
35 |
+
"tp_degree": 2
|
36 |
+
},
|
37 |
+
"num_attention_heads": 4,
|
38 |
+
"num_hidden_layers": 2,
|
39 |
+
"num_key_value_heads": 2,
|
40 |
+
"rms_norm_eps": 1e-06,
|
41 |
+
"rope_scaling": {
|
42 |
+
"factor": 4.0,
|
43 |
+
"original_max_position_embeddings": 32768,
|
44 |
+
"rope_type": "yarn",
|
45 |
+
"type": "yarn"
|
46 |
+
},
|
47 |
+
"rope_theta": 1000000.0,
|
48 |
+
"sliding_window": 131072,
|
49 |
+
"tie_word_embeddings": false,
|
50 |
+
"use_cache": true,
|
51 |
+
"use_sliding_window": false,
|
52 |
+
"vocab_size": 152064
|
53 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/d472f81e5c657f79c2f9.json
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "yujiepan/qwen2.5-128k-tiny-random",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"Qwen2ForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 8,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 16,
|
13 |
+
"max_position_embeddings": 32768,
|
14 |
+
"max_window_layers": 1,
|
15 |
+
"model_type": "qwen2",
|
16 |
+
"neuron": {
|
17 |
+
"_serialized_key": "HloNeuronConfig",
|
18 |
+
"all_reduce_dtype": null,
|
19 |
+
"allow_flash_attention": true,
|
20 |
+
"attention_layout": "HSB",
|
21 |
+
"attn_output_transposed": false,
|
22 |
+
"auto_cast_type": "fp16",
|
23 |
+
"batch_size": 2,
|
24 |
+
"checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
|
25 |
+
"checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
|
26 |
+
"collectives_layout": "HSB",
|
27 |
+
"continuous_batching": true,
|
28 |
+
"fuse_qkv": false,
|
29 |
+
"group_query_attention": "shard-over-heads",
|
30 |
+
"log_softmax_scores": false,
|
31 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
32 |
+
"optimum_neuron_version": "0.2.0.dev6",
|
33 |
+
"output_all_logits": false,
|
34 |
+
"sequence_length": 100,
|
35 |
+
"tp_degree": 2
|
36 |
+
},
|
37 |
+
"num_attention_heads": 4,
|
38 |
+
"num_hidden_layers": 2,
|
39 |
+
"num_key_value_heads": 2,
|
40 |
+
"rms_norm_eps": 1e-06,
|
41 |
+
"rope_scaling": {
|
42 |
+
"factor": 4.0,
|
43 |
+
"original_max_position_embeddings": 32768,
|
44 |
+
"rope_type": "yarn",
|
45 |
+
"type": "yarn"
|
46 |
+
},
|
47 |
+
"rope_theta": 1000000.0,
|
48 |
+
"sliding_window": 131072,
|
49 |
+
"tie_word_embeddings": false,
|
50 |
+
"use_cache": true,
|
51 |
+
"use_sliding_window": false,
|
52 |
+
"vocab_size": 152064
|
53 |
+
}
|
neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134144
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9087c314f6fa55423fc6a68ce21fe41a23755e3187e5ca7b6262bb32852394b8
|
3 |
size 134144
|
neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 103424
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:718f1087fa929f9e8019a422802ca71044b98f0f2737a1946a86fad0e34be7c4
|
3 |
size 103424
|
neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 144384
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:433777267e8f7fbdf53f2c5d8ea049016dc5ac3db4ce58c1013158cac1b7fec9
|
3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134144
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38e3d7eba41dec4525d00434677bda5b3628a20542f8dac4da11a04905ec8603
|
3 |
size 134144
|
neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 144384
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2998d347ffb4da8ad8ae85b0de34b3708c9b95ab1a3d3a6ef3724d39101214db
|
3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134144
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8658d7edca7655f8ccb268fe74d58fb5c0e28ce000767e740e040effb86a324
|
3 |
size 134144
|
neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 68277
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ae03fcab3b8d64ae3d2021aa19538fd99e346fbcb83856c5605c4708c68a674
|
3 |
size 68277
|
neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 257024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59365c55ac85b4319e6652d308793be3f7770506d36d9a898c3c95994f19ad97
|
3 |
size 257024
|
neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 268322
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9bbb2da6842ffd61c068a7dad0614707927a24cdeb69847ace43b44d48701cd8
|
3 |
size 268322
|
neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134144
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a436f46163b80d40e3bcdf3710bae3f53053542d3a20179699b69e649a66f94
|
3 |
size 134144
|
neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 144384
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef98b073c4b799ee1cebf406f19161d2bff5d27ebf91a7780cbc3309bb443372
|
3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 144384
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bdd6ea59a8bd73ff49bdb300a19c9c89e924f848b46f58fc0350c770d9a1aeb
|
3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 154624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ef05748f5996defa4be3ceb641b0afd5d573c02a707427f39dc316f28b1da5b
|
3 |
size 154624
|
neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 144384
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed6c2a97e73faa481af38694827d8c5cc6ba040780fe793cd59333e5d2fa99f6
|
3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 144384
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e1a2489ae30050340d2a2d74b2f99e125ce55e7af649559b69436f1bbc1254a
|
3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134144
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fcf15abaffa10d5ccb148094aea3c11796dd108eef543d2c169791b865bc239
|
3 |
size 134144
|
neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 154624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e905bcf110b6673f11faf97fb29fce488607c5de39d5ec35dd07d086353d8a78
|
3 |
size 154624
|
neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 154624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f93c2ed8e16a660030c2962766e8574b694a4d3d1a5d715db9f68b047830da2
|
3 |
size 154624
|
neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 154624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bf30589edfe8ba7f48c911fbf185782d5d19b48913922c257b3dd6ffde9fd18
|
3 |
size 154624
|
neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 103424
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fba211abd63e7ed62b5d57d0a4d73db93c7a282e59a7f9fdabcb6efed59c099f
|
3 |
size 103424
|
neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 144384
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29e3d53dcf1b9f2fcb975c725d8fb256a6a3319118511e15ac3ba34e410a6956
|
3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134144
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:969bf0657f95b712f172b4bd0e69d6ded550c83713c18ca22e8d2e9194c74ea4
|
3 |
size 134144
|
neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 103424
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a84ebe3c800ade3de31e6eaf3688059f5b5fdd90e09214dbc0cf2872305adf0
|
3 |
size 103424
|
neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 154624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:edc55474ec7083ec768bafb01ae9c71c99047535acdcbefdabead066d414d5f1
|
3 |
size 154624
|
neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.hlo_module.pb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 79431
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:994deb4a93f90da2b4fee4d4af87c5b9406751d3ea328ecc995da75cce552a2e
|
3 |
size 79431
|
neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 318464
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1588a7ea46d43352059bb8cdc393a447d1d1784af84f14251ceb80076511e713
|
3 |
size 318464
|
neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 329762
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bec3a1886e11e568e658c14152e45db460a7a44b8fdfe83ff5d3dd864cd81e01
|
3 |
size 329762
|
neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 144384
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caa9f885f4efc918b827042f1c34badd68f9c844d90033d4730a878b3a4a9a0b
|
3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 144384
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:440323f4af46865bceb7408b37140a999b2e02a4745b6736e1adedb1cd5ff64e
|
3 |
size 144384
|
neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134144
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:890ec6a0f753c9f9c9166051fe387da323ae33be211d724b810fbbbe8bc30282
|
3 |
size 134144
|
neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.hlo_module.pb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 68279
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77ace701a29dd40a20d0b86dc7d3c572990f7f98aaf5c97d77e8dd383bb4b465
|
3 |
size 68279
|