optimum-internal-testing-user commited on
Commit
13d1a53
·
verified ·
1 Parent(s): 9113df1

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/394ba8149659ea39ae01.json +73 -0
  2. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3f77bb65ae5a6b9c96e0.json +73 -0
  3. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4d418dd59449dde47dcb.json +73 -0
  4. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/llama/llamafactory/tiny-random-Llama-3/7c28c8a88c5d68b7a0e5.json +77 -0
  5. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/llama/llamafactory/tiny-random-Llama-3/d2cee631571498108c74.json +77 -0
  6. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/llama/llamafactory/tiny-random-Llama-3/e64ce62517cdbdc71dae.json +77 -0
  7. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/llama/unsloth/Llama-3.2-1B-Instruct/e0240f41c55198f5b4a2.json +78 -0
  8. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/mixtral/dacorvo/Mixtral-tiny/cc6e87ff362c7755dfc3.json +73 -0
  9. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/mixtral/dacorvo/Mixtral-tiny/ea665141c31d4cbbb111.json +73 -0
  10. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/mixtral/dacorvo/Mixtral-tiny/eea019da9ffc7619dda7.json +73 -0
  11. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/phi3/yujiepan/phi-4-tiny-random/46c220062dd86ae6025f.json +74 -0
  12. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/phi3/yujiepan/phi-4-tiny-random/773fae5d2cf3af166253.json +74 -0
  13. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/phi3/yujiepan/phi-4-tiny-random/ec89efa8bb14d3c6915a.json +74 -0
  14. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/1c7306d39a56777c651d.json +75 -0
  15. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/5fefeb68e7e50cd1787e.json +75 -0
  16. neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/8fdb7edba36a465a665f.json +75 -0
  17. neuronxcc-2.18.121.0+9e31e41a/MODULE_02f045f6902463c49bce+84f3e719/model.hlo_module.pb +1 -1
  18. neuronxcc-2.18.121.0+9e31e41a/MODULE_02f045f6902463c49bce+84f3e719/model.neff +1 -1
  19. neuronxcc-2.18.121.0+9e31e41a/MODULE_046ebd86c77dc4a94c6c+5be477de/model.neff +1 -1
  20. neuronxcc-2.18.121.0+9e31e41a/MODULE_046ebd86c77dc4a94c6c+5be477de/wrapped_neff.hlo +1 -1
  21. neuronxcc-2.18.121.0+9e31e41a/MODULE_0bbe60dde8eaacbc8218+5be477de/model.neff +1 -1
  22. neuronxcc-2.18.121.0+9e31e41a/MODULE_0bbe60dde8eaacbc8218+5be477de/wrapped_neff.hlo +1 -1
  23. neuronxcc-2.18.121.0+9e31e41a/MODULE_0f88705903403514996d+431f5505/model.neff +0 -0
  24. neuronxcc-2.18.121.0+9e31e41a/MODULE_1215feca19e3858f9ef6+84f3e719/model.hlo_module.pb +1 -1
  25. neuronxcc-2.18.121.0+9e31e41a/MODULE_1215feca19e3858f9ef6+84f3e719/model.neff +1 -1
  26. neuronxcc-2.18.121.0+9e31e41a/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff +1 -1
  27. neuronxcc-2.18.121.0+9e31e41a/MODULE_21d49e164d26352245e5+84f3e719/model.hlo_module.pb +1 -1
  28. neuronxcc-2.18.121.0+9e31e41a/MODULE_21d49e164d26352245e5+84f3e719/model.neff +1 -1
  29. neuronxcc-2.18.121.0+9e31e41a/MODULE_30c8e5dffb371f5a2fc0+5be477de/model.neff +1 -1
  30. neuronxcc-2.18.121.0+9e31e41a/MODULE_30c8e5dffb371f5a2fc0+5be477de/wrapped_neff.hlo +1 -1
  31. neuronxcc-2.18.121.0+9e31e41a/MODULE_331276a07386ee77d52e+431f5505/model.neff +0 -0
  32. neuronxcc-2.18.121.0+9e31e41a/MODULE_3688ee5eab5a3273c651+84f3e719/model.hlo_module.pb +1 -1
  33. neuronxcc-2.18.121.0+9e31e41a/MODULE_3688ee5eab5a3273c651+84f3e719/model.neff +1 -1
  34. neuronxcc-2.18.121.0+9e31e41a/MODULE_3a1bd8b5ecc619e49cdb+5be477de/model.neff +1 -1
  35. neuronxcc-2.18.121.0+9e31e41a/MODULE_3a1bd8b5ecc619e49cdb+5be477de/wrapped_neff.hlo +1 -1
  36. neuronxcc-2.18.121.0+9e31e41a/MODULE_3ddc835c8aaca5fb3605+84f3e719/model.hlo_module.pb +1 -1
  37. neuronxcc-2.18.121.0+9e31e41a/MODULE_3ddc835c8aaca5fb3605+84f3e719/model.neff +1 -1
  38. neuronxcc-2.18.121.0+9e31e41a/MODULE_46b9d2bfbdf1b2752484+431f5505/model.neff +1 -1
  39. neuronxcc-2.18.121.0+9e31e41a/MODULE_4c948ba1d275cea9b124+431f5505/model.neff +0 -0
  40. neuronxcc-2.18.121.0+9e31e41a/MODULE_53e7ea3b124fbe95f047+84f3e719/model.hlo_module.pb +1 -1
  41. neuronxcc-2.18.121.0+9e31e41a/MODULE_53e7ea3b124fbe95f047+84f3e719/model.neff +1 -1
  42. neuronxcc-2.18.121.0+9e31e41a/MODULE_57b107bd0499cc4986ac+ca355898/model.neff +1 -1
  43. neuronxcc-2.18.121.0+9e31e41a/MODULE_57b107bd0499cc4986ac+ca355898/wrapped_neff.hlo +1 -1
  44. neuronxcc-2.18.121.0+9e31e41a/MODULE_5a10198534c5f2725fd7+5be477de/model.neff +1 -1
  45. neuronxcc-2.18.121.0+9e31e41a/MODULE_5a10198534c5f2725fd7+5be477de/wrapped_neff.hlo +1 -1
  46. neuronxcc-2.18.121.0+9e31e41a/MODULE_5a81b67dd74f9d5520b3+5be477de/model.neff +1 -1
  47. neuronxcc-2.18.121.0+9e31e41a/MODULE_5a81b67dd74f9d5520b3+5be477de/wrapped_neff.hlo +1 -1
  48. neuronxcc-2.18.121.0+9e31e41a/MODULE_61794b8717d8b5a8853d+431f5505/model.neff +0 -0
  49. neuronxcc-2.18.121.0+9e31e41a/MODULE_64950c85776a119cdf83+c2248236/model.hlo_module.pb +1 -1
  50. neuronxcc-2.18.121.0+9e31e41a/MODULE_64950c85776a119cdf83+c2248236/model.neff +1 -1
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/394ba8149659ea39ae01.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev3",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "residual_multiplier": 1.0,
67
+ "rms_norm_eps": 1e-06,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000.0,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 49152
73
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3f77bb65ae5a6b9c96e0.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev3",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "residual_multiplier": 1.0,
67
+ "rms_norm_eps": 1e-06,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000.0,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 49152
73
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4d418dd59449dde47dcb.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 2,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 2,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": false,
47
+ "optimum_neuron_version": "0.3.0.dev3",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "residual_multiplier": 1.0,
67
+ "rms_norm_eps": 1e-06,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000.0,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 49152
73
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/llama/llamafactory/tiny-random-Llama-3/7c28c8a88c5d68b7a0e5.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev3",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/llama/llamafactory/tiny-random-Llama-3/d2cee631571498108c74.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev3",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/llama/llamafactory/tiny-random-Llama-3/e64ce62517cdbdc71dae.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev3",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/llama/unsloth/Llama-3.2-1B-Instruct/e0240f41c55198f5b4a2.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev3",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/mixtral/dacorvo/Mixtral-tiny/cc6e87ff362c7755dfc3.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev3",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/mixtral/dacorvo/Mixtral-tiny/ea665141c31d4cbbb111.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev3",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/mixtral/dacorvo/Mixtral-tiny/eea019da9ffc7619dda7.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 2,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 2,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev3",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/phi3/yujiepan/phi-4-tiny-random/46c220062dd86ae6025f.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
26
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev3",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 2,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 1,
64
+ "original_max_position_embeddings": 16384,
65
+ "partial_rotary_factor": 1.0,
66
+ "resid_pdrop": 0.0,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 250000,
70
+ "sliding_window": null,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "vocab_size": 100352
74
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/phi3/yujiepan/phi-4-tiny-random/773fae5d2cf3af166253.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
26
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev3",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 2,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 1,
64
+ "original_max_position_embeddings": 16384,
65
+ "partial_rotary_factor": 1.0,
66
+ "resid_pdrop": 0.0,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 250000,
70
+ "sliding_window": null,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "vocab_size": 100352
74
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/phi3/yujiepan/phi-4-tiny-random/ec89efa8bb14d3c6915a.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
26
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev3",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 2,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 1,
64
+ "original_max_position_embeddings": 16384,
65
+ "partial_rotary_factor": 1.0,
66
+ "resid_pdrop": 0.0,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 250000,
70
+ "sliding_window": null,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "vocab_size": 100352
74
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/1c7306d39a56777c651d.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 1,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
24
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev3",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 4,
60
+ "num_hidden_layers": 2,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": {
64
+ "factor": 4.0,
65
+ "original_max_position_embeddings": 32768,
66
+ "rope_type": "yarn",
67
+ "type": "yarn"
68
+ },
69
+ "rope_theta": 1000000.0,
70
+ "sliding_window": 131072,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "use_sliding_window": false,
74
+ "vocab_size": 152064
75
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/5fefeb68e7e50cd1787e.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 1,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 2,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
24
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
25
+ "continuous_batching": true,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 2,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev3",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 4,
60
+ "num_hidden_layers": 2,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": {
64
+ "factor": 4.0,
65
+ "original_max_position_embeddings": 32768,
66
+ "rope_type": "yarn",
67
+ "type": "yarn"
68
+ },
69
+ "rope_theta": 1000000.0,
70
+ "sliding_window": 131072,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "use_sliding_window": false,
74
+ "vocab_size": 152064
75
+ }
neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev3/qwen2/yujiepan/qwen2.5-128k-tiny-random/8fdb7edba36a465a665f.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 1,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
24
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.18.121.0+9e31e41a",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev3",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 4,
60
+ "num_hidden_layers": 2,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": {
64
+ "factor": 4.0,
65
+ "original_max_position_embeddings": 32768,
66
+ "rope_type": "yarn",
67
+ "type": "yarn"
68
+ },
69
+ "rope_theta": 1000000.0,
70
+ "sliding_window": 131072,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "use_sliding_window": false,
74
+ "vocab_size": 152064
75
+ }
neuronxcc-2.18.121.0+9e31e41a/MODULE_02f045f6902463c49bce+84f3e719/model.hlo_module.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0803cb5e5e23b32611e1d7a9f6868f512ce57b77d3a0a29c8f986b5cce743321
3
  size 46622
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb35be7b4c769876dd61e1a759d13da8a6c34881ccd1757aaa64f7039b95ea8
3
  size 46622
neuronxcc-2.18.121.0+9e31e41a/MODULE_02f045f6902463c49bce+84f3e719/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5230dfb96ab51bf16cb02ac63f3e8ac41c847cb011db0f82f5001847f758f0e6
3
  size 144384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24adadd3501a511aa2f9f1dbbd493488f53843ad2daa88d6920bcd471b86b608
3
  size 144384
neuronxcc-2.18.121.0+9e31e41a/MODULE_046ebd86c77dc4a94c6c+5be477de/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b5cd54ec5279c4dde9e6475b7cfaeda2aab62e3e2a92fc242c1a56d05049874
3
  size 236544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5783bd343ae3fdcb8313d9dc3b23d3a9356c6f8dc24505e32bac20b2b7adba6a
3
  size 236544
neuronxcc-2.18.121.0+9e31e41a/MODULE_046ebd86c77dc4a94c6c+5be477de/wrapped_neff.hlo CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc1dd826502c3a5e1b83a630e5959c0f82c6ac46c9977cc351feebcc5a0fe5c1
3
  size 247153
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53b455edb9ad0d00f2e1dbe356f9574eeb2ab17f99756c60771074e81f234af3
3
  size 247153
neuronxcc-2.18.121.0+9e31e41a/MODULE_0bbe60dde8eaacbc8218+5be477de/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60e93f84fcd9ed08889336789a60b971c3c21a507ccc034decf8161453cd443a
3
  size 144384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05c5c5bc8f5599a78c6434bc2b088dcd7c9dd1df1b0636bcac6f7ffbc702d1f6
3
  size 144384
neuronxcc-2.18.121.0+9e31e41a/MODULE_0bbe60dde8eaacbc8218+5be477de/wrapped_neff.hlo CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:885b920ee07701ecbd3ad5016e0df8b9fda3c38ce8ee8418f9978ccc29609f40
3
  size 152045
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c509b3f043adb90c90d629be73780694aff7b0f465526f919c5de301bceb14d4
3
  size 152045
neuronxcc-2.18.121.0+9e31e41a/MODULE_0f88705903403514996d+431f5505/model.neff CHANGED
Binary files a/neuronxcc-2.18.121.0+9e31e41a/MODULE_0f88705903403514996d+431f5505/model.neff and b/neuronxcc-2.18.121.0+9e31e41a/MODULE_0f88705903403514996d+431f5505/model.neff differ
 
neuronxcc-2.18.121.0+9e31e41a/MODULE_1215feca19e3858f9ef6+84f3e719/model.hlo_module.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:527ff38e542de60af8633e078ec3c5c57fbadbe30d74ee1aac584834b1615288
3
  size 53803
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc5820a8b0f2c5de27eeb3167bbd6c7ad99cbc6e4250b487d89a102ef1a236af
3
  size 53803
neuronxcc-2.18.121.0+9e31e41a/MODULE_1215feca19e3858f9ef6+84f3e719/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6869ac00c3e40d1cf9945744471e75dd98b9e6b9571289427d0902e068456fc
3
  size 164864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40c87ee01d8ab4c3c744687040b92c2674201d2fedc2f318e1d91a5c8213e81a
3
  size 164864
neuronxcc-2.18.121.0+9e31e41a/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad2f90d77e4a5f3f591bd8b664f2daa39b197f27e7c946650e8776e7a9a5d0fc
3
  size 103424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c61151ca6c4e98324d2dd643e5ebd2dc7136090fc82c49ecd0265025d98203c9
3
  size 103424
neuronxcc-2.18.121.0+9e31e41a/MODULE_21d49e164d26352245e5+84f3e719/model.hlo_module.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b25c984cce04ac74df62cd1dc284c28470313957cf8e49fea854099b9378ffcf
3
  size 448722
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77bd2663c3810b6b6d6e3248f235ad11eaaea050da0bf32e10c8f17c3299efe2
3
  size 448722
neuronxcc-2.18.121.0+9e31e41a/MODULE_21d49e164d26352245e5+84f3e719/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fafb376e0631f5d62079fe5c78eb54c9982815c07a92e4dd51e53e801f78048d
3
  size 32257024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52a19cb8631463a2a4f2eb1f3e41aba5fdb2b297355d92f3b99e6bcf0c79691e
3
  size 32257024
neuronxcc-2.18.121.0+9e31e41a/MODULE_30c8e5dffb371f5a2fc0+5be477de/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08bd07117299b6a109bbdcc2de25721ecca088a812d7ccf82129137817b69a44
3
  size 164864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f5e58e36a41c0b089557aa450c98b3d4a45c290dbb22a12e7d4335fef917f2d
3
  size 164864
neuronxcc-2.18.121.0+9e31e41a/MODULE_30c8e5dffb371f5a2fc0+5be477de/wrapped_neff.hlo CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2b61c4890b4861116cc151d02681a4e10deda4aaf1728a6a24853b73747001f
3
  size 172461
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:479b0c170ca81520f7c4fcd3aea24c3a0c85995f90b64f42619db6d90ff23302
3
  size 172461
neuronxcc-2.18.121.0+9e31e41a/MODULE_331276a07386ee77d52e+431f5505/model.neff CHANGED
Binary files a/neuronxcc-2.18.121.0+9e31e41a/MODULE_331276a07386ee77d52e+431f5505/model.neff and b/neuronxcc-2.18.121.0+9e31e41a/MODULE_331276a07386ee77d52e+431f5505/model.neff differ
 
neuronxcc-2.18.121.0+9e31e41a/MODULE_3688ee5eab5a3273c651+84f3e719/model.hlo_module.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a84e2c4df78dd443728551a6323c507c957175fc24244fad6ae2771ef852433
3
  size 83856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dad13102e9bd32be98f125c4efd1ec18611c949b07e82e8e2b4a89fbbe18cc57
3
  size 83856
neuronxcc-2.18.121.0+9e31e41a/MODULE_3688ee5eab5a3273c651+84f3e719/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ba992580776ec3af728322a3b16baf4a7fc3238ff50b75431e123b378e170a0
3
  size 175104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e8664f1d6106174289c8218f47a1fde759745930f0726cf1324564b5f0336b3
3
  size 175104
neuronxcc-2.18.121.0+9e31e41a/MODULE_3a1bd8b5ecc619e49cdb+5be477de/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9c035847659ef48195627fa9b0edd5de00a79c72a7f9d5f84fa4ed48dd96e8a
3
  size 216064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fab9dac62fde66d29ca013fd3635c66d7776f2357fcabf475227c0ce23479fa7
3
  size 216064
neuronxcc-2.18.121.0+9e31e41a/MODULE_3a1bd8b5ecc619e49cdb+5be477de/wrapped_neff.hlo CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:508c3810dcee0650a55c552d9a1d20bc4b687866e5363fea46a5eb1eed230ee7
3
  size 223770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d0da1d38f365fd2583898c15a59ddee1b122e9fecaab4943efdf20aae8bd1fa
3
  size 223770
neuronxcc-2.18.121.0+9e31e41a/MODULE_3ddc835c8aaca5fb3605+84f3e719/model.hlo_module.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e47f49ac892e885be490920dad61b26edcec9067ceecd3a32a0a409e6428becf
3
  size 87721
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40969194a5f66b5cc929ec86015e06383d410f8c7236c19178b919c26c782b86
3
  size 87721
neuronxcc-2.18.121.0+9e31e41a/MODULE_3ddc835c8aaca5fb3605+84f3e719/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d54b77c463aabfc605a0f320223ce7eb30f740b6e5a316eedaa73118a4430ee
3
  size 246784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cd0f7a4fac7ae2fef50d6502a1371140fbd8156b13230d52ff5e9d72c2ff5ac
3
  size 246784
neuronxcc-2.18.121.0+9e31e41a/MODULE_46b9d2bfbdf1b2752484+431f5505/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8b7088016b28e05bc300a1c8ef5dafdcf09c8ace02575054444ebaa57baf00e
3
  size 1158144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35653481e484c459b14a7ccb13532dd346e12ee110482b9052fd32f77f900e8e
3
  size 1158144
neuronxcc-2.18.121.0+9e31e41a/MODULE_4c948ba1d275cea9b124+431f5505/model.neff CHANGED
Binary files a/neuronxcc-2.18.121.0+9e31e41a/MODULE_4c948ba1d275cea9b124+431f5505/model.neff and b/neuronxcc-2.18.121.0+9e31e41a/MODULE_4c948ba1d275cea9b124+431f5505/model.neff differ
 
neuronxcc-2.18.121.0+9e31e41a/MODULE_53e7ea3b124fbe95f047+84f3e719/model.hlo_module.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9db8a869bcc82627d232ef994835bbbfcaee6907910ffa4b8474e57b46c44aef
3
  size 80405
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5fc5821a18a923d93067a2dab3deb8a1ea77b78e5d3a2be6096277f02257ab
3
  size 80405
neuronxcc-2.18.121.0+9e31e41a/MODULE_53e7ea3b124fbe95f047+84f3e719/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:838abe20817ce441de934d90c5006bcbcf7d7d57e68feb15129318c8051c2bd3
3
  size 205824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c959eb0a7e13ba0edc9e91dd947d82d1e12766b067f60d0de7accff36c3d365d
3
  size 205824
neuronxcc-2.18.121.0+9e31e41a/MODULE_57b107bd0499cc4986ac+ca355898/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c7db1459ef8bd7be49a3f56b30f1daddb6a4488d7940df1b2fef1c69dcd0aa6
3
  size 257024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4974299d8936bd1519b4df3ca6b7e88e9da692519955e41e3537d33fb61fbaeb
3
  size 257024
neuronxcc-2.18.121.0+9e31e41a/MODULE_57b107bd0499cc4986ac+ca355898/wrapped_neff.hlo CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20493d9e2fde14fe06cfcee45e4a1f5f8c780d6e47c7df3996dc3ca1f6502849
3
  size 268322
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71c4e7236b91d6e521abbd93b345612cea0e2ae824d35f368ef40d711a6771a4
3
  size 268322
neuronxcc-2.18.121.0+9e31e41a/MODULE_5a10198534c5f2725fd7+5be477de/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0746e9e3250dcf63f7996beed4c4ae2b68b9f338223f1e5a183772052e3318db
3
  size 185344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ae7afef0b01868c6cddc354d06639bc0f646abcb15ac11076e0ffdc9132f6e0
3
  size 185344
neuronxcc-2.18.121.0+9e31e41a/MODULE_5a10198534c5f2725fd7+5be477de/wrapped_neff.hlo CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1de43537186c063d0753bca387178e708ea05ba20a821eb97f258f432886e849
3
  size 195507
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f75279e91322df84c7f2c47dedeed1efa3f0bb1a6a865f0e26a45e76a1bce1c4
3
  size 195507
neuronxcc-2.18.121.0+9e31e41a/MODULE_5a81b67dd74f9d5520b3+5be477de/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71149026777d06d0cc0885f4527aab8b53141395290e5f9ed57611de45157720
3
  size 185344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05808bab6fd6a5b1c3324df06931d8e1e92fe076a37cd170b683b8e094f4c9b2
3
  size 185344
neuronxcc-2.18.121.0+9e31e41a/MODULE_5a81b67dd74f9d5520b3+5be477de/wrapped_neff.hlo CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2900d852844bc8462d53b0987336ecbb2619e53f461ab8eb4d54a0ecfc4ec0d9
3
  size 193114
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2fc774aeb66814bd527397e18b098b034f48bfc5354aea68fae0ca100664b14
3
  size 193114
neuronxcc-2.18.121.0+9e31e41a/MODULE_61794b8717d8b5a8853d+431f5505/model.neff CHANGED
Binary files a/neuronxcc-2.18.121.0+9e31e41a/MODULE_61794b8717d8b5a8853d+431f5505/model.neff and b/neuronxcc-2.18.121.0+9e31e41a/MODULE_61794b8717d8b5a8853d+431f5505/model.neff differ
 
neuronxcc-2.18.121.0+9e31e41a/MODULE_64950c85776a119cdf83+c2248236/model.hlo_module.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb65cf7fc2975320dabce897b5f50369c9bdb4062f74a252f32ca0418771b03f
3
  size 82456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7d0abc6789528ae294d14222f1a8453446c6a4bf4fd827cdcf9ceb646cc94be
3
  size 82456
neuronxcc-2.18.121.0+9e31e41a/MODULE_64950c85776a119cdf83+c2248236/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd168a7d4d1fe94001bd9677413bc3309d5e07b232f1c1239b9907de94cb6f98
3
  size 420864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df60f36c3a3deb0a83084f779d69657336d0a6e6e5ca6d55961e365d9c296da2
3
  size 420864