dacorvo HF Staff commited on
Commit
aa87966
·
verified ·
1 Parent(s): 67edfaf

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +25 -0
  2. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/698ede202023fad6e4ac.json +73 -0
  3. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d1f56a608fd1f85f24f1.json +73 -0
  4. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.3-8b-instruct/8e67447ff0fe199668d6.json +73 -0
  5. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/38a5aecfa62be8b081c0.json +78 -0
  6. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/91f06166632f7d2d7771.json +71 -0
  7. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/9a804e057317591235d2.json +71 -0
  8. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/c65c50ec2ec44d68f235.json +71 -0
  9. neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/compile_flags.json +1 -0
  10. neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.done +0 -0
  11. neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.hlo_module.pb +3 -0
  12. neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.neff +3 -0
  13. neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/compile_flags.json +1 -0
  14. neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.done +0 -0
  15. neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.hlo_module.pb +3 -0
  16. neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.neff +3 -0
  17. neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/compile_flags.json +1 -0
  18. neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.done +0 -0
  19. neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.hlo_module.pb +3 -0
  20. neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.neff +3 -0
  21. neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/wrapped_neff.hlo +3 -0
  22. neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/compile_flags.json +1 -0
  23. neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.done +0 -0
  24. neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.hlo_module.pb +3 -0
  25. neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.neff +3 -0
  26. neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/compile_flags.json +1 -0
  27. neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.done +0 -0
  28. neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.hlo_module.pb +3 -0
  29. neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.neff +3 -0
  30. neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/compile_flags.json +1 -0
  31. neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.done +0 -0
  32. neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.hlo_module.pb +3 -0
  33. neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.neff +3 -0
  34. neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/wrapped_neff.hlo +3 -0
  35. neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/compile_flags.json +1 -0
  36. neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.done +0 -0
  37. neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.hlo_module.pb +3 -0
  38. neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.neff +3 -0
  39. neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/compile_flags.json +1 -0
  40. neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.done +0 -0
  41. neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.hlo_module.pb +3 -0
  42. neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.neff +3 -0
  43. neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/wrapped_neff.hlo +3 -0
  44. neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/compile_flags.json +1 -0
  45. neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.done +0 -0
  46. neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.hlo_module.pb +3 -0
  47. neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.neff +3 -0
  48. neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/compile_flags.json +1 -0
  49. neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/model.hlo_module.pb +3 -0
  50. neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/compile_flags.json +1 -0
.gitattributes CHANGED
@@ -2965,3 +2965,28 @@ neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/model.neff fi
2965
  neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
2966
  neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
2967
  neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2965
  neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
2966
  neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
2967
  neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
2968
+ neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
2969
+ neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
2970
+ neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
2971
+ neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
2972
+ neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
2973
+ neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
2974
+ neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
2975
+ neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
2976
+ neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
2977
+ neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
2978
+ neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
2979
+ neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
2980
+ neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
2981
+ neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
2982
+ neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
2983
+ neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
2984
+ neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
2985
+ neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
2986
+ neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
2987
+ neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
2988
+ neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
2989
+ neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
2990
+ neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
2991
+ neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
2992
+ neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/698ede202023fad6e4ac.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "logits_scaling": 8.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 4,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
28
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 4,
39
+ "max_context_length": 4096,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 4096,
44
+ "neuronxcc_version": "2.17.194.0+d312836f",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": false,
47
+ "optimum_neuron_version": "0.3.0.dev1",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 40,
65
+ "num_key_value_heads": 8,
66
+ "residual_multiplier": 0.22,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 5000000.0,
70
+ "tie_word_embeddings": true,
71
+ "use_cache": true,
72
+ "vocab_size": 49155
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d1f56a608fd1f85f24f1.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "logits_scaling": 8.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
28
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 4096,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 4096,
44
+ "neuronxcc_version": "2.17.194.0+d312836f",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev1",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 40,
65
+ "num_key_value_heads": 8,
66
+ "residual_multiplier": 0.22,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 5000000.0,
70
+ "tie_word_embeddings": true,
71
+ "use_cache": true,
72
+ "vocab_size": 49155
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.3-8b-instruct/8e67447ff0fe199668d6.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 0.0078125,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 4096,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 12800,
16
+ "logits_scaling": 16.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
28
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 4096,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 4096,
44
+ "neuronxcc_version": "2.17.194.0+d312836f",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev1",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 40,
65
+ "num_key_value_heads": 8,
66
+ "residual_multiplier": 0.22,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000000.0,
70
+ "tie_word_embeddings": true,
71
+ "use_cache": true,
72
+ "vocab_size": 49159
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/38a5aecfa62be8b081c0.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 24,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 128,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 128,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev1",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 128,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 24,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/91f06166632f7d2d7771.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
24
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 24,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 128,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 128,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev1",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 128,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 24,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 14,
60
+ "num_hidden_layers": 24,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": null,
64
+ "rope_theta": 1000000.0,
65
+ "sliding_window": 32768,
66
+ "tie_word_embeddings": true,
67
+ "use_cache": true,
68
+ "use_mrope": false,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/9a804e057317591235d2.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
24
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 128,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 128,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev1",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 128,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 14,
60
+ "num_hidden_layers": 24,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": null,
64
+ "rope_theta": 1000000.0,
65
+ "sliding_window": 32768,
66
+ "tie_word_embeddings": true,
67
+ "use_cache": true,
68
+ "use_mrope": false,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/c65c50ec2ec44d68f235.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
24
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 1,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 128,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 128,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev1",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 128,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 1,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 14,
60
+ "num_hidden_layers": 24,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": null,
64
+ "rope_theta": 1000000.0,
65
+ "sliding_window": 32768,
66
+ "tie_word_embeddings": true,
67
+ "use_cache": true,
68
+ "use_mrope": false,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:892c32ed92799e95d2ab40d5a34738b04d2f7143e1f4624f0929e59758e23c64
3
+ size 535749
neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81343b6030b10b8e0afc06e398050dec11ff840e24955618dc85d1e7fbc5d78c
3
+ size 1772544
neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7da48877712718e281765474b71b2b7197a4394e8f1c2aa326df846e21f1d6ac
3
+ size 563496
neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf4a8f9b622920e767a57582e10bc9ec2f1aaa1fa3e330093ba80d09a509cf8b
3
+ size 1158144
neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper"
neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63751a0bb5e7aa9b0373ea81eb9ba1650b5eb601b4169dd868046c083c38c781
3
+ size 971593
neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d945a6bef3d712dbcb309c056d799eaefa88d0b05342e5420d0f6d014cbc82e
3
+ size 3369984
neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2019af15f575af164d7ff719706b72c086c5cadae8453404f2cbd0a16472fcec
3
+ size 3543112
neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd95ce79e3100f0f971325b4e1a0b5d080fb4443153405b5869b9cb4c8870c91
3
+ size 378945
neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe54150ac740c2430e66289586ee7d6049c299eeb3e0d2db71b5ffe42c844a8a
3
+ size 564224
neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b003122b37563ff19e4c2d28bc52b81a23c0510a6e7dd43292f0f777bed842b
3
+ size 1887935
neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e6aeaf87f5edff5233503800ffd18f97c872b18ca3ff2ad3ecc68f0305e905c
3
+ size 25160704
neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper"
neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f3f22f5a5aed80511ff092bb6c5efd7cee45e557c3b5bd10bdc47754f159d3b
3
+ size 540237
neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d970a56f621f6c119ee6f8fdebf4107a3c8d50fc760b157ce0f1a87412d41a78
3
+ size 553984
neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:291b718353dffab0649ec844f053b3402735e48e4fff5c9bef9275c03effb4fe
3
+ size 688151
neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b711200dfd242da19e652bc20b035b0c3436c19ecf253b6a5f2d0f22febb349a
3
+ size 172324
neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d97120bee25e3b2ff4a812692964b8c62248afb0d20652b898baa6856e8966f
3
+ size 2397184
neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper"
neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46b2ed73a544b049277a3e081d48cd74f24a067c1c0d8df51acf59a9db985d1e
3
+ size 2400727
neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85dd8e9800800942b8f4f44efc280befa8d4d41e1d9a489a9440de4d6f0e3768
3
+ size 4885504
neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a8781e8141bf01e24b6e3d3eb546d7fe388d234ecfd5bca446268e7a007ed7d
3
+ size 5058549
neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:401233f1288017ae6d1a5139e126e22161c3152a6145c923b5b78040dd3f8c83
3
+ size 171444
neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2eae1d3728ce2c5fe18d5df1ddec5217ef5ad2b4a0973aea8e3e9707bcc9507
3
+ size 7353344
neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper"
neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33a1cd799da49294617b2445cab66236a026ac0e51133ae310607b88893c2e3
3
+ size 945923
neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper"