diff --git a/.gitattributes b/.gitattributes index 9ed156ccca3a288e68870b48ad2bea991f82f083..357422c24729fe8243041a9869af437aeabcf3ea 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2337,3 +2337,19 @@ neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/3181882551441281ffc4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/3181882551441281ffc4.json new file mode 100644 index 0000000000000000000000000000000000000000..054a282284428a4d05af9ae4d6808806a7aab632 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/3181882551441281ffc4.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/59c2ac4deb5876ce233e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/59c2ac4deb5876ce233e.json new file mode 100644 index 0000000000000000000000000000000000000000..b4d6243e5d4b8e3d9fb0d15be007b78f7087594c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/59c2ac4deb5876ce233e.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e58176371a82a2c42de6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e58176371a82a2c42de6.json new file mode 100644 index 0000000000000000000000000000000000000000..da020dfa113114e31fb18ef15a7f579b9887749e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e58176371a82a2c42de6.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/142a929213c01997fffc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/142a929213c01997fffc.json new file mode 100644 index 0000000000000000000000000000000000000000..7a0150d747f7111bb7d09073bb9dd6f632b6de5c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/142a929213c01997fffc.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7944191e21d64907a94ba6b5d12c4fa10e63e1ac --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ec18053447cb1cb442bcbaf59b49fa97c94742a8a19f2dd46422c0d267e465 +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2fb9fdcc9f931f5579ddf232fdad18f8e8c41d82 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4b2e055f4587f249ca92d4e481b64e432e7ef85f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1ab9a9bc8b76147bb93389fa66a068ae4c026c6c315202afb6f1d545d194317 +size 45682 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e7d7011602f07a9edc9bf084536c2fd24a792e47 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a121861eb8eff7e1a738ec932e6c457cabbf469f83f7c5ce0ad03a4523a73670 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..cbd9791e4f13766a60b00397f93642f4c27802ed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52fb365e8e4d3c997bd2477f41da3bfa46d08fd899a9ff115620c9e1a9a88932 +size 162290 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d0c6af5f04f59e9b17083d7c10bed6bfc49081ff --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e8148ead06342f4043259e16a8c99b3d5ea22f0a254da9cff35dd9a98a11ed5 +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6e37af12b5260fbd338350f7e706806702f22696 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ccdba1e68b2f4f102baef4d511f5438ee0e0d3fc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a17898121f429cebb326d20cf0cf3bad35bc578e61f0e538cc86f32e226fe7e +size 42844 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b1714398e11ee2141fc4a374f39cf3eb7256963a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6070a18334fd7917ac0f734a57eb2e5faffd2aadfd00f37ce124c4d011545d3c +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..677c422431e1773c81e112a267e1429e11bf6ae5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48e9655d393e7edc003fd49e54ad3933f831525d8b4dbc2f8ffd75aee40f1841 +size 43178 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..837be56a9c9c97288ab7310ef68119984d5c3b11 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07c9399329c2ede8a1c26b8239a26f682070553b835ce000d8308568a1848b52 +size 164864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d0ebee16350c14edc8ccac8a7f7d40bd6e5c8c9b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e075c7397ee8a0808897a32296a37ab9fd3074d51eafcd3c50b9489c1a22b266 +size 82489 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..da22242427ee4d6c26d08dbac653048b423cfd50 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d71c077e93530edacfde4757021a076cd10a19fed69acff91b509e1d28170397 +size 461824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a83b93d39ad02a00af95abc93dc39b0c45c2878c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e62063b27c5ad62e19fdc90ad60db49316407efb7d7c5db1b20458df13f0eac +size 469663 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7303d18f46979d2ed04959b739a487b29b127844 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2f25bd3c3845ff49cf885db664160bf05fd476ea4200b73baebd56f479a9fb +size 90431 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3d6926a7563b56fbd719501e5530fadf5ace1c78 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97453c1d5b9445d40a9b0bae6a69b41962f3eab552123ba9ac8c6414d83b3d0f +size 213853184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8b8c259dc59696cc7e2b534f8bb8619a53d9cb59 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4416339d0e2e57324d94a492d9e3b4b2fea4c87d7e20b2ed96ff53b359aa427a +size 45682 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..88ff6130b565af752a4a79896bd4f7ce1e6e98f2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d3c9104da3cf8214de132b364b60322b60bd67b9e4ef346b7885b81c96ec542 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..dacdcdc12219618df51d5e1c0a49fe7e8a0116eb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e167b02ad686e26c9f6c458e673babe22cc1b2ee10b71a0988f3e53987385c +size 162290 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4d49a653b9f5cdf628ffe2e64d263db031387df6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25c8cb2b38101e7f69c2485fafcdab296c5befbbf1920020db0cdacc9283a0a2 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e5e5c873b2bd75f2ca4e235e10fe58c3cbcf7455 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e495aa83ee058fdcd936d45bd17cce0673306b5b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c325ed92a03a92f9ed903edf1f90308f0d42c263b599462a74ab76b8eb30995e +size 46073 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..03267f8ca9de48553789516f4cbee2dc0a8eff05 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7953347c7b789e29f12296126d63618feedb9936e098aaa92e88ec53908c267 +size 175104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9e7d8725b1e26e10c920c36418a26b9ba599191f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa2665c6252208494148151248fcf5a9dcb49a8b73777664b487b3c0c787efdf +size 182770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..57bfbf56a6575adde228f1e1b7089ee13caf2dd3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1899753a61026cc1113a9fd8df0944bb76d26fd38bdcae6a6ccceab7e6ae8e20 +size 376914 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..677f8d909dd5751f548f03f559d98d599a25da77 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbba648c2f71cd2802e87bae0d44a2c5fbcdaee7afb6740036b29b61981e5d9f +size 2151424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1007b553e7ad60926cfdbf2e28d824cc1f242b52 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70bf4145ee5b450e3b15e73416d4f5ca8d109e2daa00bbbc3f8214cae665aea6 +size 2221053 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..324ba9969baf5a425eeb63b029393a709a6aad13 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08c04b52a9d7b1c44571496b65eab600bb1b385a3cbc7f8e092e32fad992e307 +size 410647 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0cf4bb7c9108f2548d1d4c116a74c04efbf5a813 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c22af2cfd936fc83ffbb931e7e19b7e96723c70ab550b4ea5385d9f61586e279 +size 25140224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..22ee441afcfb5d7500e2955ad50e823878b0e474 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c28329f6cf4c41002edca947bc9fe238dcf678fa9015e197c84adfd624aefd +size 42844 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..47aff8bbdb096e649586f6ec0d42117bc8954452 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d7713e8e6e6447588def54781bc6347697462ad221892df9ec388a5f800c315 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b84a46840ae17f1b9b19296d48cc9ad8b9d243a8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fa5cb11090c0db156b73c67514fecc774180253d4980064595632b1d0e7eb7a +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5de76fe23744e18849755ddce914b3366b98c5eb Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..14142d4be6ba89f79cf9e4c64ef438cde1eac01b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8c114d00c5b63392debe1575b94c9cd5fb37da72b776dda342032b737f9dc67 +size 69044 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b52a3332bce4868684039c4278651a41af10f212 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35ec02bf5189d2a56eb101d3dc9d8918f10608b22c20c1dfb422650fa83b6f9b +size 1158144