diff --git a/.gitattributes b/.gitattributes index 620469c4167313456f7b7a62608fd91442074268..9ed156ccca3a288e68870b48ad2bea991f82f083 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2324,3 +2324,16 @@ neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e2fa63eb39084b138562.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e2fa63eb39084b138562.json new file mode 100644 index 0000000000000000000000000000000000000000..dab7dde7fb0b6d9f6a796e2c4d9bd0b1449a0138 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e2fa63eb39084b138562.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 131072, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 131072, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 131072, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7a327a1138aedf7f679c509b1717ad2e471c2e8e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82a4468f10d230f344133e3d413f1e6e21a9fc56f972c9a4ef54659b147d5a69 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..06f7aab2f291bf22dd3117e208b56315b5f83a6a Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..910e47f244eee704195cfb5d1588096585bba1f2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a02e9ccc29e6da3480938961adc21cdd190bad8cd98fa2bcd5a16fc6e185395 +size 448531 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..105e4c9bd59e1aa99c1a0204ec75c2bc3d4d0b6d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c00dee85928a1f9b7ae0871aedd0ebccb032c8700adbeaafe94d93f3a79ef680 +size 25181184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e8fa1fd16b97a00b738414066438e3b279dfd9ab --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddd11f9c02b52d7f705aeb808f36b3f9f29a503ad5149d528b8a37c590182676 +size 413964 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e87409e7860463d95346134f216e6aebdfe3a871 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed184f3f77a84fc4dff596704bc40263d775e8c199cfc8eb838654670074f5dc +size 2212864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ada5ead57a487761dbe24c6f352f6ada2f8b2fdc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f7e8c7c4bcf9fbac51e9cb9f08b09a3bb1fe949993e0e221e2926a9b12c0d4f +size 2282608 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5065c2d6f20b55b53d484536c77c6f45fa26e9ed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e66ec249ad16c992cbf5a9fe134b429f5ffb31015ad95d48b06163013e87f288 +size 80815 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2ae2e95fbfbb4ec5efb3a5158e02a164f026d716 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a57fa508135d4d59208e9c9ee82cbb303908e76ad18de2fa2b03fffc37dfc0f2 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f1e8255eec4367c12ada88787f2c3d0af4928cb2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fbf03bdca8871117888a18257ec507a634b0f975a450294af733fa147b17504 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4d534b6db871dc9f98c59c55dc91e750501575a1 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5c8523c09490110a7a7acf36b59e374d3ec72163 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af2512847fc86a3f6dea3d6fe8bf73ebef7fe77c7d887f66401fd43b31e24441 +size 82299 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d083a2e357f22f7673c25c8e01d77913ad7c40c1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48eeb80f42a8107b570efe66a70149b11a5ecb522045fcc92ebca3bda66c43b9 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a647f4202d47a26c422d9d0dd7451bda07075ff4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e61fdfca0c1f8eccc9945c8adc0ec7530d73cc6f9995884bbab1cc6d631593ac +size 244319 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2f71afec14c66232afa9acc016373deeb50bb217 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:580bf01a47f9cc1abe14e28d6b87924f1be5f6e3761a0139fc65cb48ad7404b5 +size 69051 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ebb953fc11ae45997f205d00efd9e9f61364e76e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14988dfdc69fd92e9d50ee823bd367f24f957b2ab6065b156d83992c506d792d +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb1b28e79b90573deac45c300a6121723d8d3e7c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4df4ddcde0818d80562fddaaec000c6df317e66662ae25e172b1365c890cbd5 +size 80244 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..07ee65f87ef68bd120b44a358ca11274f72de6fd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73efc545834d724084154bc14cc48f9ffced7673348a2f87cfafdd3c6ae08a73 +size 226304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..27ced62a8e6fb20fd2f427f997ac582857e43330 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc25da67cce0cd0213ee3dcc3c868aa3e36fdb4d63183e28b0f7a612d50aea1 +size 80244 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..01e8e81ae66feb277fa1e7947814109d370a663e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:114adae7470f048fa548c77a0570ddef2d92f6e2ce59e8a4a23a7b00cdae2676 +size 226304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..758f62837d11953b5d6b67769a2e5cb2c835057c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b706b02fcf5d239ef2b5374d3aac8a84187b8463757f5c6bc13251fd24dacab +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..573c8bec4c67b05a683cc6e8cd42f198d21f5b85 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0d573251d32ec16ff35d120043ff40c807e5225e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82617b73b42642502743459932ea40e4eed2cf2ea7e758bc62f1aef72ee5c97 +size 82299 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d82e3fc8f0f5982402b9376acdb447c6760442f4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b82744b2ed277051ad8130521d3371efe39cc491bbd7adf43d8a26d6b6d8265 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5301cc43fc955a7806ab6d0fd8b0bc378692e929 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcaccebd91e984f1c1cb795e8c454f6d9d9b03264e78538f99fc915a62b0a02c +size 244319 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..633d3d672e62a35b75c2af986ddf9c528bc1cc17 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae48749b4077f43ab15c97d37dd01f4e74d5474c2933604c854f2ddc1414608 +size 82915 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2ab670769cad9f3c99be21dea57864711750965a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26ae5693fc64da9378b91fcbe5ea8a2c302a1251bf973955f80f144a7dac329c +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..36322d50fa3df2095fbd7304796d704c9bf2abae --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77e435a403ea581c490682f88d1b14d21514bee867f4cdeb32beda6838d0928e +size 254559