diff --git a/.gitattributes b/.gitattributes index 1b5e281bd7be4fa9d39bb2fa3a127ccd888aed03..33951935783236655a9343838c40fec3954167df 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2255,3 +2255,47 @@ neuronxcc-2.17.194.0+d312836f/MODULE_892a0bb27ce39228be75+613edded/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_c24af7fcf05443daf3b7+613edded/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_e3aea5d1517d9896fd33+613edded/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_fd6170cedb4fe53c8433+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_584fbc6f07cc7a3a1ba0+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5ced139eb4f9413aa8e0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7434c6f37c47044f71f8+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7c383e90d7a81031bcc3+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_87bbc2837c7a34ac7e7d+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8cdf0acdee318d4bdf69+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8d605ae48e1a3bd443e9+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a115386b6164b70d349a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a4ecaba50ace94d96a23+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a9ce1b283ebf2388667c+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a9db44a28b85c5f0cea0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b36d95e43385dd760fc5+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/700f3d6831b945b35649.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/700f3d6831b945b35649.json new file mode 100644 index 0000000000000000000000000000000000000000..76af17222f6ee754e11b18743e7cb909b1742a7f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/700f3d6831b945b35649.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "ctx_batch_size": 8, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "is_continuous_batching": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 8, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/b095f4e1a8142588f557.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/b095f4e1a8142588f557.json new file mode 100644 index 0000000000000000000000000000000000000000..8175d5a49083b57b3ab87d891b8e709ead55e146 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/b095f4e1a8142588f557.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 8, + "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev4", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/4c45a685188be76510ca.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/4c45a685188be76510ca.json new file mode 100644 index 0000000000000000000000000000000000000000..e311c98319579323a4ea30acceb767ef8db60f76 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/4c45a685188be76510ca.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "ctx_batch_size": 8, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "is_continuous_batching": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 8, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/5719e2431a03a3a11a76.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/5719e2431a03a3a11a76.json new file mode 100644 index 0000000000000000000000000000000000000000..f4856e628eb3974c3b4597ec51a6e1e4e90a6460 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/5719e2431a03a3a11a76.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "ctx_batch_size": 8, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "is_continuous_batching": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 8, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/6ced15a046147a7195f4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/6ced15a046147a7195f4.json new file mode 100644 index 0000000000000000000000000000000000000000..660ea9db811180c81df88f2f9c0ca9e842c14f30 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/6ced15a046147a7195f4.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "ctx_batch_size": 8, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "is_continuous_batching": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 8, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": true + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/cdcd648610c19bcc53eb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/cdcd648610c19bcc53eb.json new file mode 100644 index 0000000000000000000000000000000000000000..002d6ab43fbe91866e831334a7a5b05a28c4b6d1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/cdcd648610c19bcc53eb.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": true, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "ctx_batch_size": 8, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "is_continuous_batching": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 8, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a1454e46779410eda936.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a1454e46779410eda936.json new file mode 100644 index 0000000000000000000000000000000000000000..f807836ae8723b931da8e10a17b65cfca582b920 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a1454e46779410eda936.json @@ -0,0 +1,80 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "ctx_batch_size": 4, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "is_continuous_batching": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 4, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/3bd58a8f29b6ca08f6a0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/3bd58a8f29b6ca08f6a0.json new file mode 100644 index 0000000000000000000000000000000000000000..36153f65a47279641b6fb1654507533ec9b9375d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/3bd58a8f29b6ca08f6a0.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "ctx_batch_size": 2, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "is_continuous_batching": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 2, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fc5948f61b835c4d6937b020aecc4a41cebaff1e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec824a349a9be66053dc28c493bf9e9137c78974212a6e5c05ecdc540083c91e +size 809203 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d99e64c9298752877ff260ef3c2db715e97c6114 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7987956410eaaa209e746c31cd9861bc6cfcbac9fb8b0107b4bb52d2ef2515f5 +size 18514944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c32d4d19d6735f146cb16d96f2e93358c3819029 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:839ccdee4866452ddb56084de8feae041b2afc80b0026cc0b5f079e35c43971e +size 174185 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3b03d4c7beb4a6ee34f500663cf559464d154fb0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:193826417c076529fed8c4be4bb3348e2b5407282ba499183eac96b4033101ed +size 2233344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a1def6fb982651697cb105705718f5748118028 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05c47bae0e908c0b46bcbf5d427777f5184d87f7b0b3911ead3e32f730dda4e9 +size 732777 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7a86c28b6e276b9511a9481cc23ae1f7dc0429de --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec2f4ea9e378d40c0cb9e897d365548c76723134d6d765dc0ff96b16957317e9 +size 3073024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..844935ebf67908a35a7c143b1f09c38c8e848387 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6e7608b51aad13aab7c9fbf121a2fd9cc275a8f821b0e2f4e18dad517b718da +size 3210726 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..311a1d4564f89ed0dbd81bf0a021b02623588ca9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72b5eb538c86713678f7331dfef2d664995ca12b6f961df905762b4e1c8dc40b +size 837628 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6db64d70358adeb316df7c3ac6c8140fa7e31e05 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:779e99040a4d1ebc1839ab07114736075bcd27537d80bb820b16b6496aeb585d +size 18576384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..871f578bf35f79d5b3c85c03d7168d8021ff2833 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c4553fa5b171984dc9319965554cf951a5205d008d3fc52c52d623c34f8b2b +size 748501 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..279068c8dde0914bd002917326d16a92852f267d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87369186b26f9b78b867f8626208080532c4ad7119f0e241668ccb0499d2c7dc +size 3124224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..96bfdf1d870ebb5d6eb564038cc274593bf73826 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82dd294c24f308caaba609e59919c3794620372a8d33a0c4338225a45fa503d0 +size 3262041 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5645b78f020033f7ec70b37a70fba09692580366 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e2aa950891cbdc52179ffcbc1b0981a9d662927b7dc5d80b69616106ce9dd3 +size 825431 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..66c5044311ee50db340aab679997266e68484ff3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00b8da5fc53d757bac81937a69fcb5412a744f0482aa6ad28d0e9044a3d1c589 +size 18566144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1c153e42b3dc963cbf2af097fde76410995ce290 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f1844371da7c38b01be9977b0670dd49843be84c622f59851b21396727bedf +size 174020 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7531955467a9f1c4a57d0f8e27b4b1e5a9f83418 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee9b3694854ceef9f483bed5d3f89602b736ad6e640cc8327f7576df2055e42 +size 2223104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f86059372ba0754ba87c3cb902ed3ab452e74093 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2274b46e91edd1ff4132b909cbc27238ad4dded829a97cba5a96278a5df018e4 +size 46052 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..63db9945ab24483d370f5fb9f150b849e4523553 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05e9156cecf3a71b7230a44ab666a360b3302d6c5e1be3393f93c8ade4a4b4ca +size 164864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..46d544d1d7f924534f211aa1c9c9bd8a10416f6b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:695e975d41dbd4b048c6145861b36e618773e863e071e15a339ee3aad7ff0111 +size 820388 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..06d7b93ea8148360a7a218dd0648b31e20a3aab9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b55424923aafef8ee4915f80c2722336963c913835ab88bd150fdaa7f839e5 +size 18525184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f0308164a88ee66056bf805495d861215921f351 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db430893cab774006da95e710d293521588dff3a3b6643508820c80821e88d6 +size 39597 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..54ab7288afefd00322848ffc4343a0fe2167b32f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d2d4bda6b56536ef39174ea56677db4754cbf927d8c24d14a472370e4e1b36b +size 205824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48e792ceaf07ff2a3a72+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_48e792ceaf07ff2a3a72+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48e792ceaf07ff2a3a72+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48e792ceaf07ff2a3a72+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_48e792ceaf07ff2a3a72+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48e792ceaf07ff2a3a72+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_48e792ceaf07ff2a3a72+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b1205ad1035d8d8c4c9ff38d59a7d64260932b10 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48e792ceaf07ff2a3a72+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d3e45be6467e402a15d8f5171b27d79433aa177fa29926161c559a81e975a1 +size 9235 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48e792ceaf07ff2a3a72+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_48e792ceaf07ff2a3a72+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6e28f18176d8d8e5c69da20d5e28afeb3f73caf1 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_48e792ceaf07ff2a3a72+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7f10392773371779059568753f66c717c867ac9c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6315daadaa4e97b39784f30d766e315b36e14277e78521e1ed45c3b17e36da2 +size 389134 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1aa2abdace26e415bbdc77322c1bc370d811cf8a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eac62e09e24823fd6bdff609675d1f9a9a2a4f9f6c5837ca2726dcab4704b1c +size 2243584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b141afd0e5e9cd06b4e8e7e7acae4b657f585e36 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13eb8d2c6913010254419fb6d197672181522fd7b4d53c2f9232f74a3dc52be4 +size 2313328 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_584fbc6f07cc7a3a1ba0+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_584fbc6f07cc7a3a1ba0+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_584fbc6f07cc7a3a1ba0+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_584fbc6f07cc7a3a1ba0+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_584fbc6f07cc7a3a1ba0+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_584fbc6f07cc7a3a1ba0+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_584fbc6f07cc7a3a1ba0+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cd6452133c853b10055da9e41b9af320058575d2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_584fbc6f07cc7a3a1ba0+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff6c6ec87d3bc578be3649bed3dce656e50a0970aec88df6e7c8ec60e10a88bf +size 423536 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_584fbc6f07cc7a3a1ba0+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_584fbc6f07cc7a3a1ba0+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cbccdff5ecc460096ebe1d2c3a0a5568bc85da17 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_584fbc6f07cc7a3a1ba0+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea614555aace70b62119614eb49dfb0f2e0a2cb64026e11b00392a8bbce35bbe +size 25201664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5ced139eb4f9413aa8e0+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5ced139eb4f9413aa8e0+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5ced139eb4f9413aa8e0+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5ced139eb4f9413aa8e0+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5ced139eb4f9413aa8e0+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5ced139eb4f9413aa8e0+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5ced139eb4f9413aa8e0+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5e985b1e7ed6f6ee41cf0aba0d8aeab2404249b7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5ced139eb4f9413aa8e0+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7cad5aceea2a2557048e9121e8a326e57d8828587d2293581ba09859375a183 +size 174020 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5ced139eb4f9413aa8e0+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5ced139eb4f9413aa8e0+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..863d81404689cea8a8741ff8f3ddd541cda938b4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5ced139eb4f9413aa8e0+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc0fec39fbc43132a9198503e22fa0e8aa51e2030599a6d18814bf1328fdee13 +size 2223104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..75b1f5f4d49f560f0485f480a299cfa6618af1b1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59a2d357929d445eb7691aefee4dcd3cbaf96dd5189078bcd538d27d4bdf60b2 +size 732777 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4864df7980d543040cd217044490474a5567f235 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14783393ccd55d695ce262894468fa888f03538b7f270630710db2420808d839 +size 3769344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..12b209a6aa897a7d746089af4535c1e64892d3aa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6818eb06b01d251a0b0db6699e311641a5c938113b061f1b1acde943b8bc10e3 +size 3907046 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7434c6f37c47044f71f8+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7434c6f37c47044f71f8+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7434c6f37c47044f71f8+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7434c6f37c47044f71f8+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7434c6f37c47044f71f8+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7434c6f37c47044f71f8+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7434c6f37c47044f71f8+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8108dadb44b1132d5252739e10866310e5910017 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7434c6f37c47044f71f8+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c63acb2108fca9514c9ec46a13d3f335a833b663ff9ed683e3c25cdfe29e6e1 +size 44175 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7434c6f37c47044f71f8+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7434c6f37c47044f71f8+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..09ca507f6df736bdfb3faeba98ffa6cdf12abf90 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7434c6f37c47044f71f8+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b33ba194fffa7b461ef06e5b029c3cb567b893cbc5f14b93461a2cbb8b203ab +size 195584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d7b03b1ad998bb095343ac154d11a77be8943933 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d6fdcb13d0085fa866f17e88e1f307a19154662e2f22c8ddcf684abec360b7a +size 734801 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2fd8385b4a599e7324ef51681cf21d16f2642990 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec6f16d3b3c1bbd801668f1bdd724a70494444ed04d861b22d386f9c190349a5 +size 3062784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..14dc4ae71665ec598ee96c1a813c4774eb03e973 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0162049ec88d92be303d3ec45611fac8b597d3f6496d13ea4f617a096b69f9f +size 3234534 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7c383e90d7a81031bcc3+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7c383e90d7a81031bcc3+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7c383e90d7a81031bcc3+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7c383e90d7a81031bcc3+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7c383e90d7a81031bcc3+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7c383e90d7a81031bcc3+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7c383e90d7a81031bcc3+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ea815e5bb252086cbfa56817be3c639efd06cd65 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7c383e90d7a81031bcc3+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cd0f918245975789e0bf299d701cdcf8202fa32c9baef960ec40003adf11c04 +size 820388 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7c383e90d7a81031bcc3+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7c383e90d7a81031bcc3+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..111ee84f55467af9c2fb06319404e86c085fc447 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7c383e90d7a81031bcc3+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8efa847ac2fa9b3be81d19a2cc7f7f4216fb0f40c7366d4ac8193612cfd030ce +size 36199424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_87bbc2837c7a34ac7e7d+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_87bbc2837c7a34ac7e7d+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_87bbc2837c7a34ac7e7d+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_87bbc2837c7a34ac7e7d+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_87bbc2837c7a34ac7e7d+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_87bbc2837c7a34ac7e7d+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_87bbc2837c7a34ac7e7d+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2a648104a98a70c9b9adab22c56bcc7f51cf417a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_87bbc2837c7a34ac7e7d+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe568558156d90ddde347168ab8da2e37325887b445d70039a7c63a83bc42875 +size 826443 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_87bbc2837c7a34ac7e7d+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_87bbc2837c7a34ac7e7d+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3eebfc9cdf6cb4417c64c693c8075450c6f4184b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_87bbc2837c7a34ac7e7d+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6819809b4472283c1cf91b7d335e0c5c8dad485f86b01cf5827d973e5c73a804 +size 18566144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8cdf0acdee318d4bdf69+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8cdf0acdee318d4bdf69+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8cdf0acdee318d4bdf69+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8cdf0acdee318d4bdf69+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8cdf0acdee318d4bdf69+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8cdf0acdee318d4bdf69+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8cdf0acdee318d4bdf69+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e0fb2e1bf95142fdab864442f6372ef0beb044cb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8cdf0acdee318d4bdf69+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2f974f087b1eaedb9a292f900bad4aeaac58f62a55d88d6cedfc7c50b3117b9 +size 136009 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8cdf0acdee318d4bdf69+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8cdf0acdee318d4bdf69+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e078a278d26ddcca930187548217fee2d4d79fb1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8cdf0acdee318d4bdf69+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a5db1c55e7f1d3f66a6b97815e32a323a6a8c04405aecb71a4b2bf6d69c7343 +size 2202624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8d605ae48e1a3bd443e9+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8d605ae48e1a3bd443e9+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8d605ae48e1a3bd443e9+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8d605ae48e1a3bd443e9+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8d605ae48e1a3bd443e9+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8d605ae48e1a3bd443e9+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8d605ae48e1a3bd443e9+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a0a0931b5a4210c8549d151ea59d434c98656d85 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8d605ae48e1a3bd443e9+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a345275d24a5270b3e75a65be2d0b6653ed5869abc58a1dbdcba9967b3bf9c4 +size 136002 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8d605ae48e1a3bd443e9+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8d605ae48e1a3bd443e9+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c0f034c7575911250bfbfb0983ac3cc9c2bc446c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8d605ae48e1a3bd443e9+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c505a63fe6a4ea475413341ebf2f243c0d573039d6dee5f403e10ea4d9492a4c +size 2202624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fdacdd9aa563f2f08889e39f43969ffdf60a298f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc049a34567e017b40e9e223c6e0d7130aa2d90c0ed6ba2fe02237244e71cb52 +size 736933 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2757daccbfa3560b33766f3ca6d14eee731a40a3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:894cc42790976138ab18536b5127c96a27381108797994916cd5b5ad4730a99a +size 3062784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b19205aeb44de2701d549f4bfcb03c153c884db2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a793189436f9612b1c32b502fa4d11a0737e19b7bb37f4b498ec37bf5b40353 +size 3234657 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..21b13cc1817911ba3907d293113ae7ad8e632950 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:761b12d7764c0dbb0896d0749aa1d7de246a933868e5340d2f1903f1c6522b58 +size 734801 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..642956bd5c9d93091071d92a342e9b8e8792b132 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55830c042c4f140fcdd50014502294a469b181b46a6012339f623f605055929f +size 2479104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c758c484870649da03ffe97419e4cfa63b0b4952 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7088fb350dc87568f4ac6918838b7704c1111a1cc1827d92ac2933206aa96a74 +size 2650854 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a115386b6164b70d349a+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a115386b6164b70d349a+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a115386b6164b70d349a+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a115386b6164b70d349a+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a115386b6164b70d349a+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a115386b6164b70d349a+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a115386b6164b70d349a+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..564be7ef75becd9f6024ccb422898d5ea6e4ea9f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a115386b6164b70d349a+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07c966b070b9459fdf302b1f9e2e769b96b7210940c3f39676321f089906e116 +size 809203 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a115386b6164b70d349a+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a115386b6164b70d349a+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c3aecd3f937fe279be0aa6088da5be30cff23334 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a115386b6164b70d349a+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:489dbba2e50b41a61d4bedc6475b660bd8cf339244237167b7c44e6857738aa2 +size 36291584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..76e0113c3b171f312992eaef5c2a1d19684beccf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72b18a6bb191717837cedc0f337c91fc39054603990a27cc2bc5ae4fe6401a48 +size 734801 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fb551a0ce57a523b34fdfa56172538e7986cf7ce --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2721fad26236bfad6cbcf6ff501b1e7a60bda0a08f5f5af33402695a845207e9 +size 3800064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..65c5b7460d64d456676caae92fff4f40bcbf2edf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ebfa5664e44dac43e41ee3e668cd96af2df56fe9efb378b6d7903db09273100 +size 3971814 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4ecaba50ace94d96a23+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ecaba50ace94d96a23+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ecaba50ace94d96a23+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4ecaba50ace94d96a23+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ecaba50ace94d96a23+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4ecaba50ace94d96a23+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ecaba50ace94d96a23+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7751c696f909c7cc687ef751fda4c8b7512c9a00 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ecaba50ace94d96a23+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6b2e8b1329e662fe6607dc5722975128d385bfb4b150787e76a7312ae0e85a7 +size 174027 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4ecaba50ace94d96a23+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ecaba50ace94d96a23+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2fbacf63b31527cdb1bd3c35ab48aa116ece20dc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ecaba50ace94d96a23+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cde78baa8826b81ec8ddcf9e995291b36e17fe5b2619cd49841dedfb1465e5d +size 2223104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eedbbbcdefbcd0677ded02f3d2d1fac56f953bcf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b32dd2430a5ed2e04dd9638e6f13825618c14f14669536ea719e3ddc5fcde7b +size 749521 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e0772d4cec27d0d3a049fd13594fdfcab34c7bad --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd2079c5c51459d43b43b97f79e7a17931c4fe04a6474413af5111bb2e57529 +size 3124224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7943a3a041557f3cf5e0d0c5307054365f031fcd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3350bac4c99e1aaf4cf2eeb14e5801bd9d876a91b2b0b999dd6c407fa67c615f +size 3262041 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a9ce1b283ebf2388667c+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a9ce1b283ebf2388667c+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a9ce1b283ebf2388667c+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a9ce1b283ebf2388667c+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a9ce1b283ebf2388667c+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a9ce1b283ebf2388667c+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a9ce1b283ebf2388667c+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bf82083387a66c52dd5d222caaf3c068a6d39e33 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a9ce1b283ebf2388667c+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf668f9c76d161c50ccbd4af71a60ad0caa725567dc9893f6ccf83591af434b0 +size 136009 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a9ce1b283ebf2388667c+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a9ce1b283ebf2388667c+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4db026d88450e7421fa9c503f86b07febecbe200 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a9ce1b283ebf2388667c+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9bf005e0c754a0333a021ee1cc332a9781fa26593a9465137b30cb7d817ca5f +size 2202624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a9db44a28b85c5f0cea0+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a9db44a28b85c5f0cea0+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a9db44a28b85c5f0cea0+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a9db44a28b85c5f0cea0+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a9db44a28b85c5f0cea0+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a9db44a28b85c5f0cea0+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a9db44a28b85c5f0cea0+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c3f3e9c4559fe50fca3c41e3afd5eee309eb1467 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a9db44a28b85c5f0cea0+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9584fe512f7053433bcadb314a45bdb6ef0c6697d83a0f1030b4f79c6294849 +size 69051 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a9db44a28b85c5f0cea0+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a9db44a28b85c5f0cea0+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..14e7fc52fb158ecdc49df53d993dabb12bfa49a8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a9db44a28b85c5f0cea0+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a386815394945923195a1258eb00a6e7edcd9bc63710c55153999f937a8c27cf +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b36d95e43385dd760fc5+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b36d95e43385dd760fc5+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b36d95e43385dd760fc5+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b36d95e43385dd760fc5+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b36d95e43385dd760fc5+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b36d95e43385dd760fc5+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b36d95e43385dd760fc5+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0403c0892977cd198171a7cac0604aa8580d6b01 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b36d95e43385dd760fc5+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5f37a0bca501e4c1db406bf1dc969601f8378a92014c464264835395435b850 +size 823149 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b36d95e43385dd760fc5+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b36d95e43385dd760fc5+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c51b8992570730442bd3c1e7a42648151afcb7e3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b36d95e43385dd760fc5+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a15e5b3b358d51ef33b00e54b9e85fad5993b63208231675bb18c9ee6967760b +size 17716224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..68b77545b6f52de9faf80226a926f06038689c86 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fef7d34fbf1871ac76b0f04ef0e15e6d801a1610a59f12c786577d3e3eb5c151 +size 734517 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fc52a043b0a072d12801f0b480af35b47dbbbafd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:819397be48d47415c8792c8a281d762043b08567556a824d1024e66b06910ce7 +size 2141184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..70dab844f2b15877a37b616caf471133f11863a2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d2229806010218cd3d9c8dda995e1e205ccae5bea759a4f294e3511a5cdd723 +size 2312934 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ec739df9970c1b3d34e592532010b4c37a8e5d65 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3634b32af9e37461d2c800c26617ab450a6abeb6b5cf9d806bc8bac728d2029a +size 820388 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8b4993f576f82d52230c8b7efd46f3d87f4755c3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d8a382e52d42791f55e4cb6b430d82b0617157aa6846ef17656dcf715cb48cd +size 9114624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9ae471727f526836b3afcff28800575e090f2294 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:797e06ba0faf4be99943f908cea7c30695fcaff1d09ead147d76a10247a6b564 +size 751545 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..79e2c2d90d20355316acda3319818c65869f1eb1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46d926e4c34b428d222f232f17a8e957cb488fe91e393e22d286b0f46262ac4d +size 3113984 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..12b5510242a2b4340fc13429af5578a73ee2e589 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7de138128da4063bb31e41eedb8df3c07ae181f5e18fb4835cb683dc511e1e88 +size 3285849 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a7e85286e0acda19906d235a50603534544a2750 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d77b33c9285d22ec4c114d93656a5492cfaea2c61af9b8420775b118b3214eb +size 819660 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..053da8a92bb3168050368a14ddaf71f85ad2539c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10c0c0445faaa2d4b8d6abe11b3c5aa6ef4c36a42edddab256cd96737b792714 +size 2427904