diff --git a/.gitattributes b/.gitattributes index cb49cbe705466dbd03d7f2c1b1b2e0f2894b33c3..3dcbea75ad6d909e823d4f607207508d5425f733 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2861,3 +2861,30 @@ neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_f8a0bbe4c09b370fbea0+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_14332524047809853118+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_184d4ed11977011ce5e7+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_239754c47bda68946b1e+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_4d8202c650b98a0b9d0f+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_5e9d7f853e39ea297ceb+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_6c15ea8227f10a4d650e+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_77093e50dfa3d6de57c6+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_77287bdd671a507fc11c+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_7c803a9634386cca678d+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_a05cbb6e9bfee46f6408+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_ec432bd70b0f3bb72798+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_f269665cd140e0a337fe+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/2c37f46ac553bfeb1189.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/2c37f46ac553bfeb1189.json new file mode 100644 index 0000000000000000000000000000000000000000..5b1defd53ee7a86dab53c7546914ea7a2fe00757 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/2c37f46ac553bfeb1189.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 32, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/3a90f787174f8770d5c0.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/3a90f787174f8770d5c0.json new file mode 100644 index 0000000000000000000000000000000000000000..ac62c0f242bad25ddad6747683894d9cdd9724d2 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/3a90f787174f8770d5c0.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 64, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 64, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/628b7adfe3da945dff04.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/628b7adfe3da945dff04.json new file mode 100644 index 0000000000000000000000000000000000000000..10f3f79953757910d96d3f22f669d3ec4b3b5d32 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/628b7adfe3da945dff04.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/6b9e6133a149401bcb0d.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/6b9e6133a149401bcb0d.json new file mode 100644 index 0000000000000000000000000000000000000000..a8f2647d8d36d84ed82b77a4d0f5f17927c0559b --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/6b9e6133a149401bcb0d.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/a5138e6ddb37cf14ea12.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/a5138e6ddb37cf14ea12.json new file mode 100644 index 0000000000000000000000000000000000000000..d3b30428afb3f1761a22dfe4780cbcbf331ee349 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/a5138e6ddb37cf14ea12.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 16, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/ae98a5d7620b07f327f7.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/ae98a5d7620b07f327f7.json new file mode 100644 index 0000000000000000000000000000000000000000..191c3c3c267a2786d9cf49b0882fbbc0561b87ae --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/ae98a5d7620b07f327f7.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/fb692722df710c79b9f1.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/fb692722df710c79b9f1.json new file mode 100644 index 0000000000000000000000000000000000000000..f7f0f55c4c2406975ded1b0e680e093d242fa827 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.1-8B-Instruct/fb692722df710c79b9f1.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 48, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 48, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/073115831aa8018f1940.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/073115831aa8018f1940.json new file mode 100644 index 0000000000000000000000000000000000000000..abc92a6fe6ef2c8df029ee6d3ede6b11c28f0c73 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/073115831aa8018f1940.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/60dec262acd521bc887f.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/60dec262acd521bc887f.json new file mode 100644 index 0000000000000000000000000000000000000000..65ae863bd2f42b2ab3f590353890e5ba1a2d5d7f --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/60dec262acd521bc887f.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/972f5820fdfd2b10a1d7.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/972f5820fdfd2b10a1d7.json new file mode 100644 index 0000000000000000000000000000000000000000..21f31976d1b1f2ef738ca08d9e30693d7fd5e7b8 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/972f5820fdfd2b10a1d7.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/55128b8695c5ede06342.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/55128b8695c5ede06342.json new file mode 100644 index 0000000000000000000000000000000000000000..05539485080e7104c61169e38be9090a69ee5a08 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/55128b8695c5ede06342.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/818a58649ec58e388db2.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/818a58649ec58e388db2.json new file mode 100644 index 0000000000000000000000000000000000000000..b5745291c6fa497ad7950969da7c571b83dba059 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/818a58649ec58e388db2.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/9cace97854de088d5a8f.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/9cace97854de088d5a8f.json new file mode 100644 index 0000000000000000000000000000000000000000..bb071b97659ab93234f36bed05708ce2f856b1c8 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/9cace97854de088d5a8f.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/e031b70baf45024103a8.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/e031b70baf45024103a8.json new file mode 100644 index 0000000000000000000000000000000000000000..8569b3a324262f6d53c47e3f8313793ec579f937 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/e031b70baf45024103a8.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/eaaa679ebdc087eab52d.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/eaaa679ebdc087eab52d.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d0ab00aeb91a32ca2c0e028cdda0cb01fbd19f --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/eaaa679ebdc087eab52d.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/eff313f1e59828128b6d.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/eff313f1e59828128b6d.json new file mode 100644 index 0000000000000000000000000000000000000000..af82ae2f4f7d175a2280189a5f55ee4987644ef8 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/eff313f1e59828128b6d.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/3d60284285dc6fdccbee.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/3d60284285dc6fdccbee.json new file mode 100644 index 0000000000000000000000000000000000000000..369129edcc6ce7619e2e44d70dd96c24ff401a26 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/3d60284285dc6fdccbee.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/mixtral/dacorvo/Mixtral-tiny/1352c7478716c88036dd.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/mixtral/dacorvo/Mixtral-tiny/1352c7478716c88036dd.json new file mode 100644 index 0000000000000000000000000000000000000000..93c06c59f1300580ede67d458ca9f18acc13d875 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/mixtral/dacorvo/Mixtral-tiny/1352c7478716c88036dd.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/mixtral/dacorvo/Mixtral-tiny/1545c8e430f0b8a4b739.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/mixtral/dacorvo/Mixtral-tiny/1545c8e430f0b8a4b739.json new file mode 100644 index 0000000000000000000000000000000000000000..2121aa8ea593fef0da902924c61ca56fbe5995f6 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/mixtral/dacorvo/Mixtral-tiny/1545c8e430f0b8a4b739.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/mixtral/dacorvo/Mixtral-tiny/19ede6c01460d40fdb8d.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/mixtral/dacorvo/Mixtral-tiny/19ede6c01460d40fdb8d.json new file mode 100644 index 0000000000000000000000000000000000000000..2f96f22f679ac8b4c5d06dc41404f98c85792b2e --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/mixtral/dacorvo/Mixtral-tiny/19ede6c01460d40fdb8d.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/phi3/yujiepan/phi-4-tiny-random/475e2d11c91481c3c4bf.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/phi3/yujiepan/phi-4-tiny-random/475e2d11c91481c3c4bf.json new file mode 100644 index 0000000000000000000000000000000000000000..4083ae26e1546eef21ade8f94e22fe5b9fa433bb --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/phi3/yujiepan/phi-4-tiny-random/475e2d11c91481c3c4bf.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/phi3/yujiepan/phi-4-tiny-random/5452a55a1b1be42159b6.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/phi3/yujiepan/phi-4-tiny-random/5452a55a1b1be42159b6.json new file mode 100644 index 0000000000000000000000000000000000000000..92538febab302f0ca69b658868ef0f0b0876eaf3 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/phi3/yujiepan/phi-4-tiny-random/5452a55a1b1be42159b6.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/phi3/yujiepan/phi-4-tiny-random/9987de2a48c62c901bdb.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/phi3/yujiepan/phi-4-tiny-random/9987de2a48c62c901bdb.json new file mode 100644 index 0000000000000000000000000000000000000000..9eba7f3b0a1ab129a497b3b2c13520846f789d8b --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/phi3/yujiepan/phi-4-tiny-random/9987de2a48c62c901bdb.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/4b1f2f9be36861dde93c.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/4b1f2f9be36861dde93c.json new file mode 100644 index 0000000000000000000000000000000000000000..c0f114378a7dfd3f3e215c102d14c8c04cd5af02 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/4b1f2f9be36861dde93c.json @@ -0,0 +1,49 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 4, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/113f0353299230f0c855.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/113f0353299230f0c855.json new file mode 100644 index 0000000000000000000000000000000000000000..d3be7ed1c48187f804b7b55c6390827355b4ab3f --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/113f0353299230f0c855.json @@ -0,0 +1,53 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/3d49dcf098e7bbbf34c8.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/3d49dcf098e7bbbf34c8.json new file mode 100644 index 0000000000000000000000000000000000000000..dff5b996073270e7da6c8913e6e4e807fab30171 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/3d49dcf098e7bbbf34c8.json @@ -0,0 +1,53 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/cca4a6253d520dfab0f9.json b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/cca4a6253d520dfab0f9.json new file mode 100644 index 0000000000000000000000000000000000000000..fbaea8a0897c2b5beef20e3c4a151a648d2c2a8e --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/0_REGISTRY/0.3.0.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/cca4a6253d520dfab0f9.json @@ -0,0 +1,53 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.18.121.0+9e31e41a", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b451c95367f24d6b93e2ba1049375d6868145299 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ddb930f663ff06f69265d17e8f34c19c883bd98453bcc9324885246fc1bd5b +size 772420 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..525d776848482ddfcd94c53be5b72ac8fdd34ce0 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05a3d7ddc19167112af658cd7bef1b9220762fe4aeb3d0a6b469300675e9817e +size 2038784 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/wrapped_neff.hlo b/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ee4b74d33fa4e100ba2fc0877d30da1b2db69bf0 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_148470eedea2402e16a0+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:742446bb208550a48973129b6d021c6e5c34d1193413c94811e4fdd7fdad8e47 +size 2177111 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_1649fc77b87fff02e370+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1649fc77b87fff02e370+613edded/model.neff index bba9e3e00df675a7d75d73ec19890e8e25df023e..4975ab39a01305b81f0e5d87eaf2354f34bbfd8a 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_1649fc77b87fff02e370+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1649fc77b87fff02e370+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7fa48df6f8910e175a5dc54e89d513acf69e91f5d3c8efbab3ab5c50fe2cc78 +oid sha256:3c2406019569308d46e4a6ac2e1f967e02d2e29931c70a282182498189b506df size 134144 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_184d4ed11977011ce5e7+431f5505/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_184d4ed11977011ce5e7+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_184d4ed11977011ce5e7+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_184d4ed11977011ce5e7+431f5505/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_184d4ed11977011ce5e7+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_184d4ed11977011ce5e7+431f5505/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_184d4ed11977011ce5e7+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c4dccbe1d2bb154396c4f2e11e90a4ef6f5623b3 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_184d4ed11977011ce5e7+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5928e1e3d71165915a25d2d9778aedfc58cd57ef9ba8621279558be58e16fb7 +size 136713 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_184d4ed11977011ce5e7+431f5505/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_184d4ed11977011ce5e7+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f32b85efe9b6fc514afa776d637b1d73a9219178 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_184d4ed11977011ce5e7+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ca9e8e0f58e9aa229a5b901af785b5520de02d5bc96365b9c644ee2b23f446 +size 2325504 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff index 0d76f1433a1f43e4dedab40132aa35e30fbf6d4a..b030373c8df0669b11c65a91cf22272803f35308 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5aebf8cad2c5fc686f572901e2980ef9e5b16a4c0e4c726affc50b801ee0bab0 +oid sha256:54eafd37216e3155bbb19f9b10bda0d98486c7540f01429c4e55d0228ec7d487 size 103424 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_1b80b788e3a49498f963+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1b80b788e3a49498f963+613edded/model.neff index f0b76e4eecfc0e0b4efe9ba06503c0185af39f8c..52779edbd68040207ebeaa4ca221085da91e415c 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_1b80b788e3a49498f963+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1b80b788e3a49498f963+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b46e990d2c2077a5e6417a281c06e5fd6b8b09962814f2d9f51bd8aa00d73c04 +oid sha256:2c955d1cb4e62cf39f55f735edf9848dd993afcf1f289cd9a3d5eb5abaeba1e0 size 144384 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0ed780393d888b307ac5dbd41432a796afb6edf1 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0f8b985f6b14bac41793cc85673f00958c916a0b3f80ecf868448f6ae209d4 +size 777965 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..533a9c603116284977d3b3d702fc4a1fe2bad8ff --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c94bebb3cec894ed1f8443050b0e92ea754772df839d1dae784aecb59357b6ea +size 5950464 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/wrapped_neff.hlo b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..da85a6b8548073d50d0b0c48d19da63d37adaaab --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1ccc9f3b7f71b27407dd+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38355802f4322ba013b18cb43b4fb6d949ee4840b269db20146a906ca520d40f +size 6088408 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_1df250ef1cf7a7de560f+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1df250ef1cf7a7de560f+613edded/model.neff index 72c7e5c989c15a5ecfa4b5965fb558c085b4ed92..4c2f073ed2a5fc89ebec9accf949d38dde671e4c 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_1df250ef1cf7a7de560f+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_1df250ef1cf7a7de560f+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a4e59496301f4c59377ee2750feb072036256757b73f1b9ad28b7e15a9563f1 +oid sha256:24e88de2456b73775725e8be2e2d673a068ccaeebf5c4954d3be97d97539b72c size 134144 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_228e41176dd02608ed1e+431f5505/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_228e41176dd02608ed1e+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_228e41176dd02608ed1e+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_228e41176dd02608ed1e+431f5505/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_228e41176dd02608ed1e+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_228e41176dd02608ed1e+431f5505/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_228e41176dd02608ed1e+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..28ff114292ca168d8fa503855f3e6c6e8858171f --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_228e41176dd02608ed1e+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e23ddd2c1ceb0ca9b13f12cfe085496dda4a9903f08aa51dd4aaa763af0ab3e +size 7106 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_228e41176dd02608ed1e+431f5505/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_228e41176dd02608ed1e+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..06b50a00d140944f9166913c17485155f0f821f9 Binary files /dev/null and b/neuronxcc-2.18.121.0+9e31e41a/MODULE_228e41176dd02608ed1e+431f5505/model.neff differ diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_22cf23062ec53b3fd95d+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_22cf23062ec53b3fd95d+613edded/model.neff index 78055b7e5731c01f79239c541ad9a4818ef65817..1a9edd07aace1fc9a85a1c21fe0761452485c6d9 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_22cf23062ec53b3fd95d+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_22cf23062ec53b3fd95d+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d2d56e523eed6d67550a0af7f30be458fea1c5db8e0ddb1b3c111250a34dcc44 +oid sha256:23b76029645f1de881afff4922f1ec5904c07f3f4f51ce59fd9525211144c873 size 144384 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_239754c47bda68946b1e+84f3e719/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_239754c47bda68946b1e+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_239754c47bda68946b1e+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_239754c47bda68946b1e+84f3e719/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_239754c47bda68946b1e+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_239754c47bda68946b1e+84f3e719/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_239754c47bda68946b1e+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c06fef786acbfc7e3cfe8668627e304d2a870ec4 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_239754c47bda68946b1e+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:641f98f8f636f98fe345207728020e77c755d87f2cabbc04ce830641e76c7588 +size 927675 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_239754c47bda68946b1e+84f3e719/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_239754c47bda68946b1e+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..06e7fe8dc18425d577dfaab138d094714319c1ec --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_239754c47bda68946b1e+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7ccfc034f3755e3bd390d3b8990b99cf1972e0508d2abe96cb44d00c709145d +size 32758784 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_284ddd1b388e504631b8+ee23c5ad/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_284ddd1b388e504631b8+ee23c5ad/model.neff index 13a62cf1a85238236fe7452c06e602d98fbf6073..f40e3ba6eb1423a242e11aab691e07a3e3134fec 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_284ddd1b388e504631b8+ee23c5ad/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_284ddd1b388e504631b8+ee23c5ad/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ece2274696d18942b5416a3cffac89aa9f4a9fd894fefc11c67b7e6f9c12e228 +oid sha256:8ecd79bfa7bcb4554599b9a929ca2f9dd4288f2dd55882e70b90cfe824ce5671 size 257024 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_284ddd1b388e504631b8+ee23c5ad/wrapped_neff.hlo b/neuronxcc-2.18.121.0+9e31e41a/MODULE_284ddd1b388e504631b8+ee23c5ad/wrapped_neff.hlo index 0eadb066396a1f9f81295239b2aa2122115685db..55d8795499d73b8b544a1c16027ded82ae08c2bc 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_284ddd1b388e504631b8+ee23c5ad/wrapped_neff.hlo +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_284ddd1b388e504631b8+ee23c5ad/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3739a202109032c98d48e5b375571916059d1ab503a3c40d82229a338f3dc616 +oid sha256:ab33d4561540008544d9d3f9722416e9e4628836df20e0f6727c92302a3ffe0a size 268322 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_2e229618015e416964c5+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_2e229618015e416964c5+613edded/model.neff index af7b08c3682654c101e1d834e7f33dc0655b27fb..aa5425fa0fddf2ef9971ef9e4937c2a92ad7c6af 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_2e229618015e416964c5+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_2e229618015e416964c5+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0ab2727db7f7ce07daec6a170178e76ec225b4814dd2305b032042cf3cfdf53 +oid sha256:c557550362fa295e5f4ae2ff854df56b983ef2fb47c12ab048990dd895183e77 size 1414144 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_2ef52130792b59d66c66+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_2ef52130792b59d66c66+613edded/model.neff index cba18396d0ca5ac2446cddf4414578f41a7777c7..640fbaa90e5ff4b9a9954957658c0511f68d1104 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_2ef52130792b59d66c66+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_2ef52130792b59d66c66+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2525d08a915fbc47e5b6dbe6b604c0195046658d47f2aa4645a79fad93ab662 +oid sha256:032778095a9af67c72696c928944d338fe0d7631f14707b692ad348377691c60 size 134144 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8e217a63c119c4bfbe1b8d1c9c0152a0557401ed --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d82e763f2b8b77b0d3184ff6ac32e853798da1502483f9dafa984103d89cf9e2 +size 778349 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..78b92fc8521065d7703205419c30eb59f0d0d17f --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36b527f42432cc6824380a53ae76e71d1d3a05903f5bd1ebb305310fa15877fb +size 9319424 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/wrapped_neff.hlo b/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..41bd60fd90667f302b58ae9e1b4fdb984a38e2f8 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_34e0b30ca70ddb9a5f96+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27939a04a8022bd0f3080c0ae85932c391d2ada355d47363c8bf5fdfba9adaef +size 9457368 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff index b76b2b2557640c24d59eabbfba3a05fc7faafdc0..f0c459ac610a7c0816fb0d45c7beec44c50d9838 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:392572d2108d44d34a345a42cfd4ae3bc815ce1ed0414c19cf2fb604c6cecd8d +oid sha256:3b5953201258898d9bf0038ef63007b06ca5a622e2ce0ae14aff29150510327f size 144384 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_3da832fdaa3d62981800+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_3da832fdaa3d62981800+613edded/model.neff index 9a2034ab782082482ba6b47a0f23d69e94261dcd..dc84b998ea94f856e26728266cacfddb226a9ad8 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_3da832fdaa3d62981800+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_3da832fdaa3d62981800+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:40a0272341b8fef595dd59a7e3e8d9a17984c25685134e71fec1d3ae86a05a8c +oid sha256:08faf8e44406b087550ae9c49d4ef877fcdba6c13140018f989f3ad939b34b89 size 154624 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff index 4833864fb7494b15476cc5f5e7486ee160c72a06..ad91641b210d41642e80acc05afeb688c10b0356 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e208a736ae88636f43afb0b1169a9f0aa0872bc8734a4a9c5f91560209b694af +oid sha256:3bbfb2ef58b084162e10490249888440a09e7265099f49b20e2c168d523f9721 size 144384 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_4b31f40da9553c6566ed+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_4b31f40da9553c6566ed+613edded/model.neff index 2bb1cfbd7c4b6eb7432d7de8dadb9887fb2b6778..e3459f1ded54784d491498d5c69ba84b23efb24d 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_4b31f40da9553c6566ed+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_4b31f40da9553c6566ed+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc2eb04bbcea5e3d01459aff14b75c289752943ab1088dff4caae28bfb064ee6 +oid sha256:fb46d95e8e8f0350189f22a63e16d0284c74e90a5df67716c840a866e92dfb10 size 18494464 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_4d8202c650b98a0b9d0f+431f5505/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_4d8202c650b98a0b9d0f+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_4d8202c650b98a0b9d0f+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_4d8202c650b98a0b9d0f+431f5505/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_4d8202c650b98a0b9d0f+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_4d8202c650b98a0b9d0f+431f5505/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_4d8202c650b98a0b9d0f+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4c9017e7e23d13737f79dc5ddd07c423862a5433 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_4d8202c650b98a0b9d0f+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:149f133518214d64a0e2e50e57c0893d9da927d6b187b1c21ed9a629422cbaca +size 136016 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_4d8202c650b98a0b9d0f+431f5505/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_4d8202c650b98a0b9d0f+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..467dda6d5cb5e59c925d06d3b24ce2b50ef21f61 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_4d8202c650b98a0b9d0f+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e01ba939b43c5db207df4acbfbd640d740c261880526df1201f6abdc7c7ee50c +size 2202624 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_51d9fed86504dfbff43c+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_51d9fed86504dfbff43c+613edded/model.neff index f5e1075c3551ecbaf3f707ce5b433d85f537cd9e..d71713b4556336461b066fbc1e184ec3221d53f8 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_51d9fed86504dfbff43c+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_51d9fed86504dfbff43c+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07e6da04edd4e509f1dde649b832d76b581b3977d1be052c8e45e841d133177a +oid sha256:781b8b27ce00232acc6ea14e1c1ac2cb926d69e0453a1ea21fd1517deae13b4e size 134144 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_54cb17f251d5b78afb71+6e42245d/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_54cb17f251d5b78afb71+6e42245d/model.hlo_module.pb index eb61ec191521068d15b189e0b9f829de742b6672..a02cc458df77357968ac384dc77dfed46795b548 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_54cb17f251d5b78afb71+6e42245d/model.hlo_module.pb +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_54cb17f251d5b78afb71+6e42245d/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5dbf7f853f10426caa8d94ddcb892e7b2e974740e8367814788d466202002f6f +oid sha256:7a78a1835f3bc34db45679ed6e9d165ac4d2498361b4f20b53184a2bdbc17ebf size 81317 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_54cb17f251d5b78afb71+6e42245d/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_54cb17f251d5b78afb71+6e42245d/model.neff index 51081ce0ec5585ea18d7042d10f846148e7ae438..f4995b8aabe515b0cb881d2ef0d03dbba9c4b04f 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_54cb17f251d5b78afb71+6e42245d/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_54cb17f251d5b78afb71+6e42245d/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f23a8991f60a2ec9803d378c76fe2ff2c009dc9f75303ce47d8f64918366869b +oid sha256:9a8afeb021d5617367faa20df382eefa0666ea762d5a1df90259af1f9bcc0d45 size 359424 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_5c17a6fec29c60d2f8a5+6e42245d/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_5c17a6fec29c60d2f8a5+6e42245d/model.hlo_module.pb index 85cfa88ff6a14e6302e84401d617bcad3acdd8d2..54fba7fcf24fe55997333c666c6d7c206dbbc6f2 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_5c17a6fec29c60d2f8a5+6e42245d/model.hlo_module.pb +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_5c17a6fec29c60d2f8a5+6e42245d/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f93a5b8f8ec5956972291664324e21da1390d15f9cc34b5aeb7fbbfbbcd64309 +oid sha256:70523dfa46ac4527a2c01fb517d2b78287d79d278550f911d15a783348d7fcc0 size 81319 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_5c17a6fec29c60d2f8a5+6e42245d/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_5c17a6fec29c60d2f8a5+6e42245d/model.neff index 30dd9fb75223f8284160e8c6d13c9ebfbbcdf101..c96904d841834bd578acdc8c863f764f74062712 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_5c17a6fec29c60d2f8a5+6e42245d/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_5c17a6fec29c60d2f8a5+6e42245d/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eac9ff12a02c2ab3e075d2ca1407acdea320487c31f55cd6109733d8a38b3226 +oid sha256:adc381b8c686ee5ace4368e999d3626e9cfaf241e13d803f58f271c601088c9f size 359424 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_5e9d7f853e39ea297ceb+84f3e719/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_5e9d7f853e39ea297ceb+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_5e9d7f853e39ea297ceb+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_5e9d7f853e39ea297ceb+84f3e719/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_5e9d7f853e39ea297ceb+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_5e9d7f853e39ea297ceb+84f3e719/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_5e9d7f853e39ea297ceb+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1ce502ef9ffcdb5227ad3f4e41104ec47f203737 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_5e9d7f853e39ea297ceb+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a99187503594f8f78277828f0fb987ae325da320e53e3c91f46744beb2d8f061 +size 927675 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_5e9d7f853e39ea297ceb+84f3e719/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_5e9d7f853e39ea297ceb+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b2511d36591f6390b48d1e6726339e46ae73ebba --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_5e9d7f853e39ea297ceb+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86c2cdb5e95edd7704bd6a7d31b99b4e1678682914b210d3315660758a67e63e +size 32758784 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..80b1f23c866403a4c3fae72eecbd936b432d95af --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3a5ad3f53de0cc649ed06f3a14c58846fa99ac99add5016a21ee68cc50ae7c +size 81132 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..11c48dd0a61a51e9e2923de5af4afd57fb3bda2f --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe86137f31d2026aaad8659c47651b4d94c61f55e6c4ea514c6215f1a42ea9b +size 461824 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/wrapped_neff.hlo b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a961e8875ba7221b0e5284a89e5b033f0a6595b4 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6a4d8f5e434c05928a7a+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28cf9d2998b40746684329776521a0fa1a0f8fa8065fba9c76b4e8ad4f84cdac +size 469663 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_6c15ea8227f10a4d650e+84f3e719/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6c15ea8227f10a4d650e+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6c15ea8227f10a4d650e+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_6c15ea8227f10a4d650e+84f3e719/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6c15ea8227f10a4d650e+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_6c15ea8227f10a4d650e+84f3e719/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6c15ea8227f10a4d650e+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..804bf76079dbbd4a3bb5b7db904384f7719db40a --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6c15ea8227f10a4d650e+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e5c59e89c64fa3053e94edb34c12cf7cefe1ddb6a8463cee55fcf718c04748c +size 91602 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_6c15ea8227f10a4d650e+84f3e719/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6c15ea8227f10a4d650e+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9d66da677e6d64d8836431873ed37e6f68233be5 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_6c15ea8227f10a4d650e+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae74e23bed43f8f95a7b5947ad255294517462a333fa9cccd4cb24a1429df355 +size 213474304 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_73a8fcccb40e156a3330+6e42245d/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_73a8fcccb40e156a3330+6e42245d/model.hlo_module.pb index 275a8f0b919837fa346e3eff561418f62780dc79..9b980c2d035b61725160c2a9e1c2f7d2c59e8a0f 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_73a8fcccb40e156a3330+6e42245d/model.hlo_module.pb +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_73a8fcccb40e156a3330+6e42245d/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a79cd0c556f5398514ab48a5a5cac3053816798a95b4a6164805acdfbe6c31c5 +oid sha256:9b7a7a2062b09b10f7f1d0769bf49855067b4bca35e9aa82f6076828879ea0f7 size 82168 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_73a8fcccb40e156a3330+6e42245d/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_73a8fcccb40e156a3330+6e42245d/model.neff index 01bc618ce4dcea3e0a4bfee3fea7db782ebae7b0..e5d7b10cea178929e8c5c8e7607d48839c20469a 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_73a8fcccb40e156a3330+6e42245d/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_73a8fcccb40e156a3330+6e42245d/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3adb707961983a7381e2e8fb76aae63c324d63a647787cfa028c3ee73bc5099d +oid sha256:394271094cb2d742fd388aafd0e3ee83a274a9b0ff11a084f0f9c16316a6e542 size 420864 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_77093e50dfa3d6de57c6+84f3e719/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77093e50dfa3d6de57c6+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77093e50dfa3d6de57c6+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_77093e50dfa3d6de57c6+84f3e719/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77093e50dfa3d6de57c6+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_77093e50dfa3d6de57c6+84f3e719/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77093e50dfa3d6de57c6+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f253219135b1e207b220d429b4457012470bcfe4 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77093e50dfa3d6de57c6+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d68b3efbd6fe626a030bd0825b981f0288f542d8dba791033425416042e4be62 +size 927675 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_77093e50dfa3d6de57c6+84f3e719/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77093e50dfa3d6de57c6+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d33d78c09d273dcf0890e82bbb949f67af415a80 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77093e50dfa3d6de57c6+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d9cc9ca144bae5735bf82b2fc4c64e4fa9c4ca9eb7de2f0adc0b771ea3c20ca +size 32758784 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_77287bdd671a507fc11c+84f3e719/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77287bdd671a507fc11c+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77287bdd671a507fc11c+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_77287bdd671a507fc11c+84f3e719/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77287bdd671a507fc11c+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_77287bdd671a507fc11c+84f3e719/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77287bdd671a507fc11c+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4afeaea3966deaf22c34024bfc2c220e835a6d21 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77287bdd671a507fc11c+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:255d9e41011dd3904d358d058ba2c8c1c046cde14e9e920cfbc76f36d25971b5 +size 927675 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_77287bdd671a507fc11c+84f3e719/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77287bdd671a507fc11c+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a65887528785c426035e6e3d9f455a21a4524bb4 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_77287bdd671a507fc11c+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae4e144dc1391cb9f733ef69f59f6f70bba6e824621842d599645467f5a6742c +size 32758784 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_7c803a9634386cca678d+84f3e719/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_7c803a9634386cca678d+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_7c803a9634386cca678d+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_7c803a9634386cca678d+84f3e719/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_7c803a9634386cca678d+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_7c803a9634386cca678d+84f3e719/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_7c803a9634386cca678d+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..759cfe2def357eb27074a1ecb83c3ec63b0bdabf --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_7c803a9634386cca678d+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a934a6096947d1c1e935081d14b829b7be18648da3107faa7de34ff58dac4082 +size 927675 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_7c803a9634386cca678d+84f3e719/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_7c803a9634386cca678d+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f5ff4d5dbff8dae41bd14551488aa05d56415571 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_7c803a9634386cca678d+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1abcd7b32679d676bcdfc6caaa618083f9d8262f2dbb49e6b30f1adbb44af8b +size 32758784 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff index db828633e94505172c8eb2bf2e18e33f980052b8..066870f323a51dca1d1c795e58bda461bfd8c1f0 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e9d73a236a57c72f143ecf9428d04909bf0b0ad65ba428dd63ace83dfeed4a3d +oid sha256:739d2836b92b0f86d795152ea45e7808bcbc860d59b9f9f3db51b9e28fbc7a46 size 154624 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_8c063f8f288a908bf850+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_8c063f8f288a908bf850+613edded/model.neff index dba3e158432a010f2e97d51cb5f1ba19aef05789..38640ce74afcb0833c5954434a68c5d3b6dc95ca 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_8c063f8f288a908bf850+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_8c063f8f288a908bf850+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81bba48bd636bd2408241a041ec6f42357f4dd14628c6d40c5355ce3420ecf4c +oid sha256:0748c8e084b158b202b301c0e3c3a8a1d2b891a564a87cb00fc9250b4e877e01 size 154624 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_913f4e1e2b4632438fe9+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_913f4e1e2b4632438fe9+613edded/model.neff index a7023c911837416f16acd1db56eeb0536a128fac..47bb4b2ef8fe4342163f5491cfc81374a38c3ec4 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_913f4e1e2b4632438fe9+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_913f4e1e2b4632438fe9+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd06d8936492e6527589de835f5c5ac002fd4b6e821cfd147da5e17153c7005f +oid sha256:52c1d9e5f2717169f958fa99ee5cea00712187694341ad1a29061f23e8b5787b size 154624 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_a05cbb6e9bfee46f6408+431f5505/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a05cbb6e9bfee46f6408+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a05cbb6e9bfee46f6408+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_a05cbb6e9bfee46f6408+431f5505/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a05cbb6e9bfee46f6408+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_a05cbb6e9bfee46f6408+431f5505/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a05cbb6e9bfee46f6408+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..29b3ffeab34ebbd8a09371d0a25515109e8a6fa3 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a05cbb6e9bfee46f6408+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfe9de7bb79cb736de46bcf79fc17471f401cf06973189f6c75f2059720e4309 +size 136016 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_a05cbb6e9bfee46f6408+431f5505/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a05cbb6e9bfee46f6408+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9b6aef33da306c613184f06c9a0f318e4cf6bb9c --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a05cbb6e9bfee46f6408+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7d158eb96dd30d0d26c23448bc9a5fb632d38aaed5a65851695cde4b6eb4592 +size 2202624 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f5bbf6165bfbdfebd806841eeec632d535477acf --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17df22e1115cb921702a90e909d68377636b82633f317591bd1cd5f0a663991d +size 777949 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..49141122b147e0ff4d2f5fabec6f731c23466ff5 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f552cb0a7e52030a9eeb507c83409d0faddd9449d5b2a304e152ad025bd61bf +size 4639744 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/wrapped_neff.hlo b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..571799ed4c96e670fb28c6649345cdd75e7d677b --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_a25a25824b7b9291fc20+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1a18b699dbabcf53a1a97166f8673418dd9e1697dbeb47b6ed0d69a4dc73353 +size 4777688 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff index a4df12db85afe70fbcc5e351859851dde3cd97a0..d5865495b9fca379b56c2ba09aeb1cde905b3a88 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:740f8ca8cb8cfa06600c2dbbba0af9279386827c862b03ee536d9503a4acab87 +oid sha256:106b5d604040afe779363dc396979104e0f4630939494ea59dd10d27594db047 size 103424 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff index fa1ff027a71369b6bfea79f426bb31dd1b42e015..6624cefec74df134ebc74ccea2df42bae49dd2f5 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f4224dfc540e7c2b714057d6d4870a055e845383f23549a6645ddd4f560da3d +oid sha256:bb6eb29b8892cfa37372bd38f3f0b1c0bed0e2440ef5be044a994fcf52cd30da size 134144 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff index b91c60e2580aa717dd642b19dc8f02d2a89fd0b5..3847a339db8b97b3e0cfd99dcc89f34d43c88244 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1783a48210ba180ea548119f4a725f1412cfd123b1f75b7a72415bcaaac1943 +oid sha256:cd5202d96bc9b62bc8619a0c47c805e25006b8e9747487762fb569778296d073 size 103424 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_cf41a32ef696654dc19b+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_cf41a32ef696654dc19b+613edded/model.neff index c87489aaa8fe06e0ee8ad02995d2617af185aa8e..391a8b70972746889a37988b9f453449b101bc46 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_cf41a32ef696654dc19b+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_cf41a32ef696654dc19b+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e546a3523c7c7f5b12250513e41cbdf153a9da9295460866c772101eacd303b +oid sha256:435f7f735afa22c63f3a1d4f3948bad9ae7f3a721c48fe1222290ad2ad745b2a size 154624 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_d06255807e916c398b05+ee23c5ad/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_d06255807e916c398b05+ee23c5ad/model.neff index 28e60720006eccfd9fd266002a02dc767c4bfdb6..c667ff8e5f1f5e3096994f03b4bbf9945030b3c5 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_d06255807e916c398b05+ee23c5ad/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_d06255807e916c398b05+ee23c5ad/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:462aa67ec8b375262f65fe479da2aded8975b9666365cdaca5b892208bc6f9c8 +oid sha256:2ddb5b2f664df99b34d78f03862e1d12ce902674446b1e83e3e8889c43d1bf78 size 318464 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_d06255807e916c398b05+ee23c5ad/wrapped_neff.hlo b/neuronxcc-2.18.121.0+9e31e41a/MODULE_d06255807e916c398b05+ee23c5ad/wrapped_neff.hlo index e0f5532f8ceb559e10c70308090424c4c1072f77..10c7583e09ec28920c178fefb1477ab9f36e41de 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_d06255807e916c398b05+ee23c5ad/wrapped_neff.hlo +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_d06255807e916c398b05+ee23c5ad/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:69816d931b595b45827becc55c10cca305bba57038ddaf1046c0e7d56141426c +oid sha256:e99771bb94f3915bfe79d0f1de1176722a1da81bb8d2a190d05c41793d53eaac size 329762 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_de8368a717cfd6dfec57+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_de8368a717cfd6dfec57+613edded/model.neff index 95f95af1ce2348583acff5aee64b320c02d0b8b2..9fb5529a8c9be0f154bfa402f1547a590c61d319 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_de8368a717cfd6dfec57+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_de8368a717cfd6dfec57+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f404b4ef2c3ce5544ae42442aa4acc22296264cc7bceaffed1ae04e6de7c7a9d +oid sha256:e49ece40766ca87e306279e678839317c0fd9f7504d557a43b558ba3b9115221 size 144384 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e0765cf6df2204e3664e+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e0765cf6df2204e3664e+613edded/model.neff index a4cb5d87b9e1070d561366a9afedb1b42980705e..d009c45e3d7565f7bb8706a1f3cb1e8e49670f61 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e0765cf6df2204e3664e+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e0765cf6df2204e3664e+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd0d4cbc3cbf8901ebb24962d4d45cd3eb328224ddf49b0e1fdff3c3e7931049 +oid sha256:125e81ea7b0ad94097c145088014c0362a663741b5baa17affe52b82011d27fb size 134144 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2ab803bab289b388b6b62a9eead708a10bd40043 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba99b4922857a98f4f353f7e2691458fe8efc37e61eacfe48138e02310192dd1 +size 778349 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ad1bdf47c880bca1fd28c270df8e0e64df36c08b --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7921995c35ab0bb7d46645dbaed48c8dd34ba291983244eb9655dfd64f13cab4 +size 8346624 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/wrapped_neff.hlo b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..79b3da601e1caa76e75d58382e11ef1f22257a32 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e1f75e72dd2bf7fff133+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80890a0cc3ca0b01951b7688e27ac8ed4f89f348f66a5f3b250a08c48eb38c17 +size 8484568 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e36f587c697c4d8df3f6+ee23c5ad/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e36f587c697c4d8df3f6+ee23c5ad/model.neff index 760344a49e16e2c76fa3c06fc697eb0eac503d96..392f4599ac8a4c7bcfa10569fa6cd71ac70e1e23 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e36f587c697c4d8df3f6+ee23c5ad/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e36f587c697c4d8df3f6+ee23c5ad/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a75bff7639ea1a062399de077c3dd08ada7c9f6097f7d8ee49fba7e965c485f +oid sha256:f2d787acc44dfef535349a05944734726b166cd90a61980a6086e50715b27017 size 257024 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e36f587c697c4d8df3f6+ee23c5ad/wrapped_neff.hlo b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e36f587c697c4d8df3f6+ee23c5ad/wrapped_neff.hlo index 7eb30413b2bc8adf004c61c1569f9bfeb681cec8..cc6c7b5fd5bb18f80f52d60f9a1d23a4b6806c0e 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e36f587c697c4d8df3f6+ee23c5ad/wrapped_neff.hlo +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e36f587c697c4d8df3f6+ee23c5ad/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd1022f6bd3db24ccaf56269b8bd430d04271ed3e1ebe6848bdde05a00f2879b +oid sha256:21b39155c9250d88fd5f95f789251805309df2c48ef76e697dcefce9546fa88f size 268322 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9c02bea443e832e41478d82919cac792258550c2 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45a82bf4a019e58ba45ef148711fbb5750985c9cc3c57bceef471ac4d50b7566 +size 778349 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..200b3db9c383e823262d76467a5c658db80fbaa7 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cedede0a4887406de6417675293bc69a383bfac71e5968517f60fa7925803b4 +size 7066624 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/wrapped_neff.hlo b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..67f2ecb37b0a863b761ca4e0fb788f07f41dbc6d --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e6e5f04b0c6795149ba7+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c9dc15ccf3ed7217d455dc97a482629177bfba1b9e5274978660b96d646761c +size 7204568 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e9bcfc17d832317203bd+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e9bcfc17d832317203bd+613edded/model.neff index ac2e4f0552897217b05f45015a05e8cd65be3d42..3c7a9cc202c0fc5c481f2bafa8b2fced7e3f5cd9 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_e9bcfc17d832317203bd+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_e9bcfc17d832317203bd+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b79325e7cd0cb8eb540cdb7091ee8c5ff291e3472acb2cbfe8d783417720780 +oid sha256:6fa967670b55e90a4f0f9017bc0708537a00a61458202d0dd000ec45843bd1b5 size 144384 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_ec432bd70b0f3bb72798+84f3e719/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ec432bd70b0f3bb72798+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ec432bd70b0f3bb72798+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_ec432bd70b0f3bb72798+84f3e719/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ec432bd70b0f3bb72798+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_ec432bd70b0f3bb72798+84f3e719/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ec432bd70b0f3bb72798+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..32a1d56997b95be866609cec486e6980e9abed7f --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ec432bd70b0f3bb72798+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f8693ab72f8e85396dae390834b839aa812f4184f6845233e950ae5a2935fe +size 858263 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_ec432bd70b0f3bb72798+84f3e719/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ec432bd70b0f3bb72798+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0378f263981498b52b51d6e15dc3079159e53cf3 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ec432bd70b0f3bb72798+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbdcb610ebdfb2905ed085181f5cf4cf2d655be656e03dffbcf6dc2f737e5667 +size 32420864 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_f269665cd140e0a337fe+84f3e719/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_f269665cd140e0a337fe+84f3e719/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..60ddcc80e16080ded570954fe3fc17240221f2ab --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_f269665cd140e0a337fe+84f3e719/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_f269665cd140e0a337fe+84f3e719/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_f269665cd140e0a337fe+84f3e719/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_f269665cd140e0a337fe+84f3e719/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_f269665cd140e0a337fe+84f3e719/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ad5e2b143c6562e5deea75bb243dac443b6aed8c --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_f269665cd140e0a337fe+84f3e719/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2493d57a19ee7359dd7a52cdbe6793887678270a1026114c161fa23aa6e4e90b +size 927675 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_f269665cd140e0a337fe+84f3e719/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_f269665cd140e0a337fe+84f3e719/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a88afbd0978d5c0f33c56f984664afa76cf17ba7 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_f269665cd140e0a337fe+84f3e719/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7539b34dbb4e86112ee23ff23958961a2f86d77501d679062d7ea65fdd0b36fe +size 32758784 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_fce469267b2ad1b5d80e+613edded/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_fce469267b2ad1b5d80e+613edded/model.neff index 8aa01ae0d91943c6b64565ebeb1c0a90aca1f6f1..b3102a76af7863703f249b718f2b399b92b50946 100644 --- a/neuronxcc-2.18.121.0+9e31e41a/MODULE_fce469267b2ad1b5d80e+613edded/model.neff +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_fce469267b2ad1b5d80e+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fee83b4f727cf74feadab161ad24825d30cbfc6ec9d1dc1ae679e040b57b8878 +oid sha256:627e99971985c6f3983d78ef200b3f713401dcf11a585f8a80c5da6d8ad3b74b size 154624 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/compile_flags.json b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e24f877fd3bd437b366557076d7c7d635759a2ca --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ", "-O2", "--internal-num-neuroncores-per-sengine=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/model.done b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/model.hlo_module.pb b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..725395aada31039d62a6a32bc6bfd6f077397cfc --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:044a4f3ced301bf796880b25f86452670f8eab730f222fad302292dd8d13b576 +size 777949 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/model.neff b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..27edf8b4df6960cfa22ae6fdcabb47773f38bbe1 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f344ff8928aa97ca06c88efda00e0eff8b2548db5a5e525cc5e50d72f6e619 +size 5243904 diff --git a/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/wrapped_neff.hlo b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..af7147c57750b9938d099828a37369fd249c11b0 --- /dev/null +++ b/neuronxcc-2.18.121.0+9e31e41a/MODULE_ff48ee0ab9cd762b3c1f+5be477de/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6e84387d99a461581e51e132091e898ed6d882ce2e4cf22e00f8b6c2678da23 +size 5381848