diff --git a/.gitattributes b/.gitattributes index 73583db76fa7936611d33347fc3532af7f3e8f63..50c80a06a541264df94f6d58bd00d5fb690cbc83 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3691,3 +3691,12 @@ neuronxcc-2.17.194.0+d312836f/MODULE_54c48cb9e0d48bfcbce7+165e9558/wrapped_neff. neuronxcc-2.17.194.0+d312836f/MODULE_8325137f39a46ae65602+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_8325137f39a46ae65602+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_fca22ddb2c6875b79fed+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ef8846d75fa5761c030+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d52a1641563510c8105f+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_f80b15eb84a71eda0809+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/81fac095f3d6ebe884c6.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/81fac095f3d6ebe884c6.json new file mode 100644 index 0000000000000000000000000000000000000000..ee8ac2e5993245a71e71c4839d907a552732f59a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/81fac095f3d6ebe884c6.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c665cd695ad271120e6d.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c665cd695ad271120e6d.json new file mode 100644 index 0000000000000000000000000000000000000000..c0b6762ca6b916f0158d66de4fa3f8748d931e38 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c665cd695ad271120e6d.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/ec221ea8393e9e9fd62f.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/ec221ea8393e9e9fd62f.json new file mode 100644 index 0000000000000000000000000000000000000000..1217ef9a3dad6b83f40fe3f1a0cd75950cee2c35 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/ec221ea8393e9e9fd62f.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/834a675df2e91e91bb4f.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/834a675df2e91e91bb4f.json new file mode 100644 index 0000000000000000000000000000000000000000..699cf50a7129c38d5ad4789e074440c9517485f2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/834a675df2e91e91bb4f.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/836e81ae1c74a144e099.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/836e81ae1c74a144e099.json new file mode 100644 index 0000000000000000000000000000000000000000..73fa869cb02c66e0d2bfee9354dae11d445c210d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/836e81ae1c74a144e099.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/f070d170462fdefcc7cb.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/f070d170462fdefcc7cb.json new file mode 100644 index 0000000000000000000000000000000000000000..a738053bb07ab66d2698fe9aed3320930300c30f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/f070d170462fdefcc7cb.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/mixtral/dacorvo/Mixtral-tiny/69a038e29d395e086427.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/mixtral/dacorvo/Mixtral-tiny/69a038e29d395e086427.json new file mode 100644 index 0000000000000000000000000000000000000000..4d6b1c041faabd384ab45ea4abcb05f8a606f8f3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/mixtral/dacorvo/Mixtral-tiny/69a038e29d395e086427.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/mixtral/dacorvo/Mixtral-tiny/6fccf4132e08c10fb6ed.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/mixtral/dacorvo/Mixtral-tiny/6fccf4132e08c10fb6ed.json new file mode 100644 index 0000000000000000000000000000000000000000..c2a91b13e4f6dec634497d7d2b5ea4c5c3745494 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/mixtral/dacorvo/Mixtral-tiny/6fccf4132e08c10fb6ed.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/mixtral/dacorvo/Mixtral-tiny/b5d0e0ab641066aac988.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/mixtral/dacorvo/Mixtral-tiny/b5d0e0ab641066aac988.json new file mode 100644 index 0000000000000000000000000000000000000000..df316204c10e11d8d4f980431a06c28506c7ac37 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/mixtral/dacorvo/Mixtral-tiny/b5d0e0ab641066aac988.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/phi3/yujiepan/phi-4-tiny-random/29065523ef8593d99a26.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/phi3/yujiepan/phi-4-tiny-random/29065523ef8593d99a26.json new file mode 100644 index 0000000000000000000000000000000000000000..b00257f8bddaaea9465d6d1cfb550f5c2efef29b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/phi3/yujiepan/phi-4-tiny-random/29065523ef8593d99a26.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/phi3/yujiepan/phi-4-tiny-random/32d20ad3c3761878bac1.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/phi3/yujiepan/phi-4-tiny-random/32d20ad3c3761878bac1.json new file mode 100644 index 0000000000000000000000000000000000000000..703efafa00a4165531fe1dee944262db9f76baf4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/phi3/yujiepan/phi-4-tiny-random/32d20ad3c3761878bac1.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/phi3/yujiepan/phi-4-tiny-random/491f4920beea5a951479.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/phi3/yujiepan/phi-4-tiny-random/491f4920beea5a951479.json new file mode 100644 index 0000000000000000000000000000000000000000..fdc6b3d4da91a665fdf315ca2314787aabf4820a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/phi3/yujiepan/phi-4-tiny-random/491f4920beea5a951479.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/4ef1f3351a52ca03d475.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/4ef1f3351a52ca03d475.json new file mode 100644 index 0000000000000000000000000000000000000000..781ef6ac28a9a4b0c7a4592da474b1bed24b4ed8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/4ef1f3351a52ca03d475.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/b14391e61b47ceee3885.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/b14391e61b47ceee3885.json new file mode 100644 index 0000000000000000000000000000000000000000..0b5e2c25fd663c4dbc46ac108139f7d029a45129 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/b14391e61b47ceee3885.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/e41fba961a8285bbb752.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/e41fba961a8285bbb752.json new file mode 100644 index 0000000000000000000000000000000000000000..d442d1bf7f1648ed576eac44cc1c7930ff5efc87 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/e41fba961a8285bbb752.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0589868b7472c4335f2c+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0589868b7472c4335f2c+a9d440f5/model.neff index 254d97e8ce6d9a429df4c32129e0160bfe7bd155..db562ef788fdc30915de171c24d0378ec1df4c95 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0589868b7472c4335f2c+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0589868b7472c4335f2c+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b67b02c17d5b839e9a712ad7f5382ae0b26a3f5493de6a8453ab4db4c8e0f94c +oid sha256:298d3b35d1f92a681eacddb4af1fdd02e6bdd529a039de76a631acdfbff8d68f size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0589868b7472c4335f2c+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0589868b7472c4335f2c+a9d440f5/wrapped_neff.hlo index a9bca880aecb81f1b1f865c55db5478797e37d67..9cfc1e78ae7ef3a4b24e28b37a51658009ac118d 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0589868b7472c4335f2c+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0589868b7472c4335f2c+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d5e0b9a22c7f437d85b83d3a9286597532f7725b112874c84476572d619d3955 +oid sha256:f4769f80ea99193a5d47a246f17dc35f46bd20a97fca988705cc73ac9759eff4 size 244319 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12b331fade64f568227d+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12b331fade64f568227d+431f5505/model.neff index cbd01d3a8919a3450bb4cd68477c1288634fd8c2..6d81a5074c35852448186920ece83e204bd78f87 100644 Binary files a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12b331fade64f568227d+431f5505/model.neff and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12b331fade64f568227d+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1c13182ad393459c74b0+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1c13182ad393459c74b0+431f5505/model.neff index 00fdf259179ef3de49e6b997ac0a5c69f83acb47..9dab9058c3fe48cef0ce354944c3ff889623cbe3 100644 Binary files a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1c13182ad393459c74b0+431f5505/model.neff and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1c13182ad393459c74b0+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1e6502638c15da1920c2+ca355898/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1e6502638c15da1920c2+ca355898/model.neff index 8c166ee82d3843d9c9274d4140f1e0847c742241..68f640bb34660fe224d56b3ad546e51027ef5fc5 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1e6502638c15da1920c2+ca355898/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1e6502638c15da1920c2+ca355898/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9fb38fc267f76592b0cc0e53b7575113377601ba3679375e3d8941bbcc00b2f +oid sha256:1d15344e3c6cf89fdff9db9a509d516e5724006ad608211dc7ef73d384471b3e size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1e6502638c15da1920c2+ca355898/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1e6502638c15da1920c2+ca355898/wrapped_neff.hlo index 108050fa3b5132803d82e0a7be02e6cfa8d397b6..4d17f0b0f8967b7746aab548f3eafbabbee37f39 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1e6502638c15da1920c2+ca355898/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1e6502638c15da1920c2+ca355898/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6432d0d654792167705df4312b66c54d0a792a710094d837584bc5e0a312021 +oid sha256:fecdabb646a7a4e95b4772d0d8e7fb8f0dbe3096483381b353733a751fdc4bb9 size 258114 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+a9d440f5/model.neff index 5350f4d9d85715e5849cee40a51dccbfb0429c3f..88fb4cd9488b8ae150c81e1931c97fb910e5c5fe 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:401307ae3203b370c828266d75d96845cb5ee7a8988eda91aa86ce3035185037 +oid sha256:acdcd77cff67340f006b364b59a633120708a4a0875116af0d53cc6f7db617b1 size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+a9d440f5/wrapped_neff.hlo index ba124c0a6e5a7dd745b067357da7466c1b4c6ab5..542b1212648a82b1725cc31bcd52570d9c591e50 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8bf4929dcc550a9521cfab337a0026ec344a203367c3323d5688c4e82e90a11c +oid sha256:deff04ad37ed2475c54f4332a554e7753ae59a10d5ba63dc7b244c26755def16 size 244319 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_23a2b8e835dd2021c6b3+ca355898/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_23a2b8e835dd2021c6b3+ca355898/model.neff index 2008c7edbd29aeb650e695cf6c7ffcd9c75f88cf..69001c9a04936cadcedcb3519d3e825865fdca6d 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_23a2b8e835dd2021c6b3+ca355898/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_23a2b8e835dd2021c6b3+ca355898/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b4558eb2841329755701439cc56f8f98132224b959955389ff78c2e65a7a2236 +oid sha256:0e08f1f01cedf071be921943ec1f121790e4e1b4ff554ac5a157c0750337903e size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_23a2b8e835dd2021c6b3+ca355898/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_23a2b8e835dd2021c6b3+ca355898/wrapped_neff.hlo index 4f2ac3a551c758eab2edb2c8bcb913ffd075e7dc..e7f20402f8b826a1360b6909b3385620429c9abf 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_23a2b8e835dd2021c6b3+ca355898/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_23a2b8e835dd2021c6b3+ca355898/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab5759b967302c6e1b1c73c853c73ac5dcbc4e9cb1ad8b9df837c6b81700ddb6 +oid sha256:aada4274950d1284626eafcc04cfe8254ea00fabe4dbb0bae9fe7aae1ff3b24d size 258114 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2a9a2536c0daf08da8a9+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2a9a2536c0daf08da8a9+ed72d204/model.hlo_module.pb index c06426bf7fedce358f3844db832b9281562979e5..8246040fc6817bbb107475d6c7d923db02050b8a 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2a9a2536c0daf08da8a9+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2a9a2536c0daf08da8a9+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9eca1f65c2409e5c7a7339ab1bcaa6b4fc78811995c18934532894fd3e6ecbe0 +oid sha256:07b496c63ad93ed914609263ea09a08cabdb784d4f41aa99d31c544d459d09eb size 83920 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2a9a2536c0daf08da8a9+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2a9a2536c0daf08da8a9+ed72d204/model.neff index 2949d9a7828007c5a01ceff35efa8a5cdf144bf8..6e46d3af4d303f1598ed3df0bb0c8f789ad8d80f 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2a9a2536c0daf08da8a9+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2a9a2536c0daf08da8a9+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2fba19c77e65c25692f52fd49b05880af3a3215113b393603abad25dfaf536c +oid sha256:d620a6353912cc334a15251c3771e49f7101ea25fc582e1ba11f5db4f4ada552 size 185344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2b02b9bd48b487eaff7f+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2b02b9bd48b487eaff7f+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2b02b9bd48b487eaff7f+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2b02b9bd48b487eaff7f+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2b02b9bd48b487eaff7f+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2b02b9bd48b487eaff7f+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2b02b9bd48b487eaff7f+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5e8d7727b0511a350878469d52667f508aa8d6e5 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2b02b9bd48b487eaff7f+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29a19e4faeb0ab97b7be4e7f34fba5b4bf0937fc70ceb6cd406a20798082ec49 +size 7099 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2b02b9bd48b487eaff7f+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2b02b9bd48b487eaff7f+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f9085a5d2e9b32e5ce9d78f4f7ee0851fbf53a40 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2b02b9bd48b487eaff7f+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f2af00b78e63585aa79faf96bc6c252f88ac89ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddc29ec5d057780933c49deacb056003faba7a066c72bf7ad4005cf3cfcf5b99 +size 84647 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9f6058e13544c640fd08244de9bfb2553e6f126d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed2d129075eddfe8e029d989987096dd7db9021c607475024b045bae3e3c9de5 +size 195584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a45fcd8894146d3b4967fccbe2fcc0ba48d9131e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:338e0e892339fea2aa330bd559c1c5cbf6fd53404317c11e4ab4703896b62c1a +size 203354 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2c3f8d60681c3a37a82f+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2c3f8d60681c3a37a82f+a9d440f5/model.neff index 57f58e98326baa3bd7c1e4427dd0b6f6e59414b5..4960e4f5d2f280e3c3c96f2c9682ebb832201a4d 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2c3f8d60681c3a37a82f+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2c3f8d60681c3a37a82f+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7835c0daa36730c8dbcb274ebc984c2359c81880f8dc46b26f52a3f5053c2353 +oid sha256:324ba0249c4f3c8daa857916263d84a13942a8eb39ba9c55b68c9e19622348e8 size 185344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2c3f8d60681c3a37a82f+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2c3f8d60681c3a37a82f+a9d440f5/wrapped_neff.hlo index 9a2a852f1325762b43f907b31eba0ea407ac6f3a..2f64ca16a7c74be176eab8c10fcb1815a72e6ce5 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2c3f8d60681c3a37a82f+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2c3f8d60681c3a37a82f+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b1471ce3bec6d72c41e69a9a6f2bf1f1913342b80657ed89cadc5747e3831c84 +oid sha256:7a3635339eaf324f1684932551eea4948e0756192a52db41cf7777fbdc3022ac size 193114 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3422142ebef7a787c200+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3422142ebef7a787c200+a9d440f5/model.neff index 5e41f2a1bf2ebbe5a4aaf9ac54ea6cd180202cfa..dc6f531f44ee9cf8c367a6c042991b069a2c43b6 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3422142ebef7a787c200+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3422142ebef7a787c200+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d888ee8c853c08d07390153bb047d048af2442b3931da0f0f08684431bf045e +oid sha256:46868f9c178581ab44357e7b137684dd98ff2c1aa3e587a9163a59b45283eefd size 185344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3422142ebef7a787c200+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3422142ebef7a787c200+a9d440f5/wrapped_neff.hlo index 02bee3ee1a54e8086b0a738a706648fa022273f8..11fcc93f070c69ff1bc807b59a4cb90ca848e067 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3422142ebef7a787c200+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3422142ebef7a787c200+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f060c0a30fba95285880a636c97af4a51e3592f2f855a27cee90bf86c4d3b030 +oid sha256:75d52a08fea26e2939142da8d2cea2717e3f93f3b4d37637df3e97fcdf260744 size 193114 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_367840a31061aeda6f75+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_367840a31061aeda6f75+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_367840a31061aeda6f75+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_367840a31061aeda6f75+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_367840a31061aeda6f75+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_367840a31061aeda6f75+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_367840a31061aeda6f75+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ff00987ee182c554d9ff86d8fc2bae220524ca62 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_367840a31061aeda6f75+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:270799a68428bbba6689e3ec0f5bc39ba3102e97812e7e7f240050b4597f4944 +size 7011 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_367840a31061aeda6f75+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_367840a31061aeda6f75+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..575e981f7f60c14b7512f0b304b87f4aec13f1da Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_367840a31061aeda6f75+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3ed160209074742a9f74+431f5505/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3ed160209074742a9f74+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3ed160209074742a9f74+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3ed160209074742a9f74+431f5505/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3ed160209074742a9f74+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3ed160209074742a9f74+431f5505/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3ed160209074742a9f74+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a385f51ec93783ebfccc02bc4ef8865b40ef8be5 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3ed160209074742a9f74+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c1c18742fbb646d4a81924eef4eece4618c7ad8f1338a434846434eb796d5c5 +size 7106 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3ed160209074742a9f74+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3ed160209074742a9f74+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..17ece38a6ab9f02c130b160ea4d913640f8d90e8 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3ed160209074742a9f74+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4298565bf741752d31de+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4298565bf741752d31de+a9d440f5/model.neff index e47d2020e9c4d661fbfc36b24bafd59cc36c2831..56f76c1cf02d0176f59f115ddd70efe67bf76a37 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4298565bf741752d31de+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4298565bf741752d31de+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47c060724055e4ba09c1bf8b7371714445e138adf8defcc86da6469ae8163d39 +oid sha256:9bbfcc60f177d83d8e82818d25fc28525c2b150c2097dc27263b0a23ea2979c6 size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4298565bf741752d31de+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4298565bf741752d31de+a9d440f5/wrapped_neff.hlo index e6b9e98026a2797479e1e591804cbb1742b853d8..ef11ed528c2d21cff44f113477b3b58aab8a0dd9 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4298565bf741752d31de+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4298565bf741752d31de+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b325437e1ba4b0058522204955031bced899ff95c17c9776bccf6f22a126af09 +oid sha256:aba64bfd493b560ca1a13cfcd0d97c4df4b18f10af4c0aa316e29d0f914a5d5e size 247153 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_445ff816d483a8457b1f+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_445ff816d483a8457b1f+a9d440f5/model.neff index 8a7afed0ae1cfe954480994106688789c5fbd69e..0cf18b1fe16beeb4d53f82ce44cdf5050a1d983d 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_445ff816d483a8457b1f+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_445ff816d483a8457b1f+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b21c91c8484bba6f5ef79c5c4db0e560daef1e964908a75f9581761a2d57b8a9 +oid sha256:874d8e09c6f6e3973eea15d6c7e1c51b909c71c4f9702a000aac510b4ae92e43 size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_445ff816d483a8457b1f+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_445ff816d483a8457b1f+a9d440f5/wrapped_neff.hlo index e011f36a378a4dec77225cb783a9bb51889ab959..5eb0e13d8bae553d3128e3058a58eeb4661a2acd 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_445ff816d483a8457b1f+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_445ff816d483a8457b1f+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2835fc17c9eba831425648f0e4b6d0d2d925ad2900218f29ec0e49b73eb77d4c +oid sha256:fbb6bfb6ee9cd91cac6f75c99fce2ae366a2b4c9f5b5037396ded429429426c9 size 247153 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_44b3dad90bfddb867dbb+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_44b3dad90bfddb867dbb+ed72d204/model.hlo_module.pb index 4eb2209ad8d31ec421f764c59c676dcb6880fefa..03548b82c0ab2db3fff16a81b87d2a6f6cb46a1d 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_44b3dad90bfddb867dbb+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_44b3dad90bfddb867dbb+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b3175e0119359ea0b2bb2b615f5a09d6822743674b887235b564464f8a4578c3 +oid sha256:1ce900c5b6f2ef81f75f395da87b22ea27b955c2ffcc9979035b2e6953b66f0c size 81323 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_44b3dad90bfddb867dbb+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_44b3dad90bfddb867dbb+ed72d204/model.neff index cb22c09b04ce6841074003a20834b094aee7ba4a..cd5560759282c09b9f23d6a7d638e25798106d07 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_44b3dad90bfddb867dbb+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_44b3dad90bfddb867dbb+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23ff45f78dfe130cf79956975ef433a207f471cfd30abbd235e82850f37ec06f +oid sha256:ca474ead7f7b0bc1b92cb0186f196f4b08d13965d75b129492a50925f1f2d3a3 size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_487ea60b2ecc168e7006+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_487ea60b2ecc168e7006+431f5505/model.neff index be740bca0b3446d7446afcecd63b988b86315357..4e3ac30bc70d3ca6ad1db6662aeed09be8a34858 100644 Binary files a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_487ea60b2ecc168e7006+431f5505/model.neff and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_487ea60b2ecc168e7006+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_569380bce0f73e129472+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_569380bce0f73e129472+ed72d204/model.hlo_module.pb index 5548a48916d9425b1da36a5c4d5417a67d0a2caf..8adf7031949e81ac990ad3aac9dbd4802bf3bff4 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_569380bce0f73e129472+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_569380bce0f73e129472+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:03b7c8bb73a84684e848a4401b656d46c0a39c5489b2d092bc0e77cbbdbd4945 +oid sha256:89337aed35960e0398ba1681cb4f79681185f5ed665e309d466fb739258e854a size 83920 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_569380bce0f73e129472+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_569380bce0f73e129472+ed72d204/model.neff index bc620a1a9471520af30c48674e09ab56adefced0..4d8f3479ba16eb0d556db22ea513cce3c9f12d45 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_569380bce0f73e129472+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_569380bce0f73e129472+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b0311ce7dfeafd52060fdff8847e1efd73583aeac078ded8001926826f160af +oid sha256:add447718a0e91bebe6c9fe36b99ad94d58fc390fd60081b5084acf0ebfa6f3c size 185344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3fd0c8714c069297049c2f1265558ceac75586a3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d5e644c7fc5faeb4dd6e08572e6b953ffe9aaf266e02f6ca1ad01483807ef7 +size 82166 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c1f2661d413e923cdd1d906f5b269eb5312fe0c8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b55df3ee9866c55ea947a4b661cad05496df3242285ac3c32209ab3f4573eab +size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7f879deab8d4006b68d0a91bf5c8cee56e266d16 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77171d923c95f81ecd155d404b4f07d35855bc8fb83e575a3b8883081299319a +size 254559 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63c2c34c7243bf28abca+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63c2c34c7243bf28abca+ed72d204/model.hlo_module.pb index d83153eb8459dfb5e53cce80d1c080439901763a..75bfe16768859f114df351149ff0b3187ab925d2 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63c2c34c7243bf28abca+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63c2c34c7243bf28abca+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:35bb527b959dc8645a02b0119a39f0f5c6a148be922d2261a4e4106937f361fe +oid sha256:497c866136fb799f26f306ec90faeeda1b3e41c8e46e710d1086e9d3d44f18a9 size 80469 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63c2c34c7243bf28abca+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63c2c34c7243bf28abca+ed72d204/model.neff index 429440a8a611ff67cbf708f0d2efea484d7e1b72..557d99fde60df5faf6eb1ae88412bc557f0fb26e 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63c2c34c7243bf28abca+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63c2c34c7243bf28abca+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef15b2e097ff580b6706978dd45a7d13eb102727e94bba630d1358ba9bab5397 +oid sha256:d9bd119ee9a0e036a1c56386b59a484c66903d54564c8741482cca0a94f97542 size 205824 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_65cc17d5daa2b60921f9+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_65cc17d5daa2b60921f9+431f5505/model.neff index 8afbdf953d8007264b537b5f7b087ca30d31fac8..83be213ca4ce16984764d91dc322fcedf2c429c2 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_65cc17d5daa2b60921f9+431f5505/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_65cc17d5daa2b60921f9+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76fd1a54684e4de0e33c5c6fe998a5d44c2c6d175964847950891d4abc39c2f5 +oid sha256:f3dfba26397142aef809f8b4c04dfead2ac8316b713ac204995fced22707d97c size 103424 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6a7765cdd7439b14659e+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6a7765cdd7439b14659e+431f5505/model.neff index 7e04b243f71359a4fc6d3f7b9234d96cddf57a2d..10326d1598e755b9c444fc7e200e301fa0dbcdb3 100644 Binary files a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6a7765cdd7439b14659e+431f5505/model.neff and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6a7765cdd7439b14659e+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7361e4b296923179e6ad+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7361e4b296923179e6ad+431f5505/model.neff index 298e48adf0ff09c106d8db9955afb84be80a8d1d..4be7d899b1d8d8173a19830596be20bc1cec21b7 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7361e4b296923179e6ad+431f5505/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7361e4b296923179e6ad+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df9cfe9507d1e9536868c59fe942611a67eb5b0dc0f96286850201e568d5c043 +oid sha256:f1842777d30408e20f37794927d5f65c1763e9e05ae32bc2cf68c0dadd27993e size 103424 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76b0c22f7fc8c9e2ff54+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76b0c22f7fc8c9e2ff54+ed72d204/model.hlo_module.pb index bfac9baacc8cdcde95c3bec9b95c589cd1098421..e72d35fc4e69a33e8c6414b3432905cbc5d45e7e 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76b0c22f7fc8c9e2ff54+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76b0c22f7fc8c9e2ff54+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2dbb95034e1f21fa440a68f2a7cc4cd38547a04023591a5979d30bcd1df2edc9 +oid sha256:cee3109f1fbfdd51cbb1fdaeab195b1445646dceb88be54a041f6a9f9f96ff25 size 81323 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76b0c22f7fc8c9e2ff54+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76b0c22f7fc8c9e2ff54+ed72d204/model.neff index e2e69a6281d7c6f646c0dc220ae4b22f8c97c7d3..0aa0d1dc10c1928728e9b014af4bde91b084dbd8 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76b0c22f7fc8c9e2ff54+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76b0c22f7fc8c9e2ff54+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28797dc1c22a809dd208abf09354c73035cbe53ee67b8fe50dc0cb7a783397ad +oid sha256:d57dcd90cef63c61e270f0399be4d9ec4e48b86e924fb220247452e416e836ac size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e06e2fcd26e59afdf55+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e06e2fcd26e59afdf55+ed72d204/model.hlo_module.pb index 09c4c7f968fbc62dbb03121bb20e68249ee6f4c6..82b489c32873f00d0efff328b76997ed53d6eb93 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e06e2fcd26e59afdf55+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e06e2fcd26e59afdf55+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a29918a4399e6877e79cbf4b468d296db7df97b52d711acdc377ad211ff19288 +oid sha256:61295592ae28d8117bd19aa4bc70a5fc79b80a6734a196ac9415d96715b5dab8 size 80469 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e06e2fcd26e59afdf55+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e06e2fcd26e59afdf55+ed72d204/model.neff index d8652bcde0f91c64589a6ed037ff4a045c9b75c0..17abcbee84ddb6d9fad18323bbf91396257925f4 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e06e2fcd26e59afdf55+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e06e2fcd26e59afdf55+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4038d8cbf74d8bdde2bccf41f68a76a37afaee957c1fe586e399a9b54f61d769 +oid sha256:d5c0e8f392a7ebbcc68ee704af3782378f1111aa933c8f7621bb685847c4112c size 205824 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e8abd5578389b547eb1+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e8abd5578389b547eb1+431f5505/model.neff index d7c6b894a68f0cbdfa33f9631ffafb12bdffc671..508420a5a09ba5c71d5dafe7b00918fb92fb553f 100644 Binary files a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e8abd5578389b547eb1+431f5505/model.neff and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e8abd5578389b547eb1+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ef8846d75fa5761c030+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ef8846d75fa5761c030+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ef8846d75fa5761c030+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ef8846d75fa5761c030+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ef8846d75fa5761c030+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ef8846d75fa5761c030+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ef8846d75fa5761c030+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3bd8cb730e0e09d9a4d4fb6babc098cbe8023263 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ef8846d75fa5761c030+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457dbd6282732a78ea32ae3f1dd8750fc9a3f1d373b4cf9b6dadd6a344e5cb89 +size 88270 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ef8846d75fa5761c030+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ef8846d75fa5761c030+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..86ed332082101652ce59c2a200ecb8fb4814501f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ef8846d75fa5761c030+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4624cda2e2783d6971cb4beae805e21e5ebb2a9bdbf2be6f4186cb108328f532 +size 277504 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a6e9c9e9fe59e3f8911d+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a6e9c9e9fe59e3f8911d+a9d440f5/model.neff index 330a47c0112dac994a062c5936987133d14a3cad..3b8cf66f8400de5a681bbbbd5c0f49daed98808b 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a6e9c9e9fe59e3f8911d+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a6e9c9e9fe59e3f8911d+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01b8b287afcf205604ed84eb06106e654306e4d0cbc97ddd6c223232550270bb +oid sha256:71b27452317f920f720aebd337b496affde238240b9835d912b023ac0a418276 size 216064 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a6e9c9e9fe59e3f8911d+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a6e9c9e9fe59e3f8911d+a9d440f5/wrapped_neff.hlo index f97f21bf273a7acd2a898ed316238465a252a3b1..72e3485383f041729d645713dfe7261c0661c096 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a6e9c9e9fe59e3f8911d+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a6e9c9e9fe59e3f8911d+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:afd8d80164615db21657136abe59509ac27d835311e0e90bcc677ddd0a38432b +oid sha256:a5547eb46e421a4d941c47f4623b4bee25fcd4248a01a1e6b709457b964b1326 size 223770 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b092b55c6af9d765923b+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b092b55c6af9d765923b+a9d440f5/model.neff index f8af59c271c0bd105da10d1e361c1ca8985d06e6..b778828cb6ad4cadde8bf59112fc9c7c72670df5 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b092b55c6af9d765923b+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b092b55c6af9d765923b+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44357f16a7dd99d76dee8483d6707094002c36c492f2ce9c3cc3eb807d9c6dfc +oid sha256:08baa04ac2ec2bcfe37d17bca836289f71f07c92a38220c09580b366f569df41 size 185344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b092b55c6af9d765923b+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b092b55c6af9d765923b+a9d440f5/wrapped_neff.hlo index 186ee52156ace5377ad9ee85d3098f6c22e68d51..9c23a6c4af4d7f645c369413675fc01f8e8f7ed3 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b092b55c6af9d765923b+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b092b55c6af9d765923b+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02f81e258f77b643879267d4871842fbbd3bb329999a788eba90107cb883006d +oid sha256:950226e7cca1d7365271ddd73b1095b0ff05c437165a8fd5e20deb8b165a63e5 size 195507 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bbd39f65dce3a32962495fd3940a4bb22bbe0813 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3efd264fa85cc5d8470175003fb874262c6b4b70a263349a61a2799de8165e7 +size 80360 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..99b3e58c9c090eb2917759f26ce12f0d306cce56 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:965983486c0c70da0656edbaf785f28048536f0b87652feb42296e05745c8c84 +size 216064 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..12b825d138eaf5975633b727994b863acdc1eb9c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ced6b0953720323cad5bd107ba88d595adde8034789e47222f35025179bb8ac +size 223770 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be719fc3eedb7c9c2b8e+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be719fc3eedb7c9c2b8e+431f5505/model.neff index 0b9cd7c7f9aabe16fcc0e7e6943b2f4b0b2276af..f4a6fef7bd810b632024396139850a529a70e31d 100644 Binary files a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be719fc3eedb7c9c2b8e+431f5505/model.neff and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_be719fc3eedb7c9c2b8e+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c0f9814a59c53c4073d6+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c0f9814a59c53c4073d6+ed72d204/model.hlo_module.pb index 4fc6023cc6db078be047bc888b78282f056f919f..7082181dde36b2061da7b16bac0f29aa8bfff740 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c0f9814a59c53c4073d6+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c0f9814a59c53c4073d6+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebe92eb5497f90051be9cadfb0d46b40993bb33aae28f61450e5acf136838714 +oid sha256:d84a277863d35907d662445efb1df12c4f6a5c3d14f4527ddd3a12e12e2a0c1a size 87785 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c0f9814a59c53c4073d6+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c0f9814a59c53c4073d6+ed72d204/model.neff index af1a8ebff41f63e9c9576b931dfef200dc825f2c..c0517df02caba50d79cdacb6b2fcba2e548570ca 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c0f9814a59c53c4073d6+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c0f9814a59c53c4073d6+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:807d4588e8cd6ad96175aabd13435e363d34494d95ea577a3d1a528a6950a9d6 +oid sha256:13df5caee314b6b48ab7b6b54cce95a9671e62bbc61ae60da8147c79df333119 size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c6dd950c4bf878ea25a4+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c6dd950c4bf878ea25a4+431f5505/model.neff index 5b68a01fa55404103be7ca3e7cb31f679ce18bbe..767e39df75cf987f95ced701327516d637a88c27 100644 Binary files a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c6dd950c4bf878ea25a4+431f5505/model.neff and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c6dd950c4bf878ea25a4+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ca3a3e74bfb61306abb5+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ca3a3e74bfb61306abb5+431f5505/model.neff index f983018cfe5151ca39532a8e81de87d342c6d507..a285e44091f4f3bbc8d49931ab201fa4d026961a 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ca3a3e74bfb61306abb5+431f5505/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ca3a3e74bfb61306abb5+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:197d100c7c6024f083c0f4ac967a28a3df1910117a3e13b8bb12c62c984cb902 +oid sha256:ec637d13ae4a67a408500c92cc3626326d8479846f95ba2c07aa5eef08556c77 size 103424 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d52a1641563510c8105f+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d52a1641563510c8105f+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d52a1641563510c8105f+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d52a1641563510c8105f+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d52a1641563510c8105f+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d52a1641563510c8105f+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d52a1641563510c8105f+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5cda18a54ea6b4609b8f59466ae0bcff0c006b94 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d52a1641563510c8105f+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a356c9353b7020234d28466ec3cb5c39c393e6b10a912b8614b530eaba30ba +size 84819 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d52a1641563510c8105f+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d52a1641563510c8105f+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..00576ee01014e912b0e43e551a16291943807564 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d52a1641563510c8105f+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f99d550862bd38fa567722e6cf9e7ad4672d289f508fcb3debf2b5a71408b224 +size 205824 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d691ea3e06f5d7d31edf+ca355898/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d691ea3e06f5d7d31edf+ca355898/model.neff index 257ddafd69f6112ba034c020b277d8dabc2decdf..9f274403448dff2f976921c99cd15d8945f82784 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d691ea3e06f5d7d31edf+ca355898/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d691ea3e06f5d7d31edf+ca355898/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b8c3787962c8ae038b5ebb3eb977f4e4e932afd7b263eddfee1ca0fa2e1d482 +oid sha256:00addaafb9ae272c100ae60700c2350525b8457ea84f9a3e5c0165aa00f2153a size 308224 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d691ea3e06f5d7d31edf+ca355898/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d691ea3e06f5d7d31edf+ca355898/wrapped_neff.hlo index ff749f8e568914ed6ec2b10bf7a6d94ba968368a..dcf8d4c244627fc62922d99d4bdf24d368735a6e 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d691ea3e06f5d7d31edf+ca355898/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d691ea3e06f5d7d31edf+ca355898/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27bded8fc55a51d3c4e7306bbfcacc9383665a1458e9d971fe7ee453dbaea654 +oid sha256:23c1782778ad3b04403e97bd62bc1748aa77b08d86e5fc513555288b28d4c4bc size 319522 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e3a13b85dcb5dcea901d+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e3a13b85dcb5dcea901d+431f5505/model.neff index 2c6b3266b2e7acd94b09d3828dcd3a011518f25a..d036bf54791b2823581ae8969bb072d651a1e6b6 100644 Binary files a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e3a13b85dcb5dcea901d+431f5505/model.neff and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e3a13b85dcb5dcea901d+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ea75f74c78af49a980d6+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ea75f74c78af49a980d6+a9d440f5/model.neff index bab19e68fc80010008d90e09a263262014d949d4..c706e0e2f788091940e1e6fb29fee10bcc581c03 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ea75f74c78af49a980d6+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ea75f74c78af49a980d6+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:967b8a97c2de0ec209bb34045a8c7e1cd0b8e32bc51f741f3a812e414520f61b +oid sha256:1a5e2613d1545896183e46759da4737705fa3052496c2c0aa4ddb37bbb2107cf size 216064 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ea75f74c78af49a980d6+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ea75f74c78af49a980d6+a9d440f5/wrapped_neff.hlo index 86ff072661c07fa2acfb0a7a757eda9bc36f63e2..e8572650f209247da9303c684b5871cbc820adad 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ea75f74c78af49a980d6+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ea75f74c78af49a980d6+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c36dd1718f1fab840b9a0f088edcb4e7cd07c80f3fe0be4f8abe41f02fba8c70 +oid sha256:97c21acc33f6e60cbda5020f093f53dc890646c0446c4b7c881d3b597d970e18 size 223770 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.hlo_module.pb index 3e67af97faed625915a174b2d6cfe532b1a06db7..5b857f4b4024797911985448ff7c2b6b2d07f5c6 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c5d3df36f2bba9a77067a695cad9c8cab7eb6d221e7924ce2e6aec5601887c58 +oid sha256:ac71ef9b4c634db790be9bc2da6cfe71b3b71bc0d838ef07283a2507070fe731 size 53803 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.neff index 8ca2562fd0922736b3a991f6f5c73ba82bb9230a..2e9734edd1f01b2621c3e9d5f2cfc445ab01a1d1 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f632a9386b3efed9d5286089a72a063e59f1d951fd21b9a2e0276e29f9be4e06 +oid sha256:1a93567bf07e86174a5ca27805ac10d9b3ab71ce7296a78cf64add8ad31daec6 size 164864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f80b15eb84a71eda0809+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f80b15eb84a71eda0809+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f80b15eb84a71eda0809+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f80b15eb84a71eda0809+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f80b15eb84a71eda0809+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f80b15eb84a71eda0809+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f80b15eb84a71eda0809+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5f0041db35a0a18aabd683fffe15804fbdfef132 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f80b15eb84a71eda0809+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78cd1a927975c0c11ec88ee08db6cf99296abcaead1dbd17ee5c1b4515661b8c +size 85673 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f80b15eb84a71eda0809+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f80b15eb84a71eda0809+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3d57ef5f3d55dc59958ff0bf486c3f7d23e0a563 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f80b15eb84a71eda0809+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:000b8223e20b07db367a808dc8cf0136ad7f23400becb18c8ff276dec8f3306c +size 328704 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fbaa5b03f774dfaa214b+431f5505/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fbaa5b03f774dfaa214b+431f5505/model.neff index 3d1560bf1e8d557ae4b3c4567ba443379be1f620..1a2e1104a21396506c6792c4c114e2649a39ed55 100644 Binary files a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fbaa5b03f774dfaa214b+431f5505/model.neff and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fbaa5b03f774dfaa214b+431f5505/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fc1c3fb8409b31c4cc3d+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fc1c3fb8409b31c4cc3d+ed72d204/model.hlo_module.pb index d71298b835f140fdb114dc66878f57078d8f2dbd..1f0f55a41a51a6eb581eb61a4aa6862881c6f70b 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fc1c3fb8409b31c4cc3d+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fc1c3fb8409b31c4cc3d+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ffb3452c2107fddfa65b7a94d75c1f37eb08f601c66dead00e01aa5c390b67d +oid sha256:72d355bd59f97b1c0a1f84eafdd160fcd75b76f140d64ab17d9160882192ce31 size 87785 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fc1c3fb8409b31c4cc3d+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fc1c3fb8409b31c4cc3d+ed72d204/model.neff index 92a112a036821a9adaa941b535e1cf3381add5de..304e6c86edf180bcacd9375abeaec5eba0b8d848 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fc1c3fb8409b31c4cc3d+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fc1c3fb8409b31c4cc3d+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:020673615b14e3f3406e47e7e085ec5b6835ff5056790616b235a99b8ec3e6ce +oid sha256:c583f05ab1185b65e73bb38a681cf39ff4378e229cdb17dbb5fe666a2b05f7a1 size 246784