diff --git a/.gitattributes b/.gitattributes index b91582bd3427f2cb27a8820a7ebc5f7dc8435b69..87083a4e2f105b7c0bdd7c471fef8518af260132 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2723,3 +2723,9 @@ neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/wrapped_neff. neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/46592604b62f7ce89082.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/46592604b62f7ce89082.json new file mode 100644 index 0000000000000000000000000000000000000000..eae69da7ad4256a65ed10d9aa0cf6705c2cb1b00 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/46592604b62f7ce89082.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6517782804be0be5e2a4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6517782804be0be5e2a4.json new file mode 100644 index 0000000000000000000000000000000000000000..2ba8d4d3e1492c85162a91cf6b7ebcff0c5e9bd2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6517782804be0be5e2a4.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/bb0a40e5cbe4d1c25285.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/bb0a40e5cbe4d1c25285.json new file mode 100644 index 0000000000000000000000000000000000000000..36bcccb268f07104571d19cc3200d4535b06bf76 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/bb0a40e5cbe4d1c25285.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/bfc3e6553a3d02bc5c75.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/bfc3e6553a3d02bc5c75.json new file mode 100644 index 0000000000000000000000000000000000000000..ab0080bb057c590e885279cc1a02810dee516109 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/bfc3e6553a3d02bc5c75.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/ce0df42e903c9a49fa72.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/ce0df42e903c9a49fa72.json new file mode 100644 index 0000000000000000000000000000000000000000..e6668f0b9ff19c3195df1f34c5e84579a9a44f49 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/ce0df42e903c9a49fa72.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 1024, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 1024, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/ab418e732245677e7cd5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/ab418e732245677e7cd5.json new file mode 100644 index 0000000000000000000000000000000000000000..5d49fd922a23b75aaa3fb8a5471d5baa9db722b8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/ab418e732245677e7cd5.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", + "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/291ed174890a84141720.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/291ed174890a84141720.json new file mode 100644 index 0000000000000000000000000000000000000000..55266c252a4449a1d7d5427c6e4d949f507caab0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/291ed174890a84141720.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/41afd2fb7f6326db0c2c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/41afd2fb7f6326db0c2c.json new file mode 100644 index 0000000000000000000000000000000000000000..8be472e58e9f7e7e1b98bf9f520925eec2455df2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/41afd2fb7f6326db0c2c.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/650d381736581bd669c9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/650d381736581bd669c9.json new file mode 100644 index 0000000000000000000000000000000000000000..74d7c6c2ac38c2ea9868b970b2de7c5c314fcd88 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/650d381736581bd669c9.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/93956a41cd1203f773ff.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/93956a41cd1203f773ff.json new file mode 100644 index 0000000000000000000000000000000000000000..81a0158c06fbd297c66a85d87645058c69c0035b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/93956a41cd1203f773ff.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/b6837f9627ec2b9693be.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/b6837f9627ec2b9693be.json new file mode 100644 index 0000000000000000000000000000000000000000..845543a70ddef9411724c14cd900011a71fb1d5c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/b6837f9627ec2b9693be.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/dcd70056fa539895bb43.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/dcd70056fa539895bb43.json new file mode 100644 index 0000000000000000000000000000000000000000..f948edf0b481bf62102ffed432c74180077f7184 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/dcd70056fa539895bb43.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/3ffe53335773a02926b2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/3ffe53335773a02926b2.json new file mode 100644 index 0000000000000000000000000000000000000000..72cb5de1ae78e32428fe8339aad8f7be520de1f5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/3ffe53335773a02926b2.json @@ -0,0 +1,53 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/af69549e13162a34952b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/af69549e13162a34952b.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6ac36b691efecd4591e14a09deb5a073a8cd71 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/af69549e13162a34952b.json @@ -0,0 +1,53 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/be422824e6f4d14a5909.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/be422824e6f4d14a5909.json new file mode 100644 index 0000000000000000000000000000000000000000..5e30b9b4c586401ff72ddf79ba17f3df6d9afd50 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/be422824e6f4d14a5909.json @@ -0,0 +1,53 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff index ab8b3bb5fecd7b701c085531f86747dfea938891..2cbfb7707feb2a889c59e59d8217d63491448567 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:483cc3307d02e47a81b425a2b348507ea125562830bf11ef11b8e9176614a5b6 +oid sha256:2443e5ed87cd2321559cfc61edbc0d0b9aed79ea53c6020815a6e66e449c9823 size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff index 257b787a284febbe73ee31c44525455ac79429b2..a0ed747dc514f7b1c4f29a67bc3686e49303c411 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:721b582c9e3d677d1e7356cb8bf8bccbb7412056eafb188cf5b46e5f5a57f4d8 +oid sha256:64a0de5187c4d21a70fc592e7463fa702440eaf3733fefda7365e1e676adcd34 size 103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff index d0c2687fce3e90140bd283cf72fa79dcf678dca8..898526438801e0d8f85eefbd931aa0a0f5f29575 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f912e0ad529c1dc24ee1b4547ac0715c82afb234da288138b96fce745a38e092 +oid sha256:ec26c8671cbe08f14ab6cd99fba65b639956bf7fc6958535f68e2795b92937a5 size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff index 5d8d64cc698c7d22b5c05ffd7754390de1b10a1a..9c25f42ef679cb30bb37d6a1f94abcaf2e1ac520 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d22a1e7b00f466d826c4068fcc9586a8a744618dd537fb27a5d48dba12d3584 +oid sha256:6ddbfd696a0ef85d93c9d3198ab829bdf6ccf964781bb6a275a60414915f2790 size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff index d7b396e07b846097a249a0e9053d42d4fb045b51..48b8f0d8c772ddf720b2225a61e47a3c92f1ba8f 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b27c224e28dc73ee6a1c8ccb9b7667a5733fea6cc3edde7aeb6e9bd4d1aef336 +oid sha256:2a3ca6387784cd649abf3757bb3844b1eb9736e252e40b1a4968c1ece1b1fb16 size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb index ddce6194137a30c37f9fadf79df7473c870a9e05..11c60813aa45c4e0520e1943514d4184d27ade75 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b26956657e4985d91f8a65c2e50a278598a7d81fe9ad261ec755ae4fafb48005 +oid sha256:88bf2f66151be606218b2925a0449dd7916f07d36581a3b3b5bbd31c71807ecb size 68277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff index 255542790187182715c78331f325592d44caa0a9..d8ca5eea78f8b4aaa740c3f7567c4997b08267c9 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f983ce860d6d5fae63828bc4b0765911a526a11c61dfc6cc1a818d60567591f5 +oid sha256:1c182ea56f8adaf519edd44ac24b026ae1b85469a1f83eacc00cf20faabcaf4c size 257024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo index ac0581dfa659b7ab1e38d07ad5d4b178a85ff610..348562510616b4913bb3f081df2af57390e59670 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f84d825086c4ff10dec4e4f41cbdde297d95f64d4cf72aeca48b9463bab0c383 +oid sha256:a62d214cf48465f4a0b90420bc898a8fd7f5f9a8dcb6370edcdcf87b44b719f0 size 268322 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff index 130dfc5e3a5d9f313f7282c9f4920e8b662d6d79..5d07d9582ddb7d560c54abb423fdead4a855f306 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77dc8a6a2e4730499bffe4a7c4ddfcbc83cb23360dc07cf8d982887bf849d0b3 +oid sha256:a4aa452b0cdbab618fc46a764cba28672fece03e81d085eccbda181a1b57b1ab size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff index 9023f96d8ca00e33158a310580b3780248b8b181..29afaef2bbe1e51037cdeac1f7a0bbf9ee20703d 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d4f25ebae7cb3e88441333237eda262b68a21cf2e36db1c4b765e21098368ae +oid sha256:7d7ca0b813faff5c0762966a281a46cde181daebae6b2e6da86be772e748dac6 size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff index 91f003b17ae311dd1a62d2c0dc36f286c29fc951..2d13b467a2f9bcc1fa99b9ed354de63b24f0fe49 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:62acf3dbb3edda20b1ca398b9815e08bc88f7b8031e892c2c8b6e0eadedb4727 +oid sha256:cb8166c37217252f0baf51dbf17ac221fad45563e85dd6ba5929fc8a55b3dd8e size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff index d2de2a35ba50a66660afa04cedd4d6bb2dbd2d0f..ff557dd2109a43d9c8b368c63d9b37bccd558ef7 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ed41f1b981e9f0dafae215296a5fc24f70084bace6a19ae6012b7ab362c0f61 +oid sha256:a3e8943204962ee0818af6e9711dcb0ead3e19d18121061ed3bf339fda9db608 size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff index f43c0dd6ff069a864aa7c93e990d770c5ba6a8eb..e93b8ea0906e4d2b40c1786fe0dc7fdb3d2d7dfb 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bac2106f714d76ad9204fbd35121868918c15d9050bdaf3c2afb5b50db8537fa +oid sha256:e769f891bcdf52ba48bdf4d9cdc171f7ed5b3c7fa8e80d8b5142f02b0d9a39fb size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e2f06e19836e6ceef1d8fc718dcffd5f6962937b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a92994ede93768a306aba61f9ff65cc12fd693d6855af9e79912cacd24c53e8 +size 81347 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9ca0f655f8891bf3b1097ab1281e2013050b5c7a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d21c560d53bd91367776a6093faa716f98f4a46654b4e26f21d3f03aaf58b6f5 +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff index 864a86c7fb9ca81786cd73565fc769d619f797cf..80db98f924e7b72a3f42399f56cd6f495cf77213 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5cfd34e58c445f08569b573f3ecadc8feeefa4952fb932d1a75ff750af6ec781 +oid sha256:89bae87420edb640dcdaf0058d4d072acc4dee3f5a7a0bc3e4bcb1e842cea2b1 size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff index fc517bb90dd654654b28b83f61909d245a60634a..3b7d7fc591092d2b48a9925ce93aae50403e1b18 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d6f95a7d6833dbe3393089435d052f75e29eb6ceaae03ed45f7c8484db314cc +oid sha256:c7da23595b827adb9d8b7a49fa9d5045523fe83ed5b0ea04059e93bac81a6388 size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff index fcb74d5152befcc69bc39fecd7e4f8edb36561cb..1b9f837d171a2bbdf38c46a49c7907b796574d9d 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a46dbe504a3da3305bc17e8a918fbd6b3dfed8e231fca1897661561e2b99ed67 +oid sha256:612bab8ae99ee0d23a9d65a92642ea53faedc02eac5be589301f340ecc12947e size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0dce35de5c057defef417457f90df48d8e778ac4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36d8908f9c41121c1355085a90259da1519666fd64cab6bc119930a495d28013 +size 80893 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7061c41442bf44df4d26426b0f36ce90d606be7a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfab6c5ec7ab6dafc649abcd5f9dff3c79bf3a2e3141853a22ed2759aac7f12a +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8eebfed3dd8f7811ccd94180025637a604c252c3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eb963bbc7241580114f5297fd62fc3e8cb19377eb953deb673fb5999c37e8c3 +size 254591 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff index 51935e700073c85d0c761c2de17861173fd0aa9c..62606222d91c3e1f4a361c2b769e36230765d8f3 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a6690dd12f2574b84fa1bb885dfc0d5c8e404367a3c0543467273995955cf74 +oid sha256:d47e234954a33ef3ab081870796384f1aeff31d539c4ffc08d234f774eab49a2 size 103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff index 0bd18c7e3d67480cab8615fdc0fd8f2cbd0b0588..6c5ecb1c4f542b7c09ef36201ce0b413e3c2089e 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f4e4e69e199614930a4c72c1eb5005674e416a5565f4af5acdd6c718c5db75f2 +oid sha256:a67d32a3ec490fd861d862971d7785f2b09b4d194a979085ca624e5d5d4c2ee1 size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff index 9364d95f09cb1c9cdd59c2c4575ba3936ae879f9..f24ec35c85d4ccbdfd8305bcafef3177c3f31476 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:64e0ad916cf7fee2927046f42d1a2894837ce6223a638bc1e9d6f49a0680dcad +oid sha256:42ab95be0675292eecd4b501ac040ebdc56436de5b667c1c8635b309be7968a4 size 103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff index 50d1b4bc014276e6f05b86bbdad61b202af3ef3e..c80fb0e7908815a3097d909ec9bb9769a2f1974f 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:16984937693e7ef0c5425dc19be823c273d12c6dae836a2b89e48f36192ffd76 +oid sha256:030a86fef9f262d80476617fdba0b793965fed279d62c7e0b6a6766befde4626 size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.hlo_module.pb index 5a304d2baa315320e6dff0238090ee73eb3ce472..27f0ebd69a19693e4112f84c10b24a863c69293f 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41490d63779b527575e2162aff5fa91b3ed4cba95b2fd0e4204eaafa113eb759 +oid sha256:f894bf1ee79204a45884e8a3af4127e518849e995cbf7e8d136700c1198843fa size 79431 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff index f5b3008c3a2a397bc1fd6297c584aac63027bb7f..db3f1df14c555ea73afef99f21ed016b63befcee 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a44578f4e0aa4f000ca0cc620b75ac32619a659c12e1ae27846ce414dc2bb0f +oid sha256:8d94fd1f20a72bcd3092ed00f14c78dd8622cc06a2d6343090e9de5d76513b74 size 318464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo index 9bf88838af0f00452e248581b32ca8a96c4229ef..2eec8cdb9a46e8ed97f1d9e8eb8c0a6f34ab4cf5 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06e51810e6dbf9633463d0bf3b533a3c6e6365923c6e96b2a836515d831506c5 +oid sha256:e7219d56c85106e3db6dc64691e64d0c47eef47e8d69b18d0d780f1e4efd4434 size 329762 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff index 5ea0f6aadaddbe8790b37ba04943f7e427e09b59..5d97a0670e46a6a998ca1f8019c881027502d308 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0653b6870382c6bbddba478c11df27a418861fae2fb51d7235e2a8d5592685a2 +oid sha256:7534f49d9e6d81d2ad3c7f3c0535230f910ac6ad0135fff8519cfc39ab1e29bf size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff index 9f00946d1d980c0f4bce8553395576667b860713..5cf0c64ec2866ad773cd7a9c51831906e6010d4e 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:705ad67c1257846117399f0857438a3ac6018074efc2b824f94e427c4ff726ed +oid sha256:977848c5fb76c6b48cf2a2c3d911d47366b76f1a23010177a494a420057b968d size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.hlo_module.pb index 534cbadddda59c7e8e1982180a4682ffddfb0391..75f0de8d03f6239ddbe6e17aa47e4e56f52fbed5 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:49de28374bcaf21ff8b9e4d9599230238fa0c4a4b50a7feec9fa0c1578c20b99 +oid sha256:0e969500151ea3164de6555c44cc134ca744cdf5dc03e1ded8fcd7a096e40130 size 68279 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.neff index 0f5706fea4e65f6165067ced3dbb6466dac39df5..bdd5e3f8e06f24bcf1c13cd1c47592e96fc82b40 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18dd085594bcb680c720435186ec3c4c67bb2fb2851a29124e7da562fcb46c65 +oid sha256:65e05c75311a31c070885f7868daca8a266e3ffd4f1e9a86172b84cd31aeb9b5 size 257024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/wrapped_neff.hlo index f625ecf51f6d15d15e1f32b605b1ae9f4e276ef8..d679c066583f37660b33e2f87e9775bde5aab73a 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9656a0ebe3e115e02a1d1efef024015f941e71bcc270262b6ad45d73dad831d +oid sha256:8abd89baa6b7133c794c01d4337a62b818823dda6d240f331369394e62eda164 size 268322 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.neff index ccadeb9e0c951e6175b1c218ff5c01f06ec2c159..0c09ab1a8da36c904ed15ed9714fd2e3f967d72b 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4bfc3ad6a990d0a7d2ffecb4cafe9e197e3f35b7b06ddf12cf8352f747636435 +oid sha256:bc5d8306237c3b7cdbfc9b553a0bc614372b7ecc5caeaf606a7ea9a38061dd0f size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..373234af314f1667446490db9cbb1091b7d15328 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8804e4b1c31ce0a1faf1c20a2accde86fe1531d3d6b6b3b7955ef1ee66b4982b +size 80893 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e86da427162569f8b07383986bcbb7249f5498c1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ffa5f4adb99f3617cc34b352c717cfdc4f72247c0f9b13b31fc4c2d0f42cdc +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8bbc67eb48b68b2a265130ec0d277f5b56326e47 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cd13dbecd2127b395f61cf0ba83e6e34aff02c6228bf029515635b706cf976c +size 244351 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.neff index 033d61f252b452267675d2161bda25c8172a42fe..9ca2b767cb4dead3fa8ea93745da00ed3603450d 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a845cb3f033e179e1dd42cd98072643e269afc1921cc1217db1ca9c3a9efee05 +oid sha256:d33e9cf514f5e1aaae87bf65497ff896c9e6f5c3a618ca6d39e66362e5762167 size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..07063f7d526068909f4270de2de5d9ad099179d3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23182cdd2cb7cbce5b56f6fa076bec977ea03ff720ab4631f540baee2823a468 +size 81347 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b06a2191e2fdc9c43b76e91c1e5de2a0001d94a9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dd77d420f86830cfb8ddcd832b8cf18b204370423d9edebced3953397f794e2 +size 226304