diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4c5b9cb78dab4bbd871f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4c5b9cb78dab4bbd871f.json new file mode 100644 index 0000000000000000000000000000000000000000..03e8182b14d42027a6cb20e78eb486d90415f0d0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4c5b9cb78dab4bbd871f.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev6", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/8456948f6fde59831390.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/8456948f6fde59831390.json new file mode 100644 index 0000000000000000000000000000000000000000..2d3bc2f1d4b097906293209221d7d328806d319e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/8456948f6fde59831390.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev6", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/8c9f1fab702cddae6156.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/8c9f1fab702cddae6156.json new file mode 100644 index 0000000000000000000000000000000000000000..5671376852ae7ab1d81079f0e67a7e523088c3af --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/8c9f1fab702cddae6156.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev6", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/1fb5c6b213ce9eb31109.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/1fb5c6b213ce9eb31109.json new file mode 100644 index 0000000000000000000000000000000000000000..f08c857014db99033b634bf5a12a5ac3baf4e3b6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/1fb5c6b213ce9eb31109.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev6", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/be6cf655a8b36069d883.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/be6cf655a8b36069d883.json new file mode 100644 index 0000000000000000000000000000000000000000..83e3a0f50fc3f70a0b6095ccff6dd990e2f7e0b0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/be6cf655a8b36069d883.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev6", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/bf840674318c6cdc6330.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/bf840674318c6cdc6330.json new file mode 100644 index 0000000000000000000000000000000000000000..326cf5ad134af156d829b5c65c687672cdf1af68 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/bf840674318c6cdc6330.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev6", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/c59993fff4b07fbeb262.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/c59993fff4b07fbeb262.json new file mode 100644 index 0000000000000000000000000000000000000000..eb6ebaee4b772bf26f82ab308c740d858e339573 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/c59993fff4b07fbeb262.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev6", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/e4975f3e0d271e6155e4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/e4975f3e0d271e6155e4.json new file mode 100644 index 0000000000000000000000000000000000000000..5a7a5ee17130f6152413618a0c539082ad34fa5e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/e4975f3e0d271e6155e4.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev6", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/fb64b85b484e3ebb9a1d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/fb64b85b484e3ebb9a1d.json new file mode 100644 index 0000000000000000000000000000000000000000..c4a4c06adc05bd2ec8fcf8e04da3183b3978f27f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/llamafactory/tiny-random-Llama-3/fb64b85b484e3ebb9a1d.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev6", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/14f6b812ab3b13c199f1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/14f6b812ab3b13c199f1.json new file mode 100644 index 0000000000000000000000000000000000000000..1e243d6e4b1522abe7b896ae749999414956aef0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/14f6b812ab3b13c199f1.json @@ -0,0 +1,56 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 4, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev6", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/1c3abae211f3e919b7f0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/1c3abae211f3e919b7f0.json new file mode 100644 index 0000000000000000000000000000000000000000..2a286d1ed2be63338126195474c710e618fc0eae --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/1c3abae211f3e919b7f0.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev6", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/b21b3ac2511d590ac1c2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/b21b3ac2511d590ac1c2.json new file mode 100644 index 0000000000000000000000000000000000000000..f8f42166083c37278ebef8cef259947e0546ab81 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/b21b3ac2511d590ac1c2.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev6", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/baf0fb4d07ac05e74df7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/baf0fb4d07ac05e74df7.json new file mode 100644 index 0000000000000000000000000000000000000000..e1c3f795f96b5397a180d861126352b2b6b98b60 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/mixtral/dacorvo/Mixtral-tiny/baf0fb4d07ac05e74df7.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev6", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/076d544cf144121b2024.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/076d544cf144121b2024.json new file mode 100644 index 0000000000000000000000000000000000000000..190d0e13b46880283a8e19f326d66171f964f084 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/076d544cf144121b2024.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev6", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/9da282b561774776761c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/9da282b561774776761c.json new file mode 100644 index 0000000000000000000000000000000000000000..44ceee6adc82fe96adc8eef4f9ecf6a00f21c890 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/9da282b561774776761c.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev6", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/bcf8d0c54c45b277333b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/bcf8d0c54c45b277333b.json new file mode 100644 index 0000000000000000000000000000000000000000..c0c91231e686515da13b2773c70423e9a069cb0c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/phi3/yujiepan/phi-4-tiny-random/bcf8d0c54c45b277333b.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev6", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/0dd437c3697ceec3b894.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/0dd437c3697ceec3b894.json new file mode 100644 index 0000000000000000000000000000000000000000..665742cc0ed55ff05166bf11ab50ae0cb1a84475 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/0dd437c3697ceec3b894.json @@ -0,0 +1,53 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev6", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/44c8abc3ecedc8598f8a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/44c8abc3ecedc8598f8a.json new file mode 100644 index 0000000000000000000000000000000000000000..c774b08d8e0d22b0248f58305c915c8399b3302f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/44c8abc3ecedc8598f8a.json @@ -0,0 +1,53 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev6", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/d472f81e5c657f79c2f9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/d472f81e5c657f79c2f9.json new file mode 100644 index 0000000000000000000000000000000000000000..151ad7a7881c5708ea63b21f1b536f503bc719c0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/d472f81e5c657f79c2f9.json @@ -0,0 +1,53 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev6", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff index 3a233e2ad3cb140d9e21eca18c1c872dea4be789..8dfa5a4fd0dc709002af1cbfd45b811b770251df 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:104f51ab3675a23b89a21db5993a27e9a5eeb09867470734c4497617e146159f +oid sha256:9087c314f6fa55423fc6a68ce21fe41a23755e3187e5ca7b6262bb32852394b8 size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff index b9bb661af47fe0a6b9906e2eb1994f0e20820022..428dd97727788bb36425bb4957ed567ee415be2e 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f539c4d46bd7143bb99ab78bed121b4565d8bbb9b15cd5fb0d86fa42eabeac7 +oid sha256:718f1087fa929f9e8019a422802ca71044b98f0f2737a1946a86fad0e34be7c4 size 103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff index 2c32aebde28ce9c1888970a4ed8f719aa2a80084..20a49873e5347c072242d0da446a8920a2b7f1d4 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e4017e0a785242f34f89ad69149e5db39b1a813f6c647b8b31659cc1f0868e99 +oid sha256:433777267e8f7fbdf53f2c5d8ea049016dc5ac3db4ce58c1013158cac1b7fec9 size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff index d6ee4d3e056a02793ca5ef725bf9517dc9c20d16..7ff3e47a92317f2dde04f50eba7e5ea4fe55247e 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bfd560af6819825c7e6f05303464c481e10d74ad257fc87de57528001c694afe +oid sha256:38e3d7eba41dec4525d00434677bda5b3628a20542f8dac4da11a04905ec8603 size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff index 9bd5cc02345a38014c839255db8a26d874df54da..2d2829b6f52cbfc6b6fc6c09cc44dfde7f2b547d 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d20351b22fb897e1f04a4638ab32aea1487833c6b89eca18339835ba2fe27093 +oid sha256:2998d347ffb4da8ad8ae85b0de34b3708c9b95ab1a3d3a6ef3724d39101214db size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff index 41d72af9a60fade143b3d441b219bb185e1e1ee2..b9d5f0b3c698d67c3ee06a891249a9af86c5136f 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7c20e3490f8986d6c6745d61708f73050a5ed63c89abe7876449170ae3914ddc +oid sha256:b8658d7edca7655f8ccb268fe74d58fb5c0e28ce000767e740e040effb86a324 size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb index ce8ea8140ac57a6ca4658e102461acd68aa0aa91..d8008cc92afcfe446544df1114e5a9d0d8d22285 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc90fd439316fffc61e3d8c3621617633a5cb8d08dda6c0736121c32054c76bf +oid sha256:1ae03fcab3b8d64ae3d2021aa19538fd99e346fbcb83856c5605c4708c68a674 size 68277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff index 8026db06bddc76b16b82740a6767b0e1993b329d..fd2ce084bd43afef043f872978b9ef43f61294b9 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b88ebb6d4762cb58b080eb10990ffb1777c6b5dc0aec53275fbe9a973c9fe294 +oid sha256:59365c55ac85b4319e6652d308793be3f7770506d36d9a898c3c95994f19ad97 size 257024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo index e3efef4c9c613692ea0a96d77dbffdbfd4968bed..91a9c03d71a323eb32ec6894cadec8851caf5523 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0dce1d8a73f724c37e0b050e6aabdd2a403608d397273e8ab815bd28129ade55 +oid sha256:9bbb2da6842ffd61c068a7dad0614707927a24cdeb69847ace43b44d48701cd8 size 268322 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff index 0bd0d0e80cd6724945c6e046d00746659483c45e..12097c4cbebfe7c868f0a6956c3517c43c2b0dc4 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f8a1c8c7baf1dd72136b79095ec0210f7f7ea3d2a0aec460b4b0d187d2c2d03d +oid sha256:1a436f46163b80d40e3bcdf3710bae3f53053542d3a20179699b69e649a66f94 size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff index fd3efb0aba19623e1ebec383e907a966a0e538f7..3f50373a1f5f495dba010d21ffc4b2098be6015d 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:55d6ee114007d520d9c57754d8e1ea3a59eec00c4e6fa2f8168ded13fe644d38 +oid sha256:ef98b073c4b799ee1cebf406f19161d2bff5d27ebf91a7780cbc3309bb443372 size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff index 7b6e5c2bcdcd177b7a165d1caad8a1833680a0d7..aecbe58cb0cd156b747542fde4a0f7cadf02028f 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba2fd19607592f515c10a8c9aa2bc7fe0a7cfdb61286057096848075d43a85f1 +oid sha256:4bdd6ea59a8bd73ff49bdb300a19c9c89e924f848b46f58fc0350c770d9a1aeb size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff index c43b3b1a4dfa6f50cca5c403c6b963b776f936ec..d8fa98659768e4b4e308a35f916011728f715560 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b956eaad5b3ff2e9453f6a41327cb43195efebec1d36ee916427be098eee31f6 +oid sha256:7ef05748f5996defa4be3ceb641b0afd5d573c02a707427f39dc316f28b1da5b size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff index b093e52b1aab4e8476171ffd4adce9d148c41643..1b4f6882732fb821e1b8b79a0e09af0ab0cc03d5 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:287a3588ce8935f52349c24164e47aa539bd56ee6df2ab77d46e95b3ee22d9c2 +oid sha256:ed6c2a97e73faa481af38694827d8c5cc6ba040780fe793cd59333e5d2fa99f6 size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff index ef05193042d614d403e3d9f4a3b408b149d259ea..fbf5af04ab124ae56bcaa5194a5e0b344a56237d 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f0b07361864dbb9075ea5b55536a3b994ae2afd864ff5545d540a4abcf2cafc +oid sha256:2e1a2489ae30050340d2a2d74b2f99e125ce55e7af649559b69436f1bbc1254a size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff index 5d3f97e7ee959c5533c5c04a2fde8d59691a7213..c22266e5c65859c8ec6b11b67f7c44efde649eba 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b3149c0dbff0f466cd18a167e6e043d018273baf5052a70b152409896e9bdfb3 +oid sha256:5fcf15abaffa10d5ccb148094aea3c11796dd108eef543d2c169791b865bc239 size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff index 3562218d435083a9ac90a794318e6ae5dbbd49fd..c8ee25d44330278e22c7ec66e46b78e565e59f1b 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e888aa43d68dd17d8a2e6425673085c9bf287b06dfb07cbdd37af13015759b3 +oid sha256:e905bcf110b6673f11faf97fb29fce488607c5de39d5ec35dd07d086353d8a78 size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff index 92989db15585bbf9ab05e92cd51d5f15f1f835d1..8ed662da82a68a205fce21a76c1ee93d3a0583be 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca0de0953ce248253fdedd0742bb581867a5c190e69d0ae38290643a44dedb6c +oid sha256:4f93c2ed8e16a660030c2962766e8574b694a4d3d1a5d715db9f68b047830da2 size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff index c33bfa40c8cec10436696925d90c97d8288087fe..a8fe0b848d0fe56b1c923a12a286e3b6265e8778 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92e21aa5950e02ef4f5cb0bb15b2c72bb3ab52cb2b08df8ceea51caac12ed808 +oid sha256:2bf30589edfe8ba7f48c911fbf185782d5d19b48913922c257b3dd6ffde9fd18 size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff index ebd3d563a4c847798a0ce2c64af70ddf4d222ea2..df62b6934e4be7bb42bfad4b7966ca040fdb1398 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:36efce145688562b0282e7ef51a43f3dce3f4f7374733bd83ead4d3f8db13931 +oid sha256:fba211abd63e7ed62b5d57d0a4d73db93c7a282e59a7f9fdabcb6efed59c099f size 103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff index 7420a2cc06572a08e26b78246252491071684536..ffa8c557974be5229b9de08959ffd32f14dc8640 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2eb60cb7e749593797f7592d265d0f9624eccf916ae74c891d6f86c71845ce55 +oid sha256:29e3d53dcf1b9f2fcb975c725d8fb256a6a3319118511e15ac3ba34e410a6956 size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff index 27a8ec0f30259f04c4b69621fc0b789617b4541a..2e8d4bed954444451aa45cdb0b6a9c50ac7833ce 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5d5d72f55cede7d9f840e9d12ee6d31b8a1091ea81c53727fc6b1824a8f49d2 +oid sha256:969bf0657f95b712f172b4bd0e69d6ded550c83713c18ca22e8d2e9194c74ea4 size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff index 8058b70ad201cacb03df8e1c6cb5f4d1a2380300..cd3c4ceb3546414a8b076dadf0f673350ee4d2d6 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d8a29e71654f0529198c27232daa1cddfd5dea5c29d249af21df6a0e3c61331 +oid sha256:7a84ebe3c800ade3de31e6eaf3688059f5b5fdd90e09214dbc0cf2872305adf0 size 103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff index 55e0f7ec8b69d72082c64d47ff9d48b0fafc7d7e..e88ad9646c351b50b841d6f91dc6a47ea46ac47d 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d6e030f3d49ccdc1729e6f470b39b2ee25f4ac8236d75b80b3eae3c2d8d2c380 +oid sha256:edc55474ec7083ec768bafb01ae9c71c99047535acdcbefdabead066d414d5f1 size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.hlo_module.pb index 08ab27aba51a5348c03691831ab1f28c4edb4232..7a09b438b126fd2df975c65ae2630d61bbd0eb9f 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ae0ad2fdc6bac0e334b78d00b097e0bd9b9dd5ac6d4c56422df49dc386f92f4 +oid sha256:994deb4a93f90da2b4fee4d4af87c5b9406751d3ea328ecc995da75cce552a2e size 79431 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff index 7617a9fdd04fe5e28ee0cfa6738a9bee6c6b460a..d0623b716190afd76418c469977bc2a4a8828aa9 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d2dc1cd13f63d5609eae1c77ee53b11be787caeaf54b7f54db033d8d1f322af6 +oid sha256:1588a7ea46d43352059bb8cdc393a447d1d1784af84f14251ceb80076511e713 size 318464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo index fef48e41c2911add6bd160dd7ed7a2cfc35ef277..12a4cdb78ba61c6d0f7b992b6fd0dcf19e086dcf 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:309ca07a8bf522d53c4f961ca1f8dc782cbbf51c8d928ffee3c42456ed6de287 +oid sha256:bec3a1886e11e568e658c14152e45db460a7a44b8fdfe83ff5d3dd864cd81e01 size 329762 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff index 86bcf9168a43fdf0a4605cd2ad890d9ce0148ee7..c94c15f86f59d30a5b2873ccf476e8f3f6dcf402 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0f6d84e6afa880313f964ac2446d89f0fc1b78557f0afb0ea4197a9149c9f79 +oid sha256:caa9f885f4efc918b827042f1c34badd68f9c844d90033d4730a878b3a4a9a0b size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff index 0ae39f09a133dda0463138c1b5936de27e3aae25..b53ff45f33077ef2d31310e130c308bc0b6c5273 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:400b052a3001b05fc5c17bd5cf499fcf80afbd17eb21e1f1b1f9f92effba8931 +oid sha256:440323f4af46865bceb7408b37140a999b2e02a4745b6736e1adedb1cd5ff64e size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff index d988fc4f8a65db17e7ca5ae843b02d43536608fe..5d650ce3ce442a07e5b7f8ee2e082037d6edb49b 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba5302b01ab44fb0bb3c86f3b9c0438d5f9d4be8f496501ed29ebb6ffdfb7923 +oid sha256:890ec6a0f753c9f9c9166051fe387da323ae33be211d724b810fbbbe8bc30282 size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.hlo_module.pb index 69c2d1f74b61e6da32c9d2a866dd2b90e8958636..edb1b66318f6467ea5e8c1eb2fe7e3cf0229944e 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7534743372e1709951ddf865d8ac9e7fa5be14b954891c9ee8c5191fc3612fb +oid sha256:77ace701a29dd40a20d0b86dc7d3c572990f7f98aaf5c97d77e8dd383bb4b465 size 68279 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.neff index d83eb6b7ce0e0c3ebcc732680e2002fc2f6e93f0..6e3c8e1622f8fa48990d47b89eef757868e2f28c 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:78e7110b0caf6ff75c769d70dc335952cc0de14bc70203a074e5f07db31895f8 +oid sha256:791bad2c27620653dabe8b3e87b7e6291bff3a954ee2d9117a7b751863d20842 size 257024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/wrapped_neff.hlo index 8b1d7f39909e833cce68b6e03547f6c0e9ba0bef..6d698f967d0abc48e8414d3c9b635fe9137fc573 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:304f9357e737d61d31ede4544a89f80f334324c217a7d1b6d8a9a7b37c05e044 +oid sha256:8fcbbc1b7ca4697376bd78b5e60d47e784f1794718e33613e3b6396f113635b8 size 268322 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.neff index 0ed6ee6d9ce3be5b1170c30e70fcb0b1a5e9f5ef..ecffd8fa5de66593fd64599198438d72b420528b 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:672217854cabd03e55889698c5f0dd1fb487e26626a77a309f2ec626d3996ab9 +oid sha256:fdb7711e1641f629288c767c3a3bbf44cac5a3576603be9880337bbcfe6bd515 size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.neff index 069decbe95dd02e639999ae6554463764de581bc..b948b4cef41208d2518f0c08ac0cb78dc59e8562 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8886cbfbd5c73a8a5b45deda0c0311dd1083a4df5c082193a76d331ae08b146c +oid sha256:085e116349ad0e436826ef9a6c80bfa0ff8e53df649681abf1f3668b9633f881 size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.neff index 9792227cb19ee6d7db0ae6c65ec21522f2feb8b1..e9a469ae8d1c49c7ce36b53900d26f226e839783 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:466022d05cdac3a79f0b7868c2d020ca24ab3bb18ab425364307241b4b537125 +oid sha256:2add056fdf3d98d6bb7fde68eb6c18eb51087edc96a8ac3d67981a9e835059f0 size 134144