diff --git a/.gitattributes b/.gitattributes index 075f103ac6c7c2c4bcb9216d454a2443e516cdbb..ad839b1c275ec567bd1c243f7626fd25dc550e3a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3506,3 +3506,40 @@ neuronxcc-2.19.8089.0+8ab9f450/MODULE_dd7dd651a0d4c0632d8d+a9d440f5/model.neff f neuronxcc-2.19.8089.0+8ab9f450/MODULE_dd7dd651a0d4c0632d8d+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_fc1c3fb8409b31c4cc3d+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_237bde4d2878eab12406+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_296c0d4cfa7c8fa35118+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5c7cdf7c9d803720623f+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7f00c58343c6e5a64667+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f43684c4be211c439d1f+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/723693f5c05199783077.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/723693f5c05199783077.json new file mode 100644 index 0000000000000000000000000000000000000000..a5bae9e27dec8e6feae80c246c2688e38b607381 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/723693f5c05199783077.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/9108f11b9eefc7406c57.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/9108f11b9eefc7406c57.json new file mode 100644 index 0000000000000000000000000000000000000000..e28edd5d281961b3b067d3b064577686c13e16d9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/9108f11b9eefc7406c57.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/d8ccc76fcfd09ebcd668.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/d8ccc76fcfd09ebcd668.json new file mode 100644 index 0000000000000000000000000000000000000000..d7f75753a1b9e0c199c4dfbf88393c16195136e6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/d8ccc76fcfd09ebcd668.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/llamafactory/tiny-random-Llama-3/312b9767fbf3d819980d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/llamafactory/tiny-random-Llama-3/312b9767fbf3d819980d.json new file mode 100644 index 0000000000000000000000000000000000000000..901b8e8f49705ca136c44ace7ef734af8b8f69b7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/llamafactory/tiny-random-Llama-3/312b9767fbf3d819980d.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/llamafactory/tiny-random-Llama-3/a05fa5101e7a0ff89fc7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/llamafactory/tiny-random-Llama-3/a05fa5101e7a0ff89fc7.json new file mode 100644 index 0000000000000000000000000000000000000000..b97ee2518be7a834fabad94c107f01524d509faa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/llamafactory/tiny-random-Llama-3/a05fa5101e7a0ff89fc7.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/llamafactory/tiny-random-Llama-3/a5ce4d26d7e6ff8decb6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/llamafactory/tiny-random-Llama-3/a5ce4d26d7e6ff8decb6.json new file mode 100644 index 0000000000000000000000000000000000000000..af6734df15af75f08d1a40ef44af7ff5312c9436 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/llamafactory/tiny-random-Llama-3/a5ce4d26d7e6ff8decb6.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/4fc5b9ebd8bab29bc967.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/4fc5b9ebd8bab29bc967.json new file mode 100644 index 0000000000000000000000000000000000000000..e247922c5275635379b640d9fab1a7575984e06b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/4fc5b9ebd8bab29bc967.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/mixtral/dacorvo/Mixtral-tiny/650ffc3dd5501963b4ca.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/mixtral/dacorvo/Mixtral-tiny/650ffc3dd5501963b4ca.json new file mode 100644 index 0000000000000000000000000000000000000000..c8804c12f91585027c2e19d3cfd602aad4b68370 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/mixtral/dacorvo/Mixtral-tiny/650ffc3dd5501963b4ca.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/mixtral/dacorvo/Mixtral-tiny/82639be6ed89731bb88c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/mixtral/dacorvo/Mixtral-tiny/82639be6ed89731bb88c.json new file mode 100644 index 0000000000000000000000000000000000000000..8388f3343bd8d823d714e87354e192a6ee73ebef --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/mixtral/dacorvo/Mixtral-tiny/82639be6ed89731bb88c.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/mixtral/dacorvo/Mixtral-tiny/d14042280c968ed299c5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/mixtral/dacorvo/Mixtral-tiny/d14042280c968ed299c5.json new file mode 100644 index 0000000000000000000000000000000000000000..0f87b3babac94df962ea56681fed2733e672b4ed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/mixtral/dacorvo/Mixtral-tiny/d14042280c968ed299c5.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/yujiepan/phi-4-tiny-random/ce32d327b7621cce66d7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/yujiepan/phi-4-tiny-random/ce32d327b7621cce66d7.json new file mode 100644 index 0000000000000000000000000000000000000000..8ba4d5f227e4ba9b454003c2277014634cd77345 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/yujiepan/phi-4-tiny-random/ce32d327b7621cce66d7.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/yujiepan/phi-4-tiny-random/f0c41d2c75ebac5012e8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/yujiepan/phi-4-tiny-random/f0c41d2c75ebac5012e8.json new file mode 100644 index 0000000000000000000000000000000000000000..c669dbe0fec7859fb6381eeb70f5b2e52ee2f19d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/yujiepan/phi-4-tiny-random/f0c41d2c75ebac5012e8.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/yujiepan/phi-4-tiny-random/f58e61e9cc51c46b805c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/yujiepan/phi-4-tiny-random/f58e61e9cc51c46b805c.json new file mode 100644 index 0000000000000000000000000000000000000000..2aebd5ed1ace6cb33cde999c7cdf7d5c090910b0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/phi3/yujiepan/phi-4-tiny-random/f58e61e9cc51c46b805c.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/3487a376552effdac425.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/3487a376552effdac425.json new file mode 100644 index 0000000000000000000000000000000000000000..35f396c76798391a5080caeb038eb48aed7fc73b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/3487a376552effdac425.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/b4c6140db85ce16ecddf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/b4c6140db85ce16ecddf.json new file mode 100644 index 0000000000000000000000000000000000000000..1c021f22adc3a18adcb764a7c30932a83ae8ccca --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/b4c6140db85ce16ecddf.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/eb5fe75368fe7cf30f16.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/eb5fe75368fe7cf30f16.json new file mode 100644 index 0000000000000000000000000000000000000000..26abf8a15b13aaa115422b4122a2740ffa9bdda5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/eb5fe75368fe7cf30f16.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6b9a9d6872e73a30d8926a346b9dcbed7cf2c014 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc7e7185210f12f6a2eb02ea192cca41c851d62bef86ae4edd72fa5c1658a398 +size 52490 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1b1ef469cd74940567c25073299bf7f657e12ae6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df70213923e9aad11f04521bcb75900cf7041d3fbab614a66a11045c652707d +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f5a020e7e7f8684af093e659511b14179f879dcc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_01af6db1929577719d82+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83da799e58f62ac78702dcb279044d009f4bfea7bbab3d13204d8f89027ca834 +size 195507 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1b5464b54191da5ab8543d3bc7d77e592fc4f5c2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7247332e2a30e23667bcf96312db2825f0dff9a5edd81bea17bcf5e84a200a4 +size 81342 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8613519f8a2afae39bc372f702ffeb5636bf2e74 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e9a55404a52572809ba5ea4756e4e9e2b648d9f971e6c366defad000bfe8511 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8e61339e97670ea59af0149013abf7beee01db1a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_03d38e403ebd62bfc408+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5f68c9acb488bf85677917464c641c5623dca68d9f1c798dc5537ac2e951540 +size 244319 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0507b0705d8d1fea9bf2+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0507b0705d8d1fea9bf2+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0507b0705d8d1fea9bf2+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0507b0705d8d1fea9bf2+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0507b0705d8d1fea9bf2+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0507b0705d8d1fea9bf2+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0507b0705d8d1fea9bf2+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0de9a8882a099a7a6ceb06532311a157e60b6a7c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0507b0705d8d1fea9bf2+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8330f317bd1e6589d0d897bc5934f0bbabf4c964f84a1c76e43776d9f5f57506 +size 7092 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0507b0705d8d1fea9bf2+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0507b0705d8d1fea9bf2+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5099eaacb8dac91480967418d9b4e17f6f0a3b6f Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_0507b0705d8d1fea9bf2+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0a941a976d4537ebcabd+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0a941a976d4537ebcabd+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0a941a976d4537ebcabd+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0a941a976d4537ebcabd+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0a941a976d4537ebcabd+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0a941a976d4537ebcabd+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0a941a976d4537ebcabd+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f8d8bc71f7dc98edf9a4877cdaabc4b849903f94 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0a941a976d4537ebcabd+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15bd9066e19c8f67670ed81bafb59335e3afc1481198929abbc374a3f17ea7c9 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0a941a976d4537ebcabd+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0a941a976d4537ebcabd+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..48881abc5e763b6a3441051ca642b04ca0deb9a6 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_0a941a976d4537ebcabd+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1c13182ad393459c74b0+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1c13182ad393459c74b0+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1c13182ad393459c74b0+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1c13182ad393459c74b0+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1c13182ad393459c74b0+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1c13182ad393459c74b0+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1c13182ad393459c74b0+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9a2e980aff2440a892cc8caef0bb1b2f04dc4b43 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1c13182ad393459c74b0+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90460a3951fb3fb92f159e322d4410d84c975043323d8ddf47fb66e109cc0c52 +size 10362 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1c13182ad393459c74b0+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1c13182ad393459c74b0+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4f08a8cb2df5054f7605f3262a1a04aa2e635652 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_1c13182ad393459c74b0+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dccf38952d680cb11b432c4e42a3a2db917004bc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18326d5fb6180fc809a5dfa2ec9bbde3caacf1caa0263ecdcb34b77e5b13e7c7 +size 79534 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..67f65cd8b66daff09b60733d58ef778f21811cf8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c00586c7331ea06da3376b2c5b88c9ba30533b6fb8731955c589b37a9164758 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b21b1c6fd559cda9467039223007b22577bfe289 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2031502066f33605d0ea+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c85fb0e6853d63b01037a7f56e7693f357d47b0b61cfea76bae11557e43d3f54 +size 223770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_237bde4d2878eab12406+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_237bde4d2878eab12406+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_237bde4d2878eab12406+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_237bde4d2878eab12406+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_237bde4d2878eab12406+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_237bde4d2878eab12406+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_237bde4d2878eab12406+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..33f3e64cd35923a4735c566cdbb6929bbdd207db --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_237bde4d2878eab12406+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:473ef849b689cef7f622335bba72ee17735fb02e8f815b62a5be88e899c550c5 +size 69044 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_237bde4d2878eab12406+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_237bde4d2878eab12406+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ef75134ddc6f34858226e3f1830a49672914017c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_237bde4d2878eab12406+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad61a413028dcb5565bebaceb8420afc029b6d0a2738fd98223ce32dc47e41ed +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_296c0d4cfa7c8fa35118+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_296c0d4cfa7c8fa35118+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_296c0d4cfa7c8fa35118+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_296c0d4cfa7c8fa35118+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_296c0d4cfa7c8fa35118+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_296c0d4cfa7c8fa35118+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_296c0d4cfa7c8fa35118+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8eb934d08f5169db871c5ffb90c5cee675a0cb8c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_296c0d4cfa7c8fa35118+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d286857c91165178d0f6269c208a8df50fef437bc0ec241c8c0229cf8e03145 +size 447631 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_296c0d4cfa7c8fa35118+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_296c0d4cfa7c8fa35118+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..712877207284691535e65e24f435edd5f6e721c7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_296c0d4cfa7c8fa35118+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:216b7c88865f2fcdf567afaa977313776bb74c3349dc65abc5aa1f8d1c1956bd +size 32082944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..aad98e50817363a5a27104af12ebf769ea14f91b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4190870df7e587c49fbeb3aa4eb5996292c91ca7cb812297c3d52df0f9e3675 +size 89230 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6c9b5c71eeb4438f07e93dccc9959e7bf9295d36 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be6ef9cb6d9f18cf8d09ef9306aef0bd1977685275d5a18dc823411b74bae2a +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4df8eabcad383486061a675817e12e772a737fd6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2a4c6c07772e176d6bff+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ee4ed8736267c9d576c3570ae301231b4b4d78b17d5f2ed88b40fa9300764d +size 247153 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..311d4c0294f2bb8c99f89f4a4ba20b903669c442 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a246bea7a7b699f74516fd8df7a3ef93b9cb13b04c33860e496161f4ff8e53d +size 47085 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c1778b43e32a18bb4613276f01cfcd0f0e88a195 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:740142233ab5e4d2667725beb273f35c21a2b90efec957b376cc07650312a00d +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4337962a7df8a8a3a3b9cab28d300fbe1866bc9b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2bf3848e1bc7d11bcf27+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:883956d16990891b8849db61efef36bde4b7c82ef4611e406fd685f61d2f9592 +size 152045 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e6f7be4cd0492a410c0c14722ae824c75bd064e6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df93bf088dfc1ff362392dbef7d1aedafaab635073e359d885bd72ebf3d55edf +size 379307 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bbfced57dba046fbfaf8d698314b1e0e636db250 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf424f023de170d58e329ac3d6373edba76ccae1917882b088865f0e58eb60cc +size 2161664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9ad701ae374d3d15dfa26de633760da80228ddc1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2fb722fd352dfb0c4ef0+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69ae53e19107e1a54d50192bf743c6d38332f6fb69e300f8ad4031451a14eab8 +size 2231293 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_31e14bab9c1ace8902de+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_31e14bab9c1ace8902de+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_31e14bab9c1ace8902de+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_31e14bab9c1ace8902de+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_31e14bab9c1ace8902de+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_31e14bab9c1ace8902de+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_31e14bab9c1ace8902de+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5e02f24284c4c4193f91075798d9e66429f36359 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_31e14bab9c1ace8902de+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b9fb7be20f5f578aba11656b9e01cca9f1f16efb0013f618d45b3d3f7cacf1 +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_31e14bab9c1ace8902de+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_31e14bab9c1ace8902de+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..65a597ae64a8b8743728529dcef52080816b9a99 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_31e14bab9c1ace8902de+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..74dfc224aab8dfbc56708bf7efc4672fa6be3ab5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f90785b02824797ea0ae29760d7ce36695ac71f4da76c7cb6589fb60a90c57b6 +size 79534 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f1c295a90b29feb58d2432010c08be5287cbbfff --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:129e28e61e50f1c96f703481094ba4129093724f04ee4941f0acabab64ebc643 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..15d362b11f93c261c213b6132b07d3f12642209b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3ae74890eb9055522dfc+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4946e6a857af4ea5708290e96abd80ace538f381924d26a04c26b766de975387 +size 223770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..87a9d0604d5431cfce7d1d87722b3719458597e8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --auto-cast=none --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e368bf4cf425ac301b040efb612461e0a4c42370 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34b8863e5e31ca539aec62a5330c04e1f4f7761bf994412bd4f1825d4773e28d +size 68831 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..40f8b501def46650ccba342b0b5851c71657d641 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15c052e9c900cb4d557acfd90d1cbccabcb7d72d3a5269f3ca131f475a08b5d +size 257024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..29e2fc686c9e0bf15352875593d5b949648fba74 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4ac4f5ffa7821caa2254+63ea7cf2/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76d1eb8404998b4bbd242185690a39d1eb086638690e04b96366db5e7b41bdc8 +size 268322 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5c7cdf7c9d803720623f+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5c7cdf7c9d803720623f+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5c7cdf7c9d803720623f+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5c7cdf7c9d803720623f+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5c7cdf7c9d803720623f+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5c7cdf7c9d803720623f+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5c7cdf7c9d803720623f+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..66b3569ceeb9f63a60ff8ab9da3372615d5da18d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5c7cdf7c9d803720623f+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:720dd36ac01812e86d2be3c13dc4abe0b86caef04322f035f072b6ee51adec86 +size 11183 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5c7cdf7c9d803720623f+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5c7cdf7c9d803720623f+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..af6129f4a675ceb42bd8d0e11d2bab15431038fa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5c7cdf7c9d803720623f+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce0e919044931f1c6ce8f82aaafc8ac84d23f1351c2ccc221f929723cbb04437 +size 103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b81e3ff6c3f878d00fe35b74b53fb8e3f98ab09f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32329dbcfe40d77d5b7fab482041cbbc6d84800ceed3a20a19261f5e3ddd36a7 +size 81342 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..90221b6a3c4efcf749c29fcab6732952e354cf5f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88a629f47632431528581d85facd7912458f11cf897aa8be6a0fa8d39a1f0076 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f7b8e3167987ca024e12dca47dbef8b05ea772de --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5f49a1d8795d09fc1c7c+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72a94cf38fd5003389afb8aecd8f850f855043d511e4f2e4e637f9831094622e +size 244319 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_64950c85776a119cdf83+b001bea4/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_64950c85776a119cdf83+b001bea4/model.hlo_module.pb index 655824d477944d6ee3d25958ed00aead4f38d940..a1d50bd07ba3098a2788d83b1d6d78a5882a5bce 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_64950c85776a119cdf83+b001bea4/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_64950c85776a119cdf83+b001bea4/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c72a13fb767a6fe25407888d3085e20d62771714a0ae5df978f373d454fa1c6 +oid sha256:e8d6077de8d68a05606afd001979d23e8d54fde393715044dfa207b8a7db108a size 82456 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_64950c85776a119cdf83+b001bea4/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_64950c85776a119cdf83+b001bea4/model.neff index 7eeaf873ac7fa4507d95e4f649aed9b800fb9efc..ba9770b5e8bb3995b3b16f4aa8b38e1c60c43650 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_64950c85776a119cdf83+b001bea4/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_64950c85776a119cdf83+b001bea4/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7668d4bd5f98b2af1e738ec0794a4c2767b5d87a34e0b4ad8f63bd27074586ea +oid sha256:b05cbdc120dd257d8394be5bcd037f5e0145c6461b880c38dc45ecd80e48c0b9 size 420864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6a7765cdd7439b14659e+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6a7765cdd7439b14659e+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6a7765cdd7439b14659e+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6a7765cdd7439b14659e+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6a7765cdd7439b14659e+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6a7765cdd7439b14659e+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6a7765cdd7439b14659e+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8508b0621281903c9e232c5a5e5db55139cf2c83 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6a7765cdd7439b14659e+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0f79e1d9744e31374274c65877eb1317f14db8f679e3c660305986a7cf9f00b +size 7011 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6a7765cdd7439b14659e+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6a7765cdd7439b14659e+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5a30404089bc81156d441db3687cfa871a6b8b21 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_6a7765cdd7439b14659e+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..87a9d0604d5431cfce7d1d87722b3719458597e8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --auto-cast=none --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6e1aa1457c64d5641e5fb81d815d626fdc0fc6db --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f811eb3e09d070497eae108e123e12bfbdf3bc63b33f9b74bc1824912a0ac97d +size 68829 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5e591857468b013f36482dacb78e27158f6c3141 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7053799d3bfa4f21a34e73c218119c246b59f33e8ca963466797249d72ebf254 +size 257024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..34b06e9b6b5cc138d2b5c61c544637ac94555471 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7af77f8c231b0912b481+63ea7cf2/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c97e3b684db71d95d3b1127ec173cc6081c8468350ca58e371150fb98c89dd5 +size 268322 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7f00c58343c6e5a64667+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7f00c58343c6e5a64667+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7f00c58343c6e5a64667+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7f00c58343c6e5a64667+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7f00c58343c6e5a64667+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7f00c58343c6e5a64667+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7f00c58343c6e5a64667+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d6dc87730378724ade2978019f8627f10eb5538b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7f00c58343c6e5a64667+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc47e151efab0ffcced51d3891a69009c0dfe3477673b4e5c6eb656f45cb5835 +size 11183 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7f00c58343c6e5a64667+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7f00c58343c6e5a64667+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..087bf26d06995f2d1fe04d9ed8800e3b71cc1fc4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7f00c58343c6e5a64667+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c112f959e78a12f7ad5c10f6778c8cb6a6e20e6493052f238980f4dbdaf93792 +size 103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..87a9d0604d5431cfce7d1d87722b3719458597e8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --auto-cast=none --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..20c5cc854ba85c757bd20741cbf1dd1e94767786 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38ee2f66a15826d8d02affbc64590ffd6f4849aa667d606e46ce0f282f098ce0 +size 79983 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ff20d932ea67efa7bf881b807098bab77ad35957 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:046908eb512c826c06f5beaae614cb191b19296af7accc9ed99e4a794afa8168 +size 318464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1e22fde805afaf8a7b28dc55ec356a6847dcf4c1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8c2a6b120271c37fee40+63ea7cf2/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a70f48ca0ef9cda5294f463f52de110526695a8e90ccfca9ac429a06b960350 +size 329762 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_92d7a6b8bc621dee02b9+b001bea4/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_92d7a6b8bc621dee02b9+b001bea4/model.hlo_module.pb index e526506873c908a96b959e4c2c79b1fbfa26ac25..5db319518dee8749e2f1e99c9505e5d18e460dae 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_92d7a6b8bc621dee02b9+b001bea4/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_92d7a6b8bc621dee02b9+b001bea4/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb1a3918f1d35a40bdaa63ca2ab0414530c2e6f67355299fb1cf3dca6b849c47 +oid sha256:dabd7004df6faa46a5bed2da843e6077fd70fd64a670e9988cf4ab4f36dcd912 size 81609 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_92d7a6b8bc621dee02b9+b001bea4/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_92d7a6b8bc621dee02b9+b001bea4/model.neff index c8bdcc937d596649cee5f384dac0a8c4fa70b25c..9e7b038e37ef393a0787da208abbf3b29348670d 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_92d7a6b8bc621dee02b9+b001bea4/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_92d7a6b8bc621dee02b9+b001bea4/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77a270efdbffb9c3038c8583bfef6090ea5f2f00a09a5f919b96c74fcddb14d7 +oid sha256:418340ed0f5daec6397af055e4e237c35cd38b71aa9647f5c35b696e9b4c282a size 359424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7c7f54109c7b2dfa47108793669ffed33ba64c25 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aca923d5616b066cbb9b10fe6c32d0f300936671cfcf5ec1ab89e4ba297e5781 +size 44602 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..71b37e7501c9ddf50188c66f3c21d118ec2008fc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d74269d679ec8d60d8589a5952473e895bcef8b694f516c97a7c4c54595eba2 +size 175104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5848b9359409abeae49bcf869af5220430cc1257 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_93486ad8f65a66501276+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:087ee3678b8c7d2ef629708e7a218aa504fa2cbb3d73eb3446e6150026824f25 +size 182770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9a53db93d18e769ee7ea+b001bea4/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9a53db93d18e769ee7ea+b001bea4/model.hlo_module.pb index 7e219e2d6c59ede192f4f732ea4316c0d0a612d1..48cade0b6f59e358563ea3e04ad7bea97a1522ed 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_9a53db93d18e769ee7ea+b001bea4/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9a53db93d18e769ee7ea+b001bea4/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8870e76fed555598af6cb631f0f764e8e1804fc1dab8c94bd0054ee172bdeba +oid sha256:daa935dc2aef177aca71aa4c0b97090eb5971f55c8a833833000e9e9344bb9cf size 81607 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9a53db93d18e769ee7ea+b001bea4/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9a53db93d18e769ee7ea+b001bea4/model.neff index 42a4df1c986c949c51b9c3b67534564145e40f22..a9c82691382ac2049e189b1663fb1a8d0b186c8b 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_9a53db93d18e769ee7ea+b001bea4/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9a53db93d18e769ee7ea+b001bea4/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:08db08ccd4d705d4f63a0c7fa0468ba421d78943b9197b55e3351d76fdebd7e5 +oid sha256:e15dadfe460e410ad71cde19020dd05dedf0f6af671a562060ea3e18650ecb52 size 359424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7abf4013e6f6a15ab63aee3d80883afd8c358612 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd16add865b12ae3806c07a1213c995ea0099b2d14689ff1eb558d1ba1dc3e7d +size 83825 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..413f41097d6451a9fba0980d8efa1e47ab5079b7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a6d0f702f74844edde92a820ad11a89f2839544280dd5ce86f81a653a326c02 +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7a7c88baaf363b10607c26adf48731657b2b1a2f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ba097d82cae64a421d8+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cb97a9a71329183f2950e71118cea5c89bfa5da7453fb14ef74a5bbe44da0d8 +size 193114 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b7aa701991786c2dfc6e+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b7aa701991786c2dfc6e+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b7aa701991786c2dfc6e+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b7aa701991786c2dfc6e+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b7aa701991786c2dfc6e+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b7aa701991786c2dfc6e+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b7aa701991786c2dfc6e+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..30a0b80dcf6b32fb6e578e0b568c06d906d4ba54 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b7aa701991786c2dfc6e+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40af146947893d4d8f85a9812172dbd08fb580fb0333b5a4fcd99fde4257e62 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b7aa701991786c2dfc6e+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b7aa701991786c2dfc6e+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9eb2c59dbfb22aca263f8c50b88d8592cee5e4f1 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_b7aa701991786c2dfc6e+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c6dd950c4bf878ea25a4+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c6dd950c4bf878ea25a4+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c6dd950c4bf878ea25a4+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c6dd950c4bf878ea25a4+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c6dd950c4bf878ea25a4+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c6dd950c4bf878ea25a4+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c6dd950c4bf878ea25a4+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2d4efb6a62361bb4fca80ece8c5e3b792ea9bfd3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c6dd950c4bf878ea25a4+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69ec8adc0e62d42819e2e31a593d500e4ecc872b5b66fad58c6413dcecc02964 +size 10362 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c6dd950c4bf878ea25a4+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c6dd950c4bf878ea25a4+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..95618627e06b3550ca4e40b034307491a2a3e9d2 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_c6dd950c4bf878ea25a4+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..52f6974122eaedf426538b9a8c696d2c2cb4317c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d5f38eed070917832ee4e46d54e9d3241285fafa27c20907e573dd208e12b9 +size 89230 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d1fe5bd3f5eb804824e92f15c9cd7449376394a1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f29551e52e446e1afc543b715aecdacc1f7c4bc4c1cc6080db60f428eebc9ef6 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..095031b62329a5ddfc1d35b826ab113c6f5352ca --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c71097e8f6025fec59a4+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f64f08f3f1d896ecd31daf8c9dbe190cc277e9d535e831ddffb782ae3b56482a +size 247153 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dc0cbedebd6955513631+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_dc0cbedebd6955513631+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dc0cbedebd6955513631+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dc0cbedebd6955513631+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_dc0cbedebd6955513631+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dc0cbedebd6955513631+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_dc0cbedebd6955513631+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f00aad96e7f154c7b6cd6eaaac9708994ed002d2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dc0cbedebd6955513631+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:246c85e2f046e132a670dd4f5bf4c8a006c6eff5d324933ef94bae5d13ea68e1 +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dc0cbedebd6955513631+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_dc0cbedebd6955513631+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b88095dc997bbdad3d8e278b89500366fa4b9aa0 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_dc0cbedebd6955513631+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e30bf980b957282a9728+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e30bf980b957282a9728+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e30bf980b957282a9728+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e30bf980b957282a9728+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e30bf980b957282a9728+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e30bf980b957282a9728+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e30bf980b957282a9728+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..033e5bd9a8e970b6c4182e5a5fbf69c05b884885 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e30bf980b957282a9728+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8f517b4bcb23eb042997d5f007bc191cbd01909c90f92834d7a81c8e067f8d6 +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e30bf980b957282a9728+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e30bf980b957282a9728+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5192db03c51225c764d2d3a102e2d2900973dcf1 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_e30bf980b957282a9728+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e3a13b85dcb5dcea901d+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e3a13b85dcb5dcea901d+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e3a13b85dcb5dcea901d+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e3a13b85dcb5dcea901d+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e3a13b85dcb5dcea901d+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e3a13b85dcb5dcea901d+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e3a13b85dcb5dcea901d+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..68aa30e43cfbef54a9968bc017dc327c2d7d2bce --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e3a13b85dcb5dcea901d+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ee70ecbac589b9d810beecbc33403025e0a508fbd6e1da5a4a39e153dae8c79 +size 10010 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e3a13b85dcb5dcea901d+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e3a13b85dcb5dcea901d+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f20637fc6797213610dc9607a0f981d3bc07930a Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_e3a13b85dcb5dcea901d+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2d4ebe2adeb1c2fe4d49daa513484296d0e0ed41 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c6e9e4524b83ad9d134cb0bf2a4f3ec0adde39b50051e5d885478d8dac62e5b +size 42794 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7515ff8280ed5d93a8ae5b9396d6059b9b5a6184 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f665328628f7abdae2fe10466061620c2ad60b349872110003e39acfa069747 +size 164864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d2bdf0a4f6dbffe2167ae5d534e8a5f78ea7097a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ee8cb7dd9350a372ca5e+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c327b67384de8ed1c70ed4a1364fe156416700790423ac6dae32da84c6f7147 +size 172461 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f3190a8210cd76cb727d+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f3190a8210cd76cb727d+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f3190a8210cd76cb727d+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f3190a8210cd76cb727d+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f3190a8210cd76cb727d+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f3190a8210cd76cb727d+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f3190a8210cd76cb727d+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b216ce499b8af5a1eb8f75a1edfdab80ed6e8422 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f3190a8210cd76cb727d+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84d98c42dd7ffb260ec806f4b7b8555e0ebf2dbb04f8672f74e1536ebd18d015 +size 7004 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f3190a8210cd76cb727d+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f3190a8210cd76cb727d+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6e9f4faf5da637e29bc45bbe5c870a4e58591b94 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_f3190a8210cd76cb727d+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f43684c4be211c439d1f+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f43684c4be211c439d1f+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f43684c4be211c439d1f+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f43684c4be211c439d1f+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f43684c4be211c439d1f+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f43684c4be211c439d1f+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f43684c4be211c439d1f+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8d9164bbf6bfb96fafbb6fae2181380d5318302b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f43684c4be211c439d1f+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3434931dbf68431d1194bedc6d92cc8a3797a71dbb0da844f0427e40f54886f9 +size 11183 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f43684c4be211c439d1f+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f43684c4be211c439d1f+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aa1d5611bd6d6666077b95e5002851da360f9135 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f43684c4be211c439d1f+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2587a700449736f623b9d9a187221781bd442e24517cf5833fdb7f3d6d6dce62 +size 103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1765dd747b659e06daf3fc7a91a53d910ef1b8f9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d5b0b11f69a6896ea95e9f02724ed80b1a7dd219005d4d91cf35c7e0447e82 +size 83825 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b5ca0e25720721df66ab722c8107b16d8762a10e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fe84f10f8e975d6700f163125c25ff096ba2eb5ed88afee0c059a5023c06fec +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6eaeeb3d1b39724a90a98d8ba74c1733c041b21e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f9fc90d7b6679b6ca84c+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e591c21506e454a928bdae8b80463737c86ec3b6f1a766a384cc63d1d28aaf79 +size 193114 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fbaa5b03f774dfaa214b+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fbaa5b03f774dfaa214b+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fbaa5b03f774dfaa214b+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fbaa5b03f774dfaa214b+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fbaa5b03f774dfaa214b+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fbaa5b03f774dfaa214b+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fbaa5b03f774dfaa214b+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c1ab58f32c71f1d32dee441b486f37f11dc3898e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fbaa5b03f774dfaa214b+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9301197ab622d6cf7fc9b997b77ddb90e143075decfbd981455569f9c3a19328 +size 7011 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fbaa5b03f774dfaa214b+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fbaa5b03f774dfaa214b+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c7c8a9be0c4f509560de6d5279e384b225e37b6f Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_fbaa5b03f774dfaa214b+431f5505/model.neff differ