diff --git a/.gitattributes b/.gitattributes index a33405cb4c16e773328ef72c10b907a538018c21..1d54bce7b2392c0f18bf3738cb78da43158f48cd 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2965,3 +2965,28 @@ neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/698ede202023fad6e4ac.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/698ede202023fad6e4ac.json new file mode 100644 index 0000000000000000000000000000000000000000..f9db0fe2b47988867e19cfa9969e63566683b31c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/698ede202023fad6e4ac.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d1f56a608fd1f85f24f1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d1f56a608fd1f85f24f1.json new file mode 100644 index 0000000000000000000000000000000000000000..ed3b38c98d9e1b387b66962b48b9690a173ee94e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d1f56a608fd1f85f24f1.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.3-8b-instruct/8e67447ff0fe199668d6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.3-8b-instruct/8e67447ff0fe199668d6.json new file mode 100644 index 0000000000000000000000000000000000000000..41994e12ef5d0f71f9b7a3da3a0390d1cb5c6606 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.3-8b-instruct/8e67447ff0fe199668d6.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.3-8b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 0.0078125, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12800, + "logits_scaling": 16.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct", + "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49159 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/38a5aecfa62be8b081c0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/38a5aecfa62be8b081c0.json new file mode 100644 index 0000000000000000000000000000000000000000..6c4d11132e576f710384e40866d2306385507476 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/38a5aecfa62be8b081c0.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/91f06166632f7d2d7771.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/91f06166632f7d2d7771.json new file mode 100644 index 0000000000000000000000000000000000000000..860b69d18a402bcfa80933283e8fc5a4a66395d0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/91f06166632f7d2d7771.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/9a804e057317591235d2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/9a804e057317591235d2.json new file mode 100644 index 0000000000000000000000000000000000000000..93be311cacd8ddda195f8cec66372b7830815ef7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/9a804e057317591235d2.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/c65c50ec2ec44d68f235.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/c65c50ec2ec44d68f235.json new file mode 100644 index 0000000000000000000000000000000000000000..aace1a40cdc895d5610a7fa6a9c9f41528481741 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/c65c50ec2ec44d68f235.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 1, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 1, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f0e9aaa862adcd81237840cf8f54c82bd1327e3b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:892c32ed92799e95d2ab40d5a34738b04d2f7143e1f4624f0929e59758e23c64 +size 535749 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d76ae94b4a4a396fe00b113ab1a65e61056cb5cc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81343b6030b10b8e0afc06e398050dec11ff840e24955618dc85d1e7fbc5d78c +size 1772544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6240d7fad258db83a139b204f752988d8fae8c35 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da48877712718e281765474b71b2b7197a4394e8f1c2aa326df846e21f1d6ac +size 563496 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c3091c3924c89f79d73ff1496202abf570c2a1a3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf4a8f9b622920e767a57582e10bc9ec2f1aaa1fa3e330093ba80d09a509cf8b +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..098ce4a2a149182948cfcb3c3c054a380ccb8ca8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63751a0bb5e7aa9b0373ea81eb9ba1650b5eb601b4169dd868046c083c38c781 +size 971593 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9866f2ba36d1b366e5b587bc59c21831197c0256 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d945a6bef3d712dbcb309c056d799eaefa88d0b05342e5420d0f6d014cbc82e +size 3369984 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d6b00f34f8d0437751a92aa607cd661068dbca47 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2019af15f575af164d7ff719706b72c086c5cadae8453404f2cbd0a16472fcec +size 3543112 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fab2df07043d16d8e4f8be2fd6ab0e9f465ddd16 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd95ce79e3100f0f971325b4e1a0b5d080fb4443153405b5869b9cb4c8870c91 +size 378945 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8b54245249a63b9679f08fecbb863def34d98f5f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe54150ac740c2430e66289586ee7d6049c299eeb3e0d2db71b5ffe42c844a8a +size 564224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3896babd2c7b18e58568f42dd2d98813dfa378a4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b003122b37563ff19e4c2d28bc52b81a23c0510a6e7dd43292f0f777bed842b +size 1887935 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..81c05a102555a487c8aeb18ffddc37f7bcf16ca5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e6aeaf87f5edff5233503800ffd18f97c872b18ca3ff2ad3ecc68f0305e905c +size 25160704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1a548604c4387c1b2757443d1f7ef9633e1a437d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f3f22f5a5aed80511ff092bb6c5efd7cee45e557c3b5bd10bdc47754f159d3b +size 540237 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d7dd05c8c24936f685af8137b1e30b9603684458 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d970a56f621f6c119ee6f8fdebf4107a3c8d50fc760b157ce0f1a87412d41a78 +size 553984 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ef99ee290057db16812cf266e1c9ef4d58f3741d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:291b718353dffab0649ec844f053b3402735e48e4fff5c9bef9275c03effb4fe +size 688151 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6119318c336c8f78ac75bb168bbc986b1c5f59d8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b711200dfd242da19e652bc20b035b0c3436c19ecf253b6a5f2d0f22febb349a +size 172324 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f3cdf9ae90b0df8ca1fd3642d8760d66b72bae73 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d97120bee25e3b2ff4a812692964b8c62248afb0d20652b898baa6856e8966f +size 2397184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ce2a3bf3336f6bb70e419807e84fec6a110ed816 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b2ed73a544b049277a3e081d48cd74f24a067c1c0d8df51acf59a9db985d1e +size 2400727 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4f4be35ee915d4e394e25914879a78be7501cb45 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85dd8e9800800942b8f4f44efc280befa8d4d41e1d9a489a9440de4d6f0e3768 +size 4885504 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a11f06f5eadbe7ba1ca0165dd4e2966757d866df --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8781e8141bf01e24b6e3d3eb546d7fe388d234ecfd5bca446268e7a007ed7d +size 5058549 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9211fba2aba3128de46f1f5f973d6d901eb8f456 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:401233f1288017ae6d1a5139e126e22161c3152a6145c923b5b78040dd3f8c83 +size 171444 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9b41ed5096109bd095ef6d5bb110406ff6dfaf16 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2eae1d3728ce2c5fe18d5df1ddec5217ef5ad2b4a0973aea8e3e9707bcc9507 +size 7353344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c99cbcacc2327486cbc3518d8d009679d2f104b1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d33a1cd799da49294617b2445cab66236a026ac0e51133ae310607b88893c2e3 +size 945923 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..df69480f80b87175ee027b3999114ed9e7f23246 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2d0772cf3bdd271e9fe396d7ac39333b80ba4a64d2cce91bb55ebbea5490919 +size 917457 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5ee958efa9d808fa7242+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5ee958efa9d808fa7242+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5ee958efa9d808fa7242+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5ee958efa9d808fa7242+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5ee958efa9d808fa7242+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..407a791d3f4904522c420cff0d5a2b011392837b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5ee958efa9d808fa7242+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:141fc96d1818343609c0bf818dd0a83270912fd4483455868a9bab88d2d6ae7e +size 966389 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e555d6704bc91fede87ce374ea97bd97bb06fad0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ec46e38cc84cd784f0920bd6358fffa01c7e67230c84573d0f3a7f89fa18554 +size 977926 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6df8cd04b22da726d04f5cd129e3d53b349a9ec8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e091ae75e1440a5da5acf08299d17a61ef35c0cb387c8b01a715274c3c72ead7 +size 7855104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d91bfa8e488e97672e29ee599396fabc1f665dbc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a3588611a69dd57f77d779be59e050cac0a8d4905dc22b70918e200c885df2d +size 8028152 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_673e51ac384fab9697ae+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_673e51ac384fab9697ae+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_673e51ac384fab9697ae+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_673e51ac384fab9697ae+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_673e51ac384fab9697ae+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5e61681ff03d9dd6c61b17c68b443c4dab10193b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_673e51ac384fab9697ae+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdb65ede47484276cbb461fd2f18e9e7f30d802794b8eaff72aaae4a32f58e2 +size 945923 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6c0a877521d4ddb694a0+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6c0a877521d4ddb694a0+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6c0a877521d4ddb694a0+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6c0a877521d4ddb694a0+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6c0a877521d4ddb694a0+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0ad26984923816bb113a1d1b615201345ca1f8c0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6c0a877521d4ddb694a0+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a78092af6d0ac273d51ad6b0c0472a34906a89040183360a458412798c303322 +size 939590 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d914c63b7d9f51d75362e6ce39b4618a9bfa2ffa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64f711d59616053676d4c52871e26b476790b67af3c61e4dfd0b718c26b1c1e +size 1055910 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2b14b1ad2f68126d829c7343ef8139f72610ba77 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94897bd4bd3e98c6ac72adb5a18d582200d033e86bc7ae23d99f8a26aab9a4c6 +size 4393984 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8454944255b12a861559+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8454944255b12a861559+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8454944255b12a861559+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8454944255b12a861559+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8454944255b12a861559+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c46d714ccce964090191a0b0346157aa2cb567c7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8454944255b12a861559+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90678240da35ab496780bc71748aa7fd8b09f4bb2e2d37b927a085847851a00e +size 523648 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_89848ebfebd7d11a23ab+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_89848ebfebd7d11a23ab+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_89848ebfebd7d11a23ab+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_89848ebfebd7d11a23ab+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_89848ebfebd7d11a23ab+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..afa3655aebf9a9522cac8a1712b8b21d28b12107 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_89848ebfebd7d11a23ab+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b139cfa84e4179f620d157a2f5fd2a271bb7453b0e7113153fe155b50ad1aed +size 383578 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c59a264b46ce4b30223+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8c59a264b46ce4b30223+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8c59a264b46ce4b30223+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c59a264b46ce4b30223+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8c59a264b46ce4b30223+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..185a89df3e63dc9e467cc590fd49aeb666acb26e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8c59a264b46ce4b30223+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32207d864061997b733a99dcf1382daee8a3183eda3ba816c4b7dff82bab9fc2 +size 939590 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_93df084c0ff46a4171f7+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_93df084c0ff46a4171f7+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_93df084c0ff46a4171f7+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_93df084c0ff46a4171f7+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_93df084c0ff46a4171f7+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..83893be71d9728f4d1552b7d7238c63238e4e5db --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_93df084c0ff46a4171f7+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:764b792a19734d86d9955a6731822b4a916ffd9915b9c9d78247c6c0bd628d9f +size 940070 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_978a1d4ca28e11a7fe66+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_978a1d4ca28e11a7fe66+bfe5714b/model.hlo_module.pb index 9d660b8320e6f63366c3590b271e86e134656745..5118a49c780d68596c38bd009a069a8a49d27d25 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_978a1d4ca28e11a7fe66+bfe5714b/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_978a1d4ca28e11a7fe66+bfe5714b/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5203ca7238edc28eec3085956187f5a1683d523190a1b9955df9d4697f8b82e0 +oid sha256:0b10c19e280101ecdeb0d8603b0dea7d32697a82850026bd9f36a19843b7acb9 size 523648 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..60644dfacdbe6d926a447c1d87fbeff565cf3854 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f0ad95e983da77817794f6ca8727a7778343499256f4a2143a39c99c22c4d0f +size 589112 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4a062c6caf83d61d00ccfdfe9d9838d5ec90710e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa5a86dead31163c2d51bd9b3206d59eb8f57c279a9728b31b514c800d4a5485 +size 1516544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b299a2ea8c17f6087a354568756ff706b9d5a9fd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898a0ca136c13f16c9f6cab2a9620eeb4696c2898125092fd458af2557ad1d3b +size 1674868 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fe80d5b7baaa3accb5b439ca0b3b2634e9f287c5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ee0837f51cca42a03aff9285ab1d5368e28472e631809ec21ce3f777e8cee16 +size 164777 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a766d9b3537809126f20744e6c20853eca18fcc9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4b8c4603124eb044be652a39f22a41081ab3fecaeb2d06d068894d993dfe4b9 +size 1393664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d66a8f9dcb1ec3f00ef3998d1694bc6353f4537f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e7ab3e9082c00c6742ea4543b4464c593dd80bfc77d8961bc3da966750e76f6 +size 613271 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3a472c9b68abf0c292c123739184e8a3a7b224e4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb1c412750939c7d016b6554000e73d83c2b0878223176073dff6a2a43de7b33 +size 1117184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b38f412fc0143206c587bca463c49843fe95d53a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89db4712c3825ebc994e14e872bb55d79143d00d2fec9e2ab5a2c80920d8767b +size 1262210 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b136461ab5b9b9010f35+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b136461ab5b9b9010f35+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b136461ab5b9b9010f35+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b136461ab5b9b9010f35+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b136461ab5b9b9010f35+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d105a6edabece89c48275e7736772e1be21fa945 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b136461ab5b9b9010f35+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea86f9f5daadfb321afe24deac3b0d3813a2fbf862f00955a629350f345ffc20 +size 939590 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c129fb60cc612963e5f3+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c129fb60cc612963e5f3+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c129fb60cc612963e5f3+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c129fb60cc612963e5f3+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c129fb60cc612963e5f3+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cb505de3daddee73a0c6087519f60b8da0290c64 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c129fb60cc612963e5f3+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3d39aace1d85f74ce8e31631d1a5933a0dbbf064a4b86ecef8eb0848b0b07bb +size 940070 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4c64e6101aeaf718753f7d103aa694a4991ffd68 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c426b71115730b23335e06509feb8597628447db9a3081d8e285910e962c2f7c +size 172361 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..434efae57d7fd2fc5a8f1905cb694c3f15a614aa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3a48f63bac534e4a9cc10e4a0505cc4487ae7551ae02d3222d7babf762e48a0 +size 2243584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0be232d70946660c5254a9948d9979a857895252 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3cb0c71f937117958ff0bbf0af012bc7e6647ff92c57d84580b38864a7498e6 +size 1062083 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3871691b2d3da24946fd4a98868d35abfbac9d2f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcfb94ed3bdadaab4b7756ea9930c8046e79d0517573a9b241dde695d1fe159e +size 7312384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e67996812ad4b0713c1f+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e67996812ad4b0713c1f+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e67996812ad4b0713c1f+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e67996812ad4b0713c1f+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e67996812ad4b0713c1f+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..921b08e2833aa384ae501f7767cf5fa6f2a84b9d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e67996812ad4b0713c1f+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ebac139feddf2b2fe04c45c67d1fe0dd9bc01bbd9f2fa32f72e13ca22cf3b72 +size 2413541 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..00e34411e7501163c12fcab25095b18c544cc9ba --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d27df69eec3defb21e98a4f5b0b28b794d66599f2b03cd6bd3046cd52791e241 +size 418296 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d959ded2dfdbfc87dd60513df760c30283d2647c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:452f051b9d89a1b0785f45bcc5588ba665641a708f21fe60c89b8990b487ce69 +size 502784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..320224da1ca237a62c4eff1897fd35500decaa73 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f724e2db52d31ef4fa016cabc471051ddc776d20e5fb2a7bedfed5de7e639db1 +size 571183 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f7a81f27b02be85dbd25ddeef1750b9c536b12fa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:425f99c3f8a0d71f05502fed4ee1ffd958f083be56140fe38719058eaa6a956a +size 67207 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6f416197509aeb3f8ef485632ba9cc14442bf19b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bc49c4ec99ba04d5bc0e04a07c02b2c48e7f23922ccc2ea755dd26160bec46b +size 236544