diff --git a/.gitattributes b/.gitattributes index dbc6606c5bbbf003e02f0346df87c7dddf275c20..901ba6d1a6b589ba590b509a0b596e2fc74dd963 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2374,3 +2374,55 @@ neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/1c6cc851d88b10f70611.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/1c6cc851d88b10f70611.json new file mode 100644 index 0000000000000000000000000000000000000000..d74b1f6cad110aadc6f7ba864a4e56a05169fc94 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/1c6cc851d88b10f70611.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/25288e331f1cf66f02d6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/25288e331f1cf66f02d6.json new file mode 100644 index 0000000000000000000000000000000000000000..a321dd8afdf5bda01b0e16b81dc8a26ffa956888 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/25288e331f1cf66f02d6.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 16, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a0360a2aab05149b5ed.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a0360a2aab05149b5ed.json new file mode 100644 index 0000000000000000000000000000000000000000..d38172b14ae7a489c79aaccac26fc3f196e86e9d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a0360a2aab05149b5ed.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 64, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 64, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a8ae18c973b94646af4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a8ae18c973b94646af4.json new file mode 100644 index 0000000000000000000000000000000000000000..9e80009b51724d4cae514746d961ea6203139c38 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a8ae18c973b94646af4.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/48fd484fde912c3c9981.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/48fd484fde912c3c9981.json new file mode 100644 index 0000000000000000000000000000000000000000..966875a25609e1452ad6022778b369cb4183b467 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/48fd484fde912c3c9981.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/690a9eef6000b3a2bbed.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/690a9eef6000b3a2bbed.json new file mode 100644 index 0000000000000000000000000000000000000000..146b222d1439e631a573272f85c4ccd50d18e91b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/690a9eef6000b3a2bbed.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 16, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/738e74927966314ed1c8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/738e74927966314ed1c8.json new file mode 100644 index 0000000000000000000000000000000000000000..91fb853613704680bf9fb154cddef54f6100e7c0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/738e74927966314ed1c8.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/9b33c62e0648eb870335.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/9b33c62e0648eb870335.json new file mode 100644 index 0000000000000000000000000000000000000000..5c45d0daf631e4aba99856997a9a7b5c497ee565 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/9b33c62e0648eb870335.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/cddf83ce508409c44d25.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/cddf83ce508409c44d25.json new file mode 100644 index 0000000000000000000000000000000000000000..1a68633a93ac094b5515b50dbf2a64d258e0faa2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/cddf83ce508409c44d25.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 32, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d5702208316ed7c035c347fd4f87571e4585ab78 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7544aa0228f9807878b3593b3d9365ff45083234a699e7a6bf47b13bafd78d +size 136713 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dcea471796badc25e42350934288e7df95ebdeef --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e37775bcf0f850099391bfb1e72da4cd5ceae7bfc2e9165983bf8ffd016057a2 +size 2223104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb6f2723d1f18312aaf8a1944971fe98c41bf53e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:918100fe462b8b8d4a67a7edf8bc9148a2c0d891ab575554f4055604f3838f10 +size 777673 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e76137ecebf61d0022849d22820e9d2f0bb73098 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4196cbad5f93409e7072d3f84c49467f52bf5c8401cdc6044319f4d999e3ee1 +size 2724864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..574f13d205ec58fe72edb9e796c4669649a4b118 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cc744c22e9c0e5d585da942928d2aed8853154a05d0dd0546c211df5c3b7e74 +size 2862808 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..84b92a2975c1eb782d2da9454e7bc14e4881db04 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c0f4f84c58d9d114a5eaaa32456932c142d3aa585d6196c105de1c1d210d188 +size 136009 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ba883d9e21b72f8230e91bc78e6eaa99150aa521 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:943fc968a87a68bb3041bc5845f1465523a85d1a8d40dc14703de0ca92ece72f +size 2202624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..793e479ec0e97d4082c9492e96f85d6e7ad508cc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb9ce39a84c7039460a99091ac81bc3cae059fa3e84d41889117abb0046577ca +size 136016 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b26675f6ab21b02eed2280291e95e86dc119b771 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:982dbdbe9769b168a9badbde0a61aa4248a9d8e90f9cfd3bb49fadc04bd59929 +size 2202624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..17e465b29500287dfc421c8d743d31fbf97ec159 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d306ef1a91c6392e7b30deb0b89e40d577ce203876c1cd05ebf43e75ea116d00 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..67072b4f8ca1b229d5edc270b6d19e5e1faf6324 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:998cf0bdf7be0346d1d879365761c2b59e43e60f7d259d938d7da977733d9953 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0e50b96848979c053ec9ab2a6c2c642b1dfb032a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f3824fa2b74064f4a61d73062b0f353faa3ac1e01342ca30b3854cf84258163 +size 851466 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8063ad511f39870498c7e2bb6ab9f3ca152a2bed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd57a925819ece1b01115e8151a897c5223e0f9ce66671e4a3662e1646729e5 +size 36301824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..985e1e6ab81169edc4faa391269ed2be96a85322 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dc5f8d4de9d781f0419a893cec6dc3bc62ac2040ce88b930d2931ab638a4e78 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2ae8b79dac162be357387dc957de9a18019f4bf6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80fb0c317021aaef844fef7b99e44c023249d59c7425e85fb92ca347198d7bf +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2d178778fbebccf6842fb5503485013d618d5590 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00b1571bed76d1fcca273b4f1da1762454696fd38aee799df079f2aa9696cc10 +size 850865 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c9b27e3d0bfe7ef18e9bc11889ca3924e1de06c8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7bf03400cd6f77525afba5bbc885e5d0c316eca3ac5f44f8713809e32a49337 +size 32277504 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0a49e90ce2887698d8f02167aa54f4ea966b086c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5da061dda32ca95842d9626ad2065911721bbc14196b4937d3705edde9b64c +size 774401 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b85d06d77ad46a3483ec224579d3adeaf0843be3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82431da774c7a22311535f345020110bc1bdc5b5bc60a48cc6cffdce209385b0 +size 3789824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0a89d37ed6e2834857a8a55d1b787304b4340341 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b48a951a0fcd160dc01ce3c77887bdcba244ae3e72914001a3c9853835d7d414 +size 3927641 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1c52faa3b0db6f1d0863e0ca75793455aed154e3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86a3dcd2ead65060dc66788a9e49031115ab3ff754df2cb0d10fe04c93f0b53c +size 774385 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..edae4a5480ff3ff2fbded6ea977eb8f6f701896c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1009593c6f634c326a069d9c227a4e5cb9cfd27aa5ee3989d61d858b2573bb12 +size 3103744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1d3831c615d914b06a6f02b71d8b7732ad068c6f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c354b3cd9828d7f2de90962a714d173d592082f973408d8077689765ab30e56 +size 3241561 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..65e576ffb5bb5749c0c87c3df58021ac1291a78a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f34d084878c04c6615bcf75cc4c59a958888f10e8aef5449f60733509befd2f8 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9a7cefe09a6cc882ee87ae546e1c9ee07089c7fd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:611dc8ad87618de23b41c21879d25be94cc12f484b4c4240022d63cd91763c79 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eb03f69b1f55081f7ce2e8a95edb2babafa8bf65 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0d1ae4fa1f8df057937b891eedca83f96c80145297bebc61014821f220bf154 +size 851450 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..01f9162f5136cee8eae9da34c783de5d54735231 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02fbe4c78f0035709a032f2add6db202d9fc720419b70c9e261fd7c9871d2e8e +size 18545664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c8a3c488084489d00f1c5a626dec2a0b940bcb10 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62ac07377f0886a6aa4645110793df0deb2aca8574845df4c62f8a214feeb70e +size 777289 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2169e3a09d31d1c6418c3c8b8939bb8a569c868b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c7b0724fcb3a5edb36045d45a11660002fcadfc0f6f7f9a2fcb20d2106de76 +size 5889024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ae4c8e3b14bc6aa816148ce6d246356d1b3d144f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5c0267119b61367457d7c416935d5af7eeb41c287d46b1209c54a3c0096f869 +size 6026968 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d8884eebf0b93781b9340c0bc6f1806029ad85ea --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d057a696da9d4013284fdb8114b3fa6802eb8f8713e34a0d83d61b0e385f004e +size 771744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bb47392270e83d1b59e2d0d5fd384dfaae8c7b94 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3131ae16afa64bd7d04ab96a60864cdd61bcd823768b35b8419a090878281d1c +size 2171904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9dc3912fdc032211e0816962673b9615f6649f29 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96803eada0e7ed5d9324a999ac82172d6936e04feb34af34bbf1383e9de1e293 +size 2310233 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a2079d54d518f8ce03954361efbed553a7fe2c35 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d36631128d9369ecd150de3b27d12c13da0e66a956103c213cb0ae6dcae73b2 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..841cd55640845960e3e1aab1dc81f56358d1204c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e152ecd5db53775955f76490b5cce40680b885e39893b97cd562b5661bd0874a +size 2448384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d675c2f0549a9a33c7e404876fa969723f5b2397 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e53fe4370d8cb8a07547ef0d795003bbf38c154721c556185a87123a090b75 +size 774385 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8c584fc14f0ffe260740ba7dc557fda411ff65fe --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab02e76b4b49fedefcd3b659e05b8201d8304fc0bc8a7ce1b8abd2b3d1599677 +size 2520064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a789d29ffcc0b195cc2be4a697a05a5e2a2455f2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04221b0238147d09e98e081afb2921832cb737fa7a6e7e0164e5ae782a8ff8df +size 2657881 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..191cbfe3521a6c7b9959c540d58dc085f5afced0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62edf6a6007eb4951bc240d2fe5bab835f0e038bfc124805770cea4efb3189d4 +size 851450 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..68672572ba8bb66d1fb10502e9ef559e59c6426b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab011d402d7098bf212b1efec89fa95865f372d7c51e6425aaac727280c7dd69 +size 9155584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8fffc794732adcc826a6c7548e255e1417908e1d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32d5e1e45f49b3c84ef59b31e960b488bbe5aeefcc8ebce945aaea361e9680c5 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..086b0c3e90d7e6fe9b9f29f2b0620bb6ab6dfd1c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9364f278d2c3346f1eb496a4e7aaccea22d2c8e6ccebb31a21ed45338ce85364 +size 2448384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0aa9bd6a1f2082abcd6501fb56c852fee896b855 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3624a583087c4b1c7d35dba22851f8fb5429263b219e13ce2179d8b11645f2d8 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7b6761c18373a6d9b5dc37beefa322ef0e3bede2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:597093128bdbe3b3e5e50005930ff0cb93f133a5369e88bb02e03c18f253620f +size 2448384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6d632c9de2d3670ea609ddec170e0b4613fe185d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f68e6fc2c2247ce7381ea1fc3f522cc57be90a48dc4f55878b41b4159a45c26 +size 777273 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8a3f2eea28f3842761673eadf6b78273f0d54dea --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6603874735ad499ac9feb7d4f970810cb351e29e36f379192acc35e1a9d1f2f5 +size 4619264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7d0d1397d389a9fa3be37449fdd2cdd276217711 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f2ce5be81baebd204abe8a836723e6baab4b821e50581c118eb58a10542a8b9 +size 4757208 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f89477c294b7c20142b91f810f45863616c6b69b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d5d556198cc20c7c9fe86ff05e1030327c8ae09fc80443c737c0a31dda6fe7a +size 773836 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..486ad09e24ed7967b02699520e9d0b3db107f012 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cfe8598097595eb0bfe746aa5e2f66501c19ad77f6508532862920fc3f278e9 +size 2171904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d61617cb220ce1c88e9ff0badf75f67de0d5ab23 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d10f54d19fa2d213cd731aeabed2625efb780e364c6bda22711fe377ccc62144 +size 2310233 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ecfe1c7b9469c3f7f52ff88ff10bad52236713fe --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42df04557626b26ae2cdbabf2b5d7b2bafad04998bb8ffc9f5fa2569acec2ad0 +size 777673 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e3406aeb93979f49907dc7707daec0b6922d62f3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f131d3d3241dcc0378d9d02fcfb3a380669f1a6d9c4403f1d15bdae6d84cc9 +size 6994944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8a79b59913e872aecd75777933f4020c0dfa55c3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f249ad14b6fc365498028665a1bd1d50766b17f8859061df2b61875424a829e0 +size 7132888 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1562dee19782fa6cc616e563f7eabce02015cbfc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8069b733c164fe34861244cc8c0b5d32ad6ef73e4f005634a7830c82912eb0a +size 777273 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f931a678c9db21198fd32a33ae7c0d0400131469 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c04a2f9cff6d7bf88be21b604d8ef0a264722ac417dbd8562fd022791f7369c1 +size 4619264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..dce7644e9672372dcad74e6d58d81fd59d603e68 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f63ee8ae5b3e31007aa28546e3dc20260db96ac926479a04538859b0f5dd8a0 +size 4757208 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3a073e9aabb9fd85d0ee3bbe84f0943e25c23439 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:158ddd56dae2e72f5249dd5769bba7cb5a55a8d3334dd6849f11507f19d69c42 +size 771744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..20dacd28127c440d9138c4d718d0651428b00c6a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cd3ca2fcc093163dbaf658f2917aa9781a65a15b7714b13ba4ad4cabc69bf82 +size 2171904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..43fc1405d42a0c4491677ab737d567095186fd86 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac184ef110c27b44f08e34084c5c7944bd17b25bb95474b80e3a111575cc42c8 +size 2310233 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a52ad1255674572dabec0179d092e712a1575830 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:138ea54b503db177b6308ad5d590bd33d12244592e1aa218001eaa9d4954363f +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..42d8f0c498cddda38b0624e4b52a56b9c6ca8a65 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0830700171e0de8043e5528fbdf604686e91111a9b21fef8b125b13904bccc3f +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d7530ab5c36cafc0f065752c8cd2b45c725b63bb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:029bfcb6a4d3856149b1d65d793f4d8506d68b35c298e6b6f6bfa46bf1bffbf3 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..97bf55b170cc73fb6d7feeea95d4065f04c68d32 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe4a4a14377a5de8e4c361308770ed05e91a7052ef6dbba8c5403e28c50a9ab +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..985c6ace7b8b76db71706c8216d3b49caba9a675 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ed317151e832e8e2bbb5f5309d59f834a6136aafe42cf5fe9db157dd7bf3730 +size 136713 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..79bd6b25bd183ff4763eb173382786d077de8129 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75168a4f77d5eb3903f75546aeb4a93637f327fd6cfeae1c8ce8982cbbde91c4 +size 2223104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..573db90ee70df1e93fdec9265a1a4a017dfc6166 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0ea01efd364ff4226a41ae2f3ae4815733a507c3426563823b419a61913f2c8 +size 777673 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..abfbcb32837c39b340590ad8ec63956b433ebb96 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c692f31bf47de7add767573b1bf534507a9afe09dd6df129cff00eabd97463a1 +size 2857984 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2ed236aa80cc8fee7106cc52de5f1d496028b54f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf435b9878a087e4f47b7c0be24afb52cd6d1d242e65c6726f28b7e94f406c2d +size 2995928 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b881cb71abc210fdf66e628d7a054c2607bc2e7d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2282c60b9e7a88a9f3739c0796ba6a76f3dbc7c34d4cbaac49f988d13b890cca +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..df15e49aeaec8fd825fe857539f56a45e14f664b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00037336a9a76178b7c52a2b646c487b123c4e82dc9360dfcf1ce69cd5f15c5 +size 2448384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..758c8b0b14043cf84245cffdc6c93317cf0b86be --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3751ca70154e5ead11b898ec288ee32be94ea5e6d2d1f183e03f97fbcab9de64 +size 777289 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..32b288e1254394c57d8044d65fadc70e5b86eaf1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b483287f14e8c1be8f78f4bcc07347f1af2b92cbe9f19869d5fba8e10f9b4e2 +size 5889024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..222cdb2f50f257ca9256902216657fc2ad782ad0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:803e520e36a5330b8f1dacd14497c050984908e6c7b5b272ad6ccc96cdf97131 +size 6026968 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..958ebcfae5870711e645c557d1a16874d474eb65 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f529d177f160037dd711356628769f9cef61a3fbe878909b494d4f4fbb53a5e +size 777673 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..509b64239ad3755c6a8cb701f94ced2c45891bcc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac534e5b49f3131e7d4fcf81d4325a7d8b894a946090edbfb823571d630dafd2 +size 9247744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1541695366bf3b6a56239e86c5a4545d9e8592c8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dea0db8daf215ee0ba41633c8854d6c2060decf85a6f0dacd50c167064e3c147 +size 9385688 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..41b58f296f457ec399b51d242a0a618db47dfd33 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bec9cd09095d18ecce29ecab423fab08e52b20be4f9f6d0e86eaedf82967e12 +size 850865 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0d946b1841db87ab29409a12fa06ce32e333b537 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788d907015c3ea08b3db2bd9cda362aa515a6aad3184947bdb99ad28fe30b423 +size 2417664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e8b95f304e280cba9bdbffb3de26b9776425a745 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee5d453b3cbb1e35f65df13e217227cf0c6694fcf8019de7e6b576ff3f60c269 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d5479d42c2012f89cc1944369266105ae4e5e081 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4de12968be13c5c1148c5f87074705682b7f664362fe6f68db162949fa37ef93 +size 2448384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a0b2b45aeefa1d75b73e952a7cd520223f195a6d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97745b5c52bedcbbfc2a41a1f3dcc13e1cb8c040171d75716e3e00689416828a +size 777273 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..96634a881f040a11c80b562b00753857d0c06c8e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44867e67662d01ae9854d806923b0c6d421da78276c960822dd95540f6e48463 +size 5192704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..405d70045dffcdce56da636669d8625f18e5e748 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e6fc3c6002297043830734eb637426044a076f40c57559422301e9260209a9 +size 5330648 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..305107c75f95cd3ed63b75f9d225f1f27a5c2c23 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5032e57364bad1bb17e281e455bfb513a91103b5f420d700d83fd215d4754ec +size 777273 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..074cd0999c6ce0d91ae3db6ba8d096aeae2e7367 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc0260d073e94d990b0f721e505189a50a3a44c0064780140cd3c8b122a0ff0c +size 5192704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4e1a23824d4999fabd4f95789b3c7d498103d379 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:222b8b025c05a0a5555d81b14b7bb3226929d94d04cc9a47071d31e2c1958a90 +size 5330648 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d28c05c05087c7731d2d1c0a0e9565e9f5bb8a28 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2683e62e065e4a1cae87648c7d7fb6b2db8880f391acf20fcd7b60e38804a428 +size 136016 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d4939ca189f95bfb12d9d8ad42c5ec58be137fb5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5000ef742cddc09845d5b98d6e9b3db9d7884cecaeb726f8b195ce6e1a7880b +size 2202624