diff --git a/.gitattributes b/.gitattributes index 42587e8fc4fdf3cabbb073b6293ca3dc51551510..1a316248c84db567d8d677519f7575de5387616e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2662,3 +2662,45 @@ neuronxcc-2.17.194.0+d312836f/MODULE_bfedca8ab047e39c175b+431f5505/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_a97f776a1a796fa145d6+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_f41e2b0db5189f1f38af+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_f41e2b0db5189f1f38af+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_098093605321e39067b1+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_22cf4b1c9280bdb27a84+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2b346bc95a0a22d338b4+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3aaf2f909aa93d035b6b+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3fa377f9222d3e733b57+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4aa0e3bf1a6a1fb52787+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4b322d0da325d45466b4+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_638e58431e796ebec9da+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_81cb65755241019475b4+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8ee20cf7bba38ca778e1+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a176cb00890cc7100d7b+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ba7fb9c764ec187d8f06+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_bcbb3942ae0e6e62f9ed+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c1ff08289224c1071721+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f2bc87b9df15b7784add+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fdcbd41f965fdff93616+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/2265c570ad91bd59bb02.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/2265c570ad91bd59bb02.json new file mode 100644 index 0000000000000000000000000000000000000000..688bd6be0f8633f0d6ca1cad9dbbaba652ae68e5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/2265c570ad91bd59bb02.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/eed40aeb6cee419f447f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/eed40aeb6cee419f447f.json new file mode 100644 index 0000000000000000000000000000000000000000..6386bcb91a634eb25c024e21f14c1feefbce542b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/eed40aeb6cee419f447f.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/fccf90563714271da2a8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/fccf90563714271da2a8.json new file mode 100644 index 0000000000000000000000000000000000000000..a3acb9b5b9afe4c1481f59f0077003a8feb76e77 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/fccf90563714271da2a8.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/0988b920dff9a2cb9736.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/0988b920dff9a2cb9736.json new file mode 100644 index 0000000000000000000000000000000000000000..061d5602bf018696680671b6067dcd397e6ae8db --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/0988b920dff9a2cb9736.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 64, + "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev5", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/61853a0b31b294a846cd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/61853a0b31b294a846cd.json new file mode 100644 index 0000000000000000000000000000000000000000..66689ec6f68575f64c41aa744aa3ef4e52c5e412 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/61853a0b31b294a846cd.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 64, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 64, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/8261b9be41682c346506.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/8261b9be41682c346506.json new file mode 100644 index 0000000000000000000000000000000000000000..fd1ee81050085ab51af0445495625da21156ccbe --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.1-8B-Instruct/8261b9be41682c346506.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 32, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct", + "checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/acb8373c9ac1d7f31f35.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/acb8373c9ac1d7f31f35.json new file mode 100644 index 0000000000000000000000000000000000000000..56348a198558c57ab0e27bef05cd73c4ed920246 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/acb8373c9ac1d7f31f35.json @@ -0,0 +1,56 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 4, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev5", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/e8f7b8c083bc968773ae.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/e8f7b8c083bc968773ae.json new file mode 100644 index 0000000000000000000000000000000000000000..f576242fe9412b05009d4bde8e65f29eb49f6a79 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/e8f7b8c083bc968773ae.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fb7899d27ccbc59330a2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fb7899d27ccbc59330a2.json new file mode 100644 index 0000000000000000000000000000000000000000..9a8ee57db4ad994ab65d7e1539f38113be4f1e0c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fb7899d27ccbc59330a2.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/f96bf36952a158cc9e11.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/f96bf36952a158cc9e11.json new file mode 100644 index 0000000000000000000000000000000000000000..903e5cddf913d527d7b6eb450e842236caf5bdb6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/mixtral/mistralai/Mixtral-8x7B-Instruct-v0.1/f96bf36952a158cc9e11.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "checkpoint_revision": "41bd4c9e7e4fb318ca40e721131d4933966c2cc1", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 16, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 16, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.02, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/442d198b468f7347f4bf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/442d198b468f7347f4bf.json new file mode 100644 index 0000000000000000000000000000000000000000..7203ec938a266339c7d7eb7038e3f14695c5e1c8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/442d198b468f7347f4bf.json @@ -0,0 +1,49 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 4, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev5", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/86f34a5b4c3c146ba263.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/86f34a5b4c3c146ba263.json new file mode 100644 index 0000000000000000000000000000000000000000..02c56db54e7b21c1fce3f9bbfc967188854ed825 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/86f34a5b4c3c146ba263.json @@ -0,0 +1,56 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 4, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.neff index 0b4c2b55689cfd6dff700ea6c59b9c3dab6d6922..19b74f99d7005f6baf16a7a958fbd178265724f9 100644 Binary files a/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.neff and b/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.neff index ec16d748f1392d208ad23d83ae75c10495bff447..f3cbd0d0dce0fa8ef87d28cd3d3c5630e4f86d58 100644 Binary files a/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.neff and b/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_098093605321e39067b1+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_098093605321e39067b1+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_098093605321e39067b1+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_098093605321e39067b1+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_098093605321e39067b1+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_098093605321e39067b1+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_098093605321e39067b1+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..243fd9d47283bcf4588c37ab3340b935ec5020d1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_098093605321e39067b1+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d8c682f938ff3b1b9b3c57138e1a5076e49c6e55c85df692d8d69af43a97b7 +size 445522 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_098093605321e39067b1+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_098093605321e39067b1+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ae7581e4de25d4cc2fd70b389f4acdec862792c2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_098093605321e39067b1+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f6c5a5b43cab2c955b06302c0df17da7b0f5a6746cdb45196262f10a38c702 +size 32072704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.hlo_module.pb index f992af9212eb3c2825a0113eaa5e023e67cf88a3..f835360a753047b6e1da3944147288e5a1e52db3 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6acbec9d61392836a93b8d1db69240657c81968bad2d77ed4aa5ff54732cd263 +oid sha256:f3f3dd80294c9051a9e8ff1f5e4eb5a646e3b8c6dbbd4c5a6aa8a6d237217298 size 375531 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.neff index 6baea4d0e73795e4da7364bf6ad5c9e3d5442f3e..0684b0d41ced659f681c7e5452941a169718fa60 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8283daf2cb93d48587eaaa3ad9bb35da0231f51b4c0d2dd273d69f72196de3b8 +oid sha256:39e9e6ca1ebbe07cae334441774e22672f454eb4834a34019ac399084d261cb2 size 1516544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/wrapped_neff.hlo index 1fa38a705ce88ec30ea176d8acf8cb94256da9a3..13b0a7330d5a371039457f253406976ffe6d23e2 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:026a47d3b0d244558d3d19c0dd3a12d944648dae0da53a0516c0a4a5d8672622 +oid sha256:348b9649b6b8a7df2a1e571bab067492308d199a24c2ca9e75f24a483e150dba size 1603514 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_22cf4b1c9280bdb27a84+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf4b1c9280bdb27a84+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf4b1c9280bdb27a84+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_22cf4b1c9280bdb27a84+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf4b1c9280bdb27a84+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_22cf4b1c9280bdb27a84+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf4b1c9280bdb27a84+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ed0c2760aff91a7d1ea35314031e003b82824566 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf4b1c9280bdb27a84+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:659bfaa3caeaf8610ec9442a59ee8e9fadb041133f90aaa310893d3076743607 +size 919265 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_22cf4b1c9280bdb27a84+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf4b1c9280bdb27a84+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b8d079fd8d8799ae0b8d462b050369d582933cce --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf4b1c9280bdb27a84+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936afc71e5d663be56247d59efd97685b7d97e15ea16c901367ff64b6e1a4de2 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2b346bc95a0a22d338b4+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2b346bc95a0a22d338b4+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2b346bc95a0a22d338b4+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2b346bc95a0a22d338b4+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2b346bc95a0a22d338b4+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2b346bc95a0a22d338b4+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2b346bc95a0a22d338b4+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c73a23b97abf62134833de3bdc54d90dda4a8e2d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2b346bc95a0a22d338b4+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71396bec0288a8bc3a788b55f55dbc69045eabeb8dfc89f8feb28b91ecb0c55f +size 136016 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2b346bc95a0a22d338b4+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2b346bc95a0a22d338b4+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..96f15429419adcbf3579437664dd32a24ac3a967 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2b346bc95a0a22d338b4+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03da83020d93c33e44f4c975b99655077aa1c338c9ddcfcb2c52effa03a8e83c +size 2202624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.neff index 16f18225b0d80f0642ad658ef90dd3dceefde181..0937180031ae393d94924f5b4f693ad2ad5ecdd1 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:277e40266261753d321701565433ece6105d089a926dc9b8d5205a103afa4c67 +oid sha256:075eab47089f6b5408189c16efa63c02b2be7813b34cf32f7ac353459842c720 size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..68ecd7d2b843f41cef63262c50ae56ad5abec956 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea52d380fe4f37859998791e1672cdc8e24a757aadf3c9954d545acd81742d5d +size 339944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7b68692ad8b0db4fdda14db6547e510ce4f1d861 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:837753189e7d070d26672d57f1a682fed9737ab7d3094ec0fdb251419d09e97d +size 7742464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaf2f909aa93d035b6b+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaf2f909aa93d035b6b+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaf2f909aa93d035b6b+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaf2f909aa93d035b6b+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaf2f909aa93d035b6b+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaf2f909aa93d035b6b+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaf2f909aa93d035b6b+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..59ea220b4b61e61a87fe62d68d89390d42d9386b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaf2f909aa93d035b6b+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0732ce84be45cbbccd3553ef983356fc1a66fec1d645948ba5bd21ad3ed53959 +size 67559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaf2f909aa93d035b6b+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaf2f909aa93d035b6b+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ec43eedb5a9b4cba2fa688aaf4de74a6ec155fa9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaf2f909aa93d035b6b+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08b6a54201f19df9a20d3431d8db484c2ea1680e561140e2a26725be7b7d2a5f +size 338944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3fa377f9222d3e733b57+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3fa377f9222d3e733b57+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3fa377f9222d3e733b57+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3fa377f9222d3e733b57+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3fa377f9222d3e733b57+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3fa377f9222d3e733b57+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3fa377f9222d3e733b57+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3a116f5bae95f7afb518e12113e9c89e253ff69c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3fa377f9222d3e733b57+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:911683839345c4e7ccc7066521965c7158fe86de4be24320b0625bac3031c68a +size 481775 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3fa377f9222d3e733b57+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3fa377f9222d3e733b57+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6a5bde0e3a599d303df7e9030cefc3af956001b4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3fa377f9222d3e733b57+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc320ba77bbd50659655c35d1bdb8b4d67489095f7367ee52198afe0c4186990 +size 9350144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..587be96c040f716a1dc8765262f417bbe2b03b16 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f30c313c013d0c1bd9bcf0e0a13feebc2af2803ad3f3337b94d7457d7ee4bb0 +size 1914485 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c5b0c95ee4bb307f124f40214201b01918453025 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aec6522cde94b94130728fd1f7121ae2c635703e4d9e57561d6ca0fd7496b4a +size 10333184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9b1bcc6783256ee9f744ef9b86b1aa0dee521a17 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca560e921973586bddfbdbdbc5d097e1efd8bc81cc29bdda3650c64e7c35c9f6 +size 10676882 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..631e7237448512c403484a65ea2415232f0d50ca --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f661df1dc5c067ae27d14596c792d048e251e7a27b3a99b5e356e9083e29e35 +size 391520 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..daee7fe3ba627eaad35cbb5bcc6d6be9896374c0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bad12a320939b96e8cb21672539a37646fda9d7c2f31df23d4f3b7cd66a587d +size 1874944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b654c506d1ca45639e0d07b8bb3b1a86f1ea4691 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48a1dea69e2dc2d09e06+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:154fe2edab408901eaf91c35a4f57c8329eeebaa0a4a8113a7e2d609c1c71f11 +size 1943827 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4aa0e3bf1a6a1fb52787+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4aa0e3bf1a6a1fb52787+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4aa0e3bf1a6a1fb52787+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4aa0e3bf1a6a1fb52787+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4aa0e3bf1a6a1fb52787+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4aa0e3bf1a6a1fb52787+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4aa0e3bf1a6a1fb52787+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5e90bc92bdb22a2c27ef281890860a92856bf462 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4aa0e3bf1a6a1fb52787+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:937307ec4fcd1402e9376652173dcf530be878628d31ce9638c8fd25c3f2a7a4 +size 69051 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4aa0e3bf1a6a1fb52787+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4aa0e3bf1a6a1fb52787+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5943ef244a180097b4195c3e39d15c17b90a3065 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4aa0e3bf1a6a1fb52787+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed8b822ae4c28e96cec667246760be4e649c042e3e77064cb2f0db3229ceb574 +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4b322d0da325d45466b4+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4b322d0da325d45466b4+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4b322d0da325d45466b4+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4b322d0da325d45466b4+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4b322d0da325d45466b4+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4b322d0da325d45466b4+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4b322d0da325d45466b4+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e3a902c7884b0ade6945d24d152eebc3b2864dd2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4b322d0da325d45466b4+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:624f1f0ccef8db37e11551d1fe3f93f40beade155ebfdf8904fb126640b73bd1 +size 363567 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4b322d0da325d45466b4+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4b322d0da325d45466b4+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dc223300576cea84ed24ea58fd7e92c47e6e4c85 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4b322d0da325d45466b4+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f49f8d13cc003c46c52e1d54b721c58611d1bf69784dd17e498dc76268e3d94 +size 10077184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..58797ad4dbee1adbfe0492d1b2eeebd2a2229b15 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:499b47176f6148cec2112f5f61595728e1e9a63b42efba4209f86d4632f5ba48 +size 2268363 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..36405006d1b6bd0d934a2f74a165230bf409279e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73707863f3e65ce1080a8d8687761f9085a02e472b380471314e782c02184a9b +size 3257344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.hlo_module.pb index febf93cd66f7c725be9f9482688c6e011707bbd4..ca18f79643c17be12440885b24ca13a83a84813e 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8abee4f2f59d915c77ace5d59ac6e2328bb6e874c9ca72f02e4135b40dd4d1be +oid sha256:0d0ffae83cd5de1473d83f2e7cc99778b46d17b5294fd6771cb15b924f63e789 size 47194 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.neff index 7a42fbd8eda905686b0b718df7a0dfacca95a1ed..4c260b91f5f5bf51b7516697e869ac35745ce3bb 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:403599404640a90fe81e336ced26f35356f7a935eeb39be5dcd3eac89f162301 +oid sha256:d8a6f31d06a967e0605add0a59526083df8e89d0eaed1f0652c7a5a23d2ce5f5 size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_638e58431e796ebec9da+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_638e58431e796ebec9da+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_638e58431e796ebec9da+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_638e58431e796ebec9da+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_638e58431e796ebec9da+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_638e58431e796ebec9da+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_638e58431e796ebec9da+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e010b9f41d7982dfb347d022d8aaaae379489254 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_638e58431e796ebec9da+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d8f4c6678f640cbe18f7831799fb15955430608efef13668a3d24d0ec07d841 +size 68980 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_638e58431e796ebec9da+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_638e58431e796ebec9da+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..04bbedd2221a3f29cb8f49fadbb9fadd2e119ce3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_638e58431e796ebec9da+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ea55f9825ecbb64ff8f748e4357261eecdd17674cd5df19f4c646f0f8a5b4b1 +size 615424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.hlo_module.pb index 35ac41f37b039568a8cbc781e6597f2d7f7edfb4..294b3d8acd2a7f62dea63bf67b0bad796cf6a9a2 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3f8d49a49e948fd58a00763e8da581ebeecc0278eb7a1abf2f8c6830f3c9ca7 +oid sha256:d05dfb434ac3afe697289a9630e20cec8959061e6d0244d2ca7dd42c498ce80e size 80244 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.neff index 88589eec563a8d49eacce1576f1cc230e95bee74..2e3cf8886bbfc1f7af32842437304909b23fe417 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1a63707da04d8009f2ba56882b4a9f750461bbee2e5f9030072635ccc6bdda8 +oid sha256:1a17ff48da5d0a6299577ec66a90b74bba08ebf9a679661ab0f5438620793ef5 size 226304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.hlo_module.pb index 0c0b7a334f678f0e5d4acb34c166032136032183..974349d60930a207cac52bd2f422f188bf0cd00b 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c216197c7aae03e2860a7f1fe84edd3b2eeaa4cb809c5696d03509e85e27e11a +oid sha256:5f9610dec7ae63287ed558149e7f54cb819cf7c7ce9876aedc1b0931e3e76ea4 size 80284 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.neff index 30ec4a3299c4d2058ef1286a6cb14f1e82e666e6..b881d03cecead8525b5968da4cf55315b86ece27 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d7d7a5adcabd527badfa6922bb4fc896574c492ae0748a6a90c3506fb4054e15 +oid sha256:b620936c28987328afddcf4640fe4578dda072cb8e1f8b3179f47ade9a923609 size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/wrapped_neff.hlo index 3f598d7d225a8c50e43b7d4807d6dfc4a6d3ce67..acdf9ec49499d24ba04c3e59fba8e3c951c5b6a9 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:69f7141a82e7b87c1ea4d20192f29a818db94c94f3fb1ab7992a8a06c4627a8b +oid sha256:038fd23bda7aa7939f823eb0446c52833eefc4dedefdf94d22301cf30c4f7070 size 244319 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.hlo_module.pb index 4ed54e8be41eaa82bc68841d4d9d35bb9aef1f95..7eedc45142d6eb0847737001c9e13e57f2a8f442 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae55c7b4fafc583b6550436d75415825175812e90296d8f1b8004b78edc90e5b +oid sha256:361c93cf23b6c861204f434b6f4b680add2a341dcb521642f95ba99ad51408fe size 48045 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.neff index 525ce100376c03da90ffbfa9d70050ac549b4392..b880a5479f9900d38ab7b86043b7c904bae68545 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1ca62ffd479599cb9695b4f92e01c089e753495b27a3aeec124ce61a63c03c13 +oid sha256:f54cfcb9cd9b7550a597e2281ab355a7dd7da3e349151b415422ba5c9fd3e7cd size 2356224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/wrapped_neff.hlo index e0d73f0599502498205e731413c86512d414ff65..670b72cefece981dc55b5e1878f6a2658bf5e376 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:625ba7e80dabdb5d80f0ab44841d427b688729643e9aeb1aef92beee6df0a44a +oid sha256:c120b74390719a05ce1d398d4507db96cc267dbd826310f08de08324b19611a5 size 2364014 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.neff index 27083ca1ad7dcc2bc3ee4bd43b51a5e4b7469d4c..0f47fd619be380d689c74c5d846b5702ab9390a4 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5139f50daeafb972e2597705cce6041c3bd33b0db71f1eaf3edb85633494d383 +oid sha256:e80dd4b636c1c07e1433626d50e1c4c27d1c17ee492c2f785c4736cb7250c630 size 29072384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_81cb65755241019475b4+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_81cb65755241019475b4+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_81cb65755241019475b4+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_81cb65755241019475b4+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_81cb65755241019475b4+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_81cb65755241019475b4+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_81cb65755241019475b4+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2ecaf8f6ee3fa72fcd28de58dfe3d46d39af951d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_81cb65755241019475b4+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9c706c65ac8e2d64843a198094d42a612c0a6950942e4ac69355368e1ccc19e +size 233101 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_81cb65755241019475b4+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_81cb65755241019475b4+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4c57faec9465bf30fee9729e2875a9d02ef417c5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_81cb65755241019475b4+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b266b382dc7e7158631c934e443fab3230cff87b68bcc965dd8323ce4705b0c +size 29062144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0dcfd1d5d619cb25e89b5ed820db5b844dc97800 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f3ab3b249656fc52584328ebadfef2d0a05ff0205356c28da49e948d0f65de1 +size 776653 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..79eeffcad6804d9a16a988e4fdd01dc57cdf999f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eeccdbfdc088592bbb745c23a6266ab8a1d6d9d8220a1738c4f44c90c388530 +size 9247744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..deb6b1d9513dc71c7c9105d11d3260521aff7d21 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_83951e6d774a87a42cdb+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa74c4216443f194759e6c6b420e97d2a3e65a9fe019a7f094d0102496b385ab +size 9385688 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8ee20cf7bba38ca778e1+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8ee20cf7bba38ca778e1+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8ee20cf7bba38ca778e1+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8ee20cf7bba38ca778e1+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8ee20cf7bba38ca778e1+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8ee20cf7bba38ca778e1+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8ee20cf7bba38ca778e1+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2b16638633a07adbcb728812e9d04777380bca5b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8ee20cf7bba38ca778e1+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b244b19147fe8840d126af77740e3a76d63916a03d659d87963e65cdf757c5df +size 919265 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8ee20cf7bba38ca778e1+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8ee20cf7bba38ca778e1+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..43f15f1fa12a53d8b513030221ddf71a9f80ccbe --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8ee20cf7bba38ca778e1+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7ec8633d0cc16a6779d66c6a4fbd7151d3fe202f52ed4d9af8d6fc6cfafaf4f +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.hlo_module.pb index 1d3aed0fc319379e30cbe1eb955421e657faff76..aba9225a0a83f76097df0d502ad837d8af71e22f 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c19d4dfe71692002336bb6a9c2d9ed457be8911def9f0f1bbe49a675ca0ed471 +oid sha256:fad8c0722b47a516e23cc7f9c16463e66f908abae39ce7ea2d1e98932ee04110 size 410718 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.neff index 20d687cdd93b552f0417341105bbcd447a52b8ee..ea2b70547ea65aa18aa9e3f376ba18a8d3e3b72a 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:226b432080db08ccc734633a57e15fc3e3dfcedd5ad3e731714ab86a6a6137e9 +oid sha256:8194fed710fc1236fc22df831a1f8e5c3fef7e747f69b093a4751902741aed28 size 2683904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/model.neff index b053bf2d6a5983514e1fe6630efc884d23b401c0..82415664f1a7a2426757c8fb8679f74576f5a34c 100644 Binary files a/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/model.neff and b/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8961d64d75806b635263866a7abc0e99b41beef7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5868766c097b546ed0a0fc35364ebea619aa954f1ee08d075c9cf4890b2bdbe +size 90431 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..823381e27df7ff618ab073996d335bf343fc424c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7e78937ef2e29cf6d90ad789c71bcf9959bdf78f0251405262a4ec55880ec64 +size 213853184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.hlo_module.pb index 27c0739ebcc7a80babde24afe2d59e5379bee455..565301a112c43c776865041527cb372dcda49ea4 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4be8b3ed974bc7a4f0e22783bc285a31c6cc6e9829cceecce4d69696befab2d +oid sha256:dceeb7dd49382409a9411da04b550b2b7703c090618d8778959ff6624060325a size 80244 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.neff index 2d390ec66c1122942133d24f65a0bbd8fec4b6c3..4897ff3d473797b1583a2125e1ae47aa24224717 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7eb4995282101e4dd46d49d5e493d85839c705a15e62641cbda2007e271094f0 +oid sha256:2703a655a98e6c0b80f77c69e9c7f9067c21931cd629b7b70e27dac8270ebf3f size 226304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7399c3270e3b74a5fd8037a0c2f5bceb4a3bf460 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4226c65b197e328c540f0b8c4e9cd586c349215b6bdf06aaa804c0307784a12e +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1884c7f52468112220f55709ead16b35edb97dce Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a176cb00890cc7100d7b+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a176cb00890cc7100d7b+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a176cb00890cc7100d7b+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a176cb00890cc7100d7b+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a176cb00890cc7100d7b+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a176cb00890cc7100d7b+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a176cb00890cc7100d7b+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..aff59ef45c22e9ef602867aafa5ad08bd70c0747 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a176cb00890cc7100d7b+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a90f3881d7fa68007129bb24fd0353462670fabc9dbbf2ff2b0978c0e15371e +size 466825 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a176cb00890cc7100d7b+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a176cb00890cc7100d7b+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..31b415060a54c8a0704e14d53cfd6d8a17f75efb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a176cb00890cc7100d7b+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c43b36a5d1d76ba2ad6d83431564254bc712f53975c2cc648d1042c3403630ee +size 30639104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/model.hlo_module.pb index 23101bb115f125b46c9932d8277178e574fc4727..88f71e18dc0baaf312d8879a86d2fb8746b3fb7d 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a1db116f00c8bf43252e+793f1a96/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f85d52311c68339b61839757225d2069162fdeb4d155d5d741450693e5dfc555 +oid sha256:7e0ebb84466177f1eb5105390edd847097e8ae08efa07ed94c1ca4e00f5b2e1b size 55270 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.hlo_module.pb index d0694b8edd6139cf51574a56f00b02eed0d0767a..9e6759a6665238a9df3e444decead547074ff8f9 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:39c3b9329104c2ec975f082b004a3db200509208a253bcbdff5b46f06fd5e32b +oid sha256:a8c2ea4cc7a8e6c80169d0e98c6b723f5850a5b21b0ebfe4b239f171d610487e size 80284 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.neff index 96f1bc8107443610cbb1efbcb4b0dd81d4afbe52..b03eb4fe35b7e8334873f82cf26d4496c706a0fa 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a44777d97cf993e5722eda218e17b13d555ff27c024c0952bcc89bc526f1f30 +oid sha256:1e7ee718e24df70b9e6ac412d43a459121ef084dda98a82934adf3f5aa681130 size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/wrapped_neff.hlo index e3c665ffeb1a4ed0e24c88ad7ba501dd7718c58a..76b5ec2312bd6638371afd8e7a4d39c2f44b05ee 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8b75d5f08eb598dfcd73a92954b34be95e402d6d332aad12834fab7e5c8cd90 +oid sha256:4af63caec1c4d6a3e460d0d31056cddcc76ac1b45d4f985ab56fea5b95e06080 size 244319 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..24173d3f729ed47c1312fc4fae7f3807205adf21 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63c4345aa7426dadf56657c2100e10b5b5f0681eb6af5462b439a3f779cf19b8 +size 776653 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2558462e8556e260495e041825d8eeb4dc6ff4e7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:618238a62d08547235b590a1c74171b11f7e54df86fa66a100ac36a70e16121a +size 6994944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9bd657339879e6a008e3a4188988580a555215d1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_aa317f69a340c77d5b24+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a8be3bad5d4226a5354995700d4891028bac71cf48535f5ffefdb29d7a39c52 +size 7132888 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a0cc3528823b528b0feed4e4a9bfabfcc83ae382 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac51d0cd862876974b388b745079679da8be654ded686b0c58fa58ad0c0af17 +size 2084464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9d62a82e18cba5191ca4b28ea9d6a2b4b61ddc81 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2fefc2ba188f96b31023f7e5feb1e3a6a561902207db53e500075fcd886c10 +size 3349504 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..26d3e1a5214c6d94617656132c37cd098a74cada --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:044eb79fac79e9ca16d46f9796ea4e1c0316e56bb8ae6a09b0f857020a54dd90 +size 80474 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5bddc3ec2ba51eef13818de7a36405a3c5037316 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11cbe6890a2c9ee83ebf176997878d3d2f1beabe1edf6a38a9e1c8a90125b7f2 +size 461824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4089e756cd80166c33fbe7dbc443740a1fc1275d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea0c70ac6bfb5a52631a6fcf61c4d2e9b5a0aed55e0367447df9d7bac568db74 +size 469663 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.hlo_module.pb index bf16cd4c43f6accd2c35e3fa09577edab4c688f8..0399c7b4a765eb888224766628cfb3b32c36d528 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8991f51ed072f44c85249c1051d73c114d5d5b8bf2a198916b39429397ddf5d7 +oid sha256:c9e5991e31a754de89899879b4185ef1a22a989d63bbef123e1fb97946023894 size 416420 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.neff index 12622d22feeb12fb7ad8a1487207106cab16819c..914626f9df1bdceb6951b6ae289767146403f1b9 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dcb197f36efe1ca2df3a0b93920298b07263f6f2dceeced910f21377325f0333 +oid sha256:8df18746c8169cea1b8c0090d943ceb5be0130737853fa14652474546670cfc0 size 31642624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.hlo_module.pb index 0df696c47045b5974f8c6485317a85255d253e9b..0590bae928b0caf241b1cadc616f92b69133e599 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7ca1cbe7c0596bf195c92a6981d58f97a15f5ef962024a9c2af6284578036cd +oid sha256:4c31ca5c42669af732c52e7635faec4f9600d0cf781e381c4a2a7f791c10b3d4 size 445394 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.neff index 2be0468e24587248704c3ff5c0e47c47bde19ab8..76fbe9ef1dad7084d0b9dac9bb0ead1c6d921927 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ec00109ee082b942225b34bca892da58326723463e3dc5855748b4d2dbe95d6 +oid sha256:b31e34c8894efaa104e01140f1968514edb44ca9a21a3bccafa8e11c5ce5b78d size 32072704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.hlo_module.pb index 26285702feacb71902c06b69e4b8524c47e25347..60caa389725f3928a756eb5a2ed7db001291bafc 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc7169938d5d2448836a0ba2caee343313acd17800c1c8d5340847866063cca4 +oid sha256:fd29b9fcad2a945198bae1058538f54be003576d1675a8d9da0832151db4194e size 378951 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.neff index 0f0add3aa6cb312905816ff394763ef7507b6ac9..39d62eb70f79ed172709c72c3f486ac6e1a2ef41 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b69e7f3bc5be603f95247402fad05e67dd40e922a65e527b24b3a99ced7904d +oid sha256:ca0984fe4b17c72150b3f1bb21b08b7e480ba250f6e1084471aa1f5f7a7e5084 size 4404224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/wrapped_neff.hlo index 168fb5dd44574f5923be612d28db606389a2614a..94b8ac0fd4e5d2148102fe77330c87012b69f107 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0fff35422bbcefb32702e89c4911dd8a1702ee1ee9f6d3d8121ded87253fb327 +oid sha256:dc5f7c13ca6d344a2adce9d3a54e07eca1e4972c01f35ae91fc1d2f12b6d0380 size 4473980 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ba7fb9c764ec187d8f06+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ba7fb9c764ec187d8f06+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ba7fb9c764ec187d8f06+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ba7fb9c764ec187d8f06+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ba7fb9c764ec187d8f06+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ba7fb9c764ec187d8f06+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ba7fb9c764ec187d8f06+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8cbd485adbb2a59f2d2a4454ebc3543e11fdb320 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ba7fb9c764ec187d8f06+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22c77f0850f018366f7fa0bdadeea0b97f6a93b1c1b52602f2a50f922fae410e +size 4802248 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ba7fb9c764ec187d8f06+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ba7fb9c764ec187d8f06+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..91c69487eba73a3527928ba06b9abe607f71afb4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ba7fb9c764ec187d8f06+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e21d738b1eb8e7c0c2fcc7a3d325d9a3c3a0c1a7344e3ee86d8aac48cb9f3eda +size 9739264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bcbb3942ae0e6e62f9ed+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_bcbb3942ae0e6e62f9ed+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bcbb3942ae0e6e62f9ed+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bcbb3942ae0e6e62f9ed+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_bcbb3942ae0e6e62f9ed+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bcbb3942ae0e6e62f9ed+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_bcbb3942ae0e6e62f9ed+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c5eb8ff60810a892b2f033f356d193f5ed1f7ca0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bcbb3942ae0e6e62f9ed+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6659e3360ddc2e0969b571bc08f57834b57408d9f58fd45811685d3717c6f59 +size 136016 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bcbb3942ae0e6e62f9ed+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_bcbb3942ae0e6e62f9ed+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2726ac5f5a4534c944085d78f974e82a9f8aeb4a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bcbb3942ae0e6e62f9ed+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:826c8105de73c9093daf7aca18bbc80dadd99dd02869b02bc247df9258eae662 +size 2202624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1ff08289224c1071721+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c1ff08289224c1071721+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1ff08289224c1071721+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1ff08289224c1071721+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c1ff08289224c1071721+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1ff08289224c1071721+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c1ff08289224c1071721+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d2afb603b33ad8e0d6d083d989b5fba9b871843d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1ff08289224c1071721+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b6f5487b0f79b306fcf6c190c32def1baa5036198bb4566302fdf4f8c384064 +size 181367 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1ff08289224c1071721+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c1ff08289224c1071721+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..af118f61748dfb32503d370b7ab6fb9a20af2003 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1ff08289224c1071721+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc2a777d5659fd3a7e55d0d4d824ad635c50011d15e4db485a5de4e3db909e16 +size 2448384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..caa06aa827793434a0f68a44c6c0bac365843693 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707bb33d702d0f80d0e09777ca3bd20f98aaab37d64b82380b9a96ef85865985 +size 1227398 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a013f7bc2b6b648da235712a6521426d5650acec --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb54924db7b7b8788cfba583bb13c0a2be3b446d9807e79ee01fffb5ebcce83e +size 5213184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..420c9d406cd5c350c20eacd1f4ce927a1ffaf78f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d39a8f077f8ab2cd34e8caddfb9106d05010674b7209df60006acf5c296e79b0 +size 5358478 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..44d85a0c129de58eca961464230623f32fb055d4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c898f1bab83b894dd95995a7b66fc146d70fc69f167264e20c6c88f4d10e9dcc +size 1914485 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..efe2aa91c21a7b8532fe39d9a4bc03b76c7ece14 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:429f7d4190e60c6ed8145b27795b1231af0748573b6edfec61e23cf348ff4443 +size 11787264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..da4d54b695cad4e818b35b0b405848e3e4eb8e70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c96b0f6664c50c8b6a6e93ad18dcdce2a3238a054770993ee2ab5436d07c3cd +size 12130962 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.hlo_module.pb index 4274f8d3ad5d38276f8554d881bf710268891729..5c6ba173442e3618c13d8186f2c450cfee045dab 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0dc15ea8901bd875e005ecf67f75b5b7b35df4c821b94824bc21eb54af7b7e3f +oid sha256:d0569f3a641913a8553a9e73ae662da99532bec69e2ec79a0208ecfb478ae786 size 57479 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.neff index db79e25ff2a5542762dbcca79fd11a7e3c198619..dfc7fcab66d6137fbf60a7a2c1a2c45a2a1e9b6a 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:effa541313d17c62aa28c21ce653ec67ca0a04aa0978a4e1416af5567611f2f9 +oid sha256:5b47be23f9ec52311d836ba8c81ee7f34cbdb88006a619bd4e3254ba942fc808 size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/wrapped_neff.hlo index e795438b49609cd37c051c931211f53b377e0f3a..54e5b830931423c3335df6ba0322794fa49224ee 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d12c8352f8de45811c1f+7e4da68b/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:421c475843337a184b39d8d66a267ce3fd318f7539a77a928384828c3b19b08e +oid sha256:95736247cb7341459857775f1f4f97d4adf0b6511ed49370a403fba0a1b4855a size 254559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ceada16784908c90f2737084cd29fb240d785fce --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b7ce902472a6afa072bf875ed4ad3d98d0532276139ac15b41d6c6393881a5 +size 1908818 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8ea3c30765757f868b090a472830180075a8ce10 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd922818927a72335a8525543ec4e32ba21e11aea3f4185f5b2c500626f6b740 +size 7138304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b805b3785c3fb4498857be3296a52316fa7398ae --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e87175ac7451885066d3461f8fc86372568707274f6f49f83fcd5dfafbdf3227 +size 7481879 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.hlo_module.pb index f5e16fc6103beece39fbf4b59aa203c306ee84c5..0d1d037151ebd0c087831fe289a6d2f2ced53904 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:effe084f034138e044c85d0398c714c0015f70454b05881dfe533f59b1f0e322 +oid sha256:fef740edfa70ce225aae83ca14f9018e30db2e3ee491fecc8351129f60751703 size 373614 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.neff index dd9242b067ace627cd6155b99ab4dfb5a3552e0b..225ecfe90be363408c389a06b88c59ae6116ce51 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a202e69321860df94a6bdd68a8000efee6e97e0c5d28f392f38f777843ba242 +oid sha256:1c703e8bf1cc79ace196209ab7d95f7c0067b9a4423dab41cf813bc2e8509980 size 31693824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0aeab94286a0c69a0d434f8bd23cafa7b8f727ca --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce95de02650201b0eb91a5ec6ab62f6c994ccd6226cbc944f3a5de5817a36d07 +size 2268363 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c995689ff072de0071f1d22c6607bee8c0077fee --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a7efb6811b9babd119431288df5f7d1029c3e81e74bd654ef0852c189b0e7f3 +size 3257344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..713260a27eb4281184298694c3f8043ce37effc5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98c859f2b2bf49033c62cf64e472ce4b88bbc05a41288d6d2a199630cb7b6fe9 +size 410816 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..963155041ab1f37eac69149b2a4975b0f4ab053d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b1b06faf93806532300c7bb66d5bf0c3a8a3efad605f7383d34770f9f74193f +size 2980864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..92e6a71e390ca0953d82945e0537ea95644e81b5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ed703e23be7449f8c891+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb83edba09a9f03ba89866a2e7a11b0e36caf6d84386bef37a6e02b525bafb0 +size 3050677 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.neff index 655d291c9cda12d3748f78887e386460ff6e6f3e..055ceaca43f9356639b71b7a38b71b6117e4cd62 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:53c358bea95a8eef0a9dcdbf3320f6b695ff19816390d4754c37b7f34840bdbc +oid sha256:d9151cfe4e7b3ee29ae367d4070d739ad94be0ae7278665a51139aee67f3b264 size 1178624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c86afbbb88c43f3c86e3147a49aa42c0889dcf42 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980b451d3a78283aa39bbefffa86e54b4ece94569ff3112b70ca09a949e1925e +size 378951 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cf5018e719e73702de84c15709c68c153fbb871d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e2d2a6301330d1d924459663234a081b551c1a060c6193d608e8bbbb6d8734c +size 4404224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9125cae82398753d35f001721b13ebd1e9a46aa0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f2426656f5f2ceb6f048+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6252486dd345635ceeb7457adeb3bdafffb11d4c3ba73613f8db60ad541ed28 +size 4473980 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f2bc87b9df15b7784add+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f2bc87b9df15b7784add+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f2bc87b9df15b7784add+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f2bc87b9df15b7784add+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f2bc87b9df15b7784add+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f2bc87b9df15b7784add+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f2bc87b9df15b7784add+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..83352273aa01aebed0f820f17430be11410a11cc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f2bc87b9df15b7784add+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55355344cca3f76d552c7d882caa4593f48de4d2e0afd3fbe6a1eedec586e818 +size 437724 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f2bc87b9df15b7784add+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f2bc87b9df15b7784add+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..628d93cb7c9e859515e2311d9e17d8470e3180ec --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f2bc87b9df15b7784add+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26e16cbe67e941659aa06a0b91a19f0403717b6a30f25b4876bca93249b40b42 +size 236370944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dd15899b44b34f45c6fc22c6f2c0884d8a2943a5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:998625bbddc15ef9e80fd9cdbbba5d30b61d1992bcbe676584c27cbe81be0ddc +size 339944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..12d3d59702181ef108e1658e871e45c81e7c5741 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5b2f21896a15b36b8a3f35e5898819292b4003b538c614c4f502b4861ee1887 +size 8059904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.neff index eade2a1cc260bce68008bcd0602eeb3d69a4456a..a3eee59199b4b3663c1e8d1dfe1e80dbd01c9db4 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e4c0b36f25c96a3f3116c488fa2e76da05e47dff7fcf7f2e5e0e3ec14e3f806a +oid sha256:d67e5567b210909beac94e83b6f1552a1216d5fbc5f284b8e1118e5829351ca4 size 2448384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fdcbd41f965fdff93616+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fdcbd41f965fdff93616+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fdcbd41f965fdff93616+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fdcbd41f965fdff93616+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fdcbd41f965fdff93616+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fdcbd41f965fdff93616+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fdcbd41f965fdff93616+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..76223f0a3aa971c3b9bb2f575da7391971ef72d8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fdcbd41f965fdff93616+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d15ec4bfbd7e8663d446f5ff9a3f10d50d287853df83c8c43dbe27b24e51da +size 146610 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fdcbd41f965fdff93616+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fdcbd41f965fdff93616+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..811f076f408df494dc68f214f4fbff9ece210842 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fdcbd41f965fdff93616+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be5874f6e2519322b51a2f03fbdfc376a3435272f7567c4715763660dfce98cc +size 338944