diff --git a/.gitattributes b/.gitattributes index 5ea2ef7e9e226c8e24378b9c5bc5d0eab4d1bcf5..42363f9702f08895ed9b1a33f0ecccd7bfc04452 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2809,3 +2809,54 @@ neuronxcc-2.18.121.0+9e31e41a/MODULE_e3e99dc2a67feb5066f9+84f3e719/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_306e43064416feb5de44+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_306e43064416feb5de44+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_3d29f316a0cedf338ff9+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_75d614ebd3722304448a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a05cbb6e9bfee46f6408+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a446aa72fb4ce75a9fdf+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f8a0bbe4c09b370fbea0+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/16eb66e6f195b2f2f3b4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/16eb66e6f195b2f2f3b4.json new file mode 100644 index 0000000000000000000000000000000000000000..8446ce90e2d9ad4f5ba75da7afe2ac084850f856 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/16eb66e6f195b2f2f3b4.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/88744e5aa5d753d1f538.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/88744e5aa5d753d1f538.json new file mode 100644 index 0000000000000000000000000000000000000000..76ae49d01bebe6516cd8afbf3a20ea25af47e7e0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/88744e5aa5d753d1f538.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/8a3e1095e7ec1ddfb65b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/8a3e1095e7ec1ddfb65b.json new file mode 100644 index 0000000000000000000000000000000000000000..0b5add12a90a30edaa437560d4ba85af17395f85 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/8a3e1095e7ec1ddfb65b.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/021f11fdefa8c3b516bd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/021f11fdefa8c3b516bd.json new file mode 100644 index 0000000000000000000000000000000000000000..46fcd0ba4590aa5c04e1aee016fb94538a7ac151 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/021f11fdefa8c3b516bd.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1913cf3041a0fe975f3c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1913cf3041a0fe975f3c.json new file mode 100644 index 0000000000000000000000000000000000000000..13908f95369b0a43ba1245d72030264e26eac9eb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1913cf3041a0fe975f3c.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 512, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 512, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/50411def20a2b703209e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/50411def20a2b703209e.json new file mode 100644 index 0000000000000000000000000000000000000000..f3554ea054eabc9b92261f56bb8563a7a2d3b033 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/50411def20a2b703209e.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 1, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 512, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 512, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 1, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7bca5f2b6f1034c1fa71.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7bca5f2b6f1034c1fa71.json new file mode 100644 index 0000000000000000000000000000000000000000..20123ccdd84691c6826582391f2c6629aa106085 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7bca5f2b6f1034c1fa71.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 1, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 1, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/34beaf9835b09fe53395.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/34beaf9835b09fe53395.json new file mode 100644 index 0000000000000000000000000000000000000000..208b41849b5dbc24cf502a9a430a656f10fed302 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/34beaf9835b09fe53395.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/d1ea2689244397f649b7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/d1ea2689244397f649b7.json new file mode 100644 index 0000000000000000000000000000000000000000..8ef036b0f08d405f2fa040a87f17b1ec8a908a15 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/d1ea2689244397f649b7.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", + "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/321051129e499a9d100e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/321051129e499a9d100e.json new file mode 100644 index 0000000000000000000000000000000000000000..b2d619ec41f2d494df34a9fc6022c46d8f4b865e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/321051129e499a9d100e.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 32, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/59e56e036f276aac27ec.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/59e56e036f276aac27ec.json new file mode 100644 index 0000000000000000000000000000000000000000..a00e9f9fd54626706dccca740d785856b32fa366 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/59e56e036f276aac27ec.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/aed3ac4481c88779a26c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/aed3ac4481c88779a26c.json new file mode 100644 index 0000000000000000000000000000000000000000..0b2822e3d9c6b74b411de13080dff12b22502d96 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/aed3ac4481c88779a26c.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 48, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 48, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/bcd80b4d12e05bc045ce.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/bcd80b4d12e05bc045ce.json new file mode 100644 index 0000000000000000000000000000000000000000..cfa0dea1208bad41e59a4ea0864e9c2831e711cb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/bcd80b4d12e05bc045ce.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 64, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 64, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/c28ab0c7d33e28708b3c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/c28ab0c7d33e28708b3c.json new file mode 100644 index 0000000000000000000000000000000000000000..807e19641aed624b05b86dc3a7a1bc6f89060b95 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/c28ab0c7d33e28708b3c.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/f0e18f873ce42aaa0b9d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/f0e18f873ce42aaa0b9d.json new file mode 100644 index 0000000000000000000000000000000000000000..1a155680f9bd7719a9694e3bd562aaad5142a0ba --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/f0e18f873ce42aaa0b9d.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/fb1938af2d9e7e083207.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/fb1938af2d9e7e083207.json new file mode 100644 index 0000000000000000000000000000000000000000..62d3175ebbb29bf91ccd2471098e079b28f637f0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/fb1938af2d9e7e083207.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 16, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/3da196f521260e769ca6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/3da196f521260e769ca6.json new file mode 100644 index 0000000000000000000000000000000000000000..f38e7453723601092cd245634eaa887650a29105 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/3da196f521260e769ca6.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", + "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f4d3009ec7e739e5ca73.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f4d3009ec7e739e5ca73.json new file mode 100644 index 0000000000000000000000000000000000000000..2df82512c87ac793b3df5abbaa5f6017c3c0b5e5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f4d3009ec7e739e5ca73.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9f509ae99c9e1837405aadbe9b3bb3fbfe01309f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16b6a6b4a1369a831979e2c28c208afbf421b340891cba487bb8c6d49511d31a +size 1915034 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6fb119ef42eaad1fd631fa25271a5f189f716479 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aaf3fd8a2d76bb58beb1ffcd2c98841e8eb0164b432ad31f3066ac6494cd1dc +size 11787264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..36da3f55f68117cacf985f1827a863d4166610b9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_056a0c30057367f9353e+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a9d6cf5b5ca0842341498a231fd41087cc6df62fe6d04e8b924611073b418f5 +size 12130962 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9bed5099dc9483150fe2424d67ca7efe04b4b819 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b4e39ff7264047e564f306270e3be1e754a5c0edc58e50acbff1bb553452b53 +size 920875 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c2f3e72cff96dc89ddc8c057e2e9bf93a9b58a97 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c6db91e68c2de260b3d051da59a240cf5afc202104e4fcf45f4ba09cc6bd59 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..72961b4cf5274b100fb3ba04cf7b196bd0afff31 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e23333e0184c701665b77443dff9e0e7826cde3df7b3a8610dfd7a6cb3bf1916 +size 79913 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ac22dc315fe88dbd543348193a35c19a0e083e2d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932fc04b510efe964eaa7ba1983cedcd49d5bc2fb595b9b914ab4a3f69185d6c +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9d5b7992e6b8646cdd50a3e3ffcb5a20de0e60fc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eb326a41ddb5d6d9473b240905d4d1e8cb2900bada8756712c4a03aea93ee03 +size 244351 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c4dccbe1d2bb154396c4f2e11e90a4ef6f5623b3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5928e1e3d71165915a25d2d9778aedfc58cd57ef9ba8621279558be58e16fb7 +size 136713 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..92b8dc4c0d9c639a32b308212f85380ea42012bd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b15ec188343dcedabc48667e269d3cafe56b8fcb199b8eed4cabf83ab5851d6 +size 2223104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff index b433c15a375080dd7abe572c17c684f2c9838540..19e4082a8bcf35c1f026f775a8eedbb4cafc8dc6 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba3260531785002864c6b2fc10ff07a483362feec8f3f92f14b2922bc54d58ab +oid sha256:8ed82a5211308e005fecaac2b616b3b285f234a482ccd5ac1a1be4e47c53f2fc size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff index 393010b108ce238289767a0752fe1e8caf6d4464..00c32ec2418806471f0d821cabc803ebd25950b9 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9f764447f7adb9d89140ff8b6be640016cbc8a0d1a29789c1017bc25559107e +oid sha256:0b6bf86580e24a1eb275a27ab3a061cc5eb70cbaee41a624f938f0c23833e3fd size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7bd4ac6a8c0d4323ce964930d6ae98a535948761 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82480b63e69fde09f4e77537a22aed3dea465afb13ef04e814662744ab15993b +size 76491 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fb67dee384d5de47983b1ed4eb2a232f6f571341 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd9b7366fe903881c52a003d874f50af91d5c94c4323aab4803b0718ad2ce874 +size 287744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff index 5abb8fb060b6c3bac527a4a487dc67a4884bbae6..3b036f58375100f336c71055e32b35dd32b80767 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:239edbb96e8f3e0e0c753a35097b352ef1b3b4e395e30c174231428c7b901399 +oid sha256:67a4245ec970153c19f88ea1e6c239644af48f531cd128eaf43d20a49b0a8a44 size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..720ae028658683d1c9bd62350db5a2304efaee47 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e52820e19bbf945d45b897bc3900f508886cdb8efdff25515450a7f8d02f8c +size 777821 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7fc9f82ead868f8cf68115f31ac779cbefb5213e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72f54a4af65ac42af3f85054285f1db2841f87c479752d76d7a20dac051e03a6 +size 5889024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..25551823fbb632b0e87b424574bcb16462e47e62 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54ea37c17f95777faafab50142f5108b134aa732136033c29ffb86aec876bc8e +size 6026968 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..03ef86da968ef3310bc88c3519592ffa217973a2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1461a2ed509ab92da33988fdc193eb987c507eff9dd6758b21b339856f2a3dd +size 72141 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5b568a0d71075087a322708cf884498087a8b9cb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2268158d1e455f7dcca19bf11dd211da9bd0e4673ae5493cd2b443817272fd7e +size 308224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4c9017e7e23d13737f79dc5ddd07c423862a5433 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:149f133518214d64a0e2e50e57c0893d9da927d6b187b1c21ed9a629422cbaca +size 136016 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..72caa39353c809fabb45731a770a40c2bf367780 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e5362dd4933cea9c4726b7655a33993e0c9a9c9ebcd093edd5b4b7e9b1bae5 +size 2202624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3caf63fc3c751b2b4e928e50f9ffa5fb140ca59a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dcdea3312b01ffb145fc8f515f2936e088920c874284213e9451eb86c6d8ee8 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..409af56d1a157e8c03946a1d592fe5e1e1e57efe Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9d4d89fb5927614c39f6f2fe56792b1da3ed8cd3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32bb5b4e60e6137e47352e1f6acf2c0e514d97969ad07538833c86b54af2730f +size 2086176 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0b3907e4e1270ee4e01d6c5efb9995086f5af45c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5b3a65eb2452f301e932f38f4d28bd0185d7994e1f1640575cecb182b8b28d4 +size 3349504 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..94604d78d07ab4fb4cb1db1179e1e911e59d3159 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4037e6b92e1e02bcdb1e2252e395b8498e4d7a4948994163e19a803a6c60af78 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c57dd1d12acd369e05dfa89c900a301506ff209e Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..afdb8449d0ede4de36484484b3074b148b111ad2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d0b94c552f60c526deb2934135cef63dcd3a4630d400ef2a4fc75f68de21762 +size 777805 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..980ef53ccf32c10cbc3d310902ec262eca7502e3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049b3fd347bf9f3e7ee596ca51646a5d8340c4003af50cd550f6897a91680ee0 +size 5192704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7352ca14d4f780248a261490f20b46cd3cd6cef6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bddfb68ce8bb7a3c751b6e5a4c756dbaf8ce8e38c6733beaaa2131575b2f9bf6 +size 5330648 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..521eafb619f06e02d9a55b73adb8a5b61dc9ed11 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b40feb88c18f5c98cd5a8fa3c300fdf6d2907cb64d7e032688fcc3caee5f6e +size 48142 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e00ad8b63244ad1a15f515d976c4ddfe50efad32 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80943ed76c6b3a0c8a4312a8cd9edd1033f96938321c8f823896283f6c7b3aba +size 2356224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..08ab01680ad93af2843fd4984dd569303d499073 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ffb0553219112833cbad182f747ba567a3cd540d871208c487906c0fc6959a3 +size 2364046 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..caaae7d4c6b57d6b79887fa945c42561b3c0def9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca3db0257f40d5f315148f4daf5724c2d1e9a135a7734e104d2c422fed5a1c52 +size 1916098 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fb010b6508e3ded988b82fe023788670d94d6c5f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5be62c133067884d30f68a41cee3b2d2c0e865c354de3366f700025a0e462e8b +size 10343424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..29613dd7412a495f28eb3cce38cbaf2b3055fdb0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4173cdd894844064ecb73b5771a30bcfb44fb98fe2da29ea71534bdfbc76a817 +size 10687122 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ebfcf936f4fbf0a94f8+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6ebfcf936f4fbf0a94f8+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ebfcf936f4fbf0a94f8+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ebfcf936f4fbf0a94f8+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6ebfcf936f4fbf0a94f8+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fadcda2a88a9949da7dca016ef3f9829c3938ea5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ebfcf936f4fbf0a94f8+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37a306ba792307a9b08e9384ca686cabb14b9720a48d7c04fdfb9d3649971c5f +size 1920487 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_75d614ebd3722304448a+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_75d614ebd3722304448a+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_75d614ebd3722304448a+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_75d614ebd3722304448a+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_75d614ebd3722304448a+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_75d614ebd3722304448a+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_75d614ebd3722304448a+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a62015aa85d3281196f2652234e832b9770dcb3e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_75d614ebd3722304448a+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b73ad409298f8c773b040b39a885b8fc204aa6cd6031f6ef7a7cb516311dbb51 +size 339944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_75d614ebd3722304448a+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_75d614ebd3722304448a+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..84ee624666450084274091a9fe317a4c14f36046 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_75d614ebd3722304448a+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:370f255ba3fa3a0dafd9068feff897035020b3a7b02ff05d33dd49a1f530bff5 +size 7742464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ab4bb349521213e4549f00c8a6c9428e4b9f639b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bfdc653294fc5b5230a64e865c0867574faebbdf3cea7d5eb0645df40148b7e +size 1910431 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e483ae77d84da91ff839232b0bf343d5f15de72c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9091e88a15bbc42044e459b73ff81dc53387f5dc3a11e1e2d2a653df899da719 +size 7138304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..097fca3d4a2ce58f7616790ea685e4602bf22c1f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee733fe51ed123e8a68ef16c967ba7ca7f7903ed5f64be725c97768965b4d32d +size 7481879 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8650c19b613bb2a1a94f60014e05f11f6c266f7c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f7e85c7d1c8b3724c328336229e86eefd2929d69c03c76392f16f071e2a7fd0 +size 79391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..70acbbcb3fa1d0284c9cb3684bfd4d45145fd825 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df7cdbf4ecf517cf5678495f975eb43a3a3bd95845b6a6012a237b3b7480a184 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..660e7c55d426e5bb9c5f36d7ed34802f4e6410ec --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c3a96e94e40e65a65aee5dae462c8ac82f267705e4a1129a508a6fba700e654 +size 78246 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..89330bb2963a9af707d290f34f87993a3df72703 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8158ef6e31774adb8b6d2d35981c6ff7d7a3627a0a531c38699e7034e36c1819 +size 2376704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8045fdb7fb463c815a93b721fedb60a7dacbf02d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70fe8c7db631d867109dc0371f660b1552837d1c9adfc84b021f28da2c5e4fe9 +size 2384631 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0ce20824f3aaeb974c62e09f3ca110aa5e87027c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d1716333edff8710cdfbba294d37e199b0b6393f9f28abde37017b11b3a33f +size 777805 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cf86dafa154864c4bb9899690503fbdbdfcd0b7b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc9fc885c4e11d4a1f3a4f0cdab770c9c2014af157f00cf973e284063524fd8e +size 4619264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0039ac7267d2eff37481129f7174bae3ded0dcf3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7746c3764620653509fdefed28af0028784c0fa183052d5d8609e4556db39c3f +size 4757208 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2288a18f1312139defef51c7c8fc56678c718fbf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67e0a9bac3f4a45c41f063cb7c340ed9f6b1815bce4662d6c96e463ad74509d8 +size 920875 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1216b9a83dee92ddc72631512710ea4744ce39dc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9969ddb8518badbbf4ea734d136f2722ecdd6bcef743cfd5d7b8840b5c7bba9 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d37fc8c52103e7a032fdc154879b488d0c1405e3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4fc15a4c473ec9658ace42b6645014b0e29bd1569a5931d1e7b95123b68424a +size 920875 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c7e9de3bb13a2da01088be7c7d40de681d0d00bb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39cc1284e36060e464a4aed507eaa1369628acc20a2d407ece78280276a232a9 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f21d5b6d40bbc519ebe2552a5bbd984c1ec1d136 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:617ffa8f001e32ee0196c13b0f23ae06dbdb8de720d647aaba4753171ab16b84 +size 46742 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..91dcfe213d6455484bbeddc1caafee21058a7169 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c9f37eb322306682ac2b6f70982c41deb46ba2a8871ed59b43601e91593b65 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1b4c249e05dcc3fb89fe6324e725f92f199ebe2c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9113dc0e9762cb9331ceecc475920436e309643b52d11693b1647afebeaeb496 +size 73017 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3caf03861bb9bbd0dc97f54d6583bf7e147e8252 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4035f02301558ea04c11606f61cf290049d7ec3387f5c0438859358bf6cdeaeb +size 308224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5c6199b4dfd37c87f104e670a3713c9c48f3d5a0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c89c8035ef93977f62e08cad284fadebaab1ab6ca25157cb7858e016c75b3e2f +size 316031 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..01dcc4577fba2cfb5ac8e14cc790ae56703d9f68 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c733cb6b8ae8f7dedd32e00a03b9d77bd89dc2f455c2c991ab8e5a736a635885 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..904abca1eda03d66d1add321cabb8d682a25bc2d Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a05cbb6e9bfee46f6408+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a05cbb6e9bfee46f6408+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a05cbb6e9bfee46f6408+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a05cbb6e9bfee46f6408+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a05cbb6e9bfee46f6408+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a05cbb6e9bfee46f6408+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a05cbb6e9bfee46f6408+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..29b3ffeab34ebbd8a09371d0a25515109e8a6fa3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a05cbb6e9bfee46f6408+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfe9de7bb79cb736de46bcf79fc17471f401cf06973189f6c75f2059720e4309 +size 136016 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a05cbb6e9bfee46f6408+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a05cbb6e9bfee46f6408+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fc84f338cb6bf4ee870238321acea63d300f6e9b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a05cbb6e9bfee46f6408+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c727e7295cb59ab1f1ade7d2cb55e3ca4551a7c986f249dabe88ce7e72045c +size 2202624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a446aa72fb4ce75a9fdf+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a446aa72fb4ce75a9fdf+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a446aa72fb4ce75a9fdf+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a446aa72fb4ce75a9fdf+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a446aa72fb4ce75a9fdf+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a446aa72fb4ce75a9fdf+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a446aa72fb4ce75a9fdf+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..10c5f22c63a16404c96e6f8ef59352df186c9633 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a446aa72fb4ce75a9fdf+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:122de2e112de7b431ac03c1f8b535a0783a3d59e8b3dd8c8c8c580eb7b5b2f5c +size 920875 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a446aa72fb4ce75a9fdf+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a446aa72fb4ce75a9fdf+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cfb8a0b9ae40c2c86cc9c3be66431258819a6ff6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a446aa72fb4ce75a9fdf+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88e3b497012da2b729981e14a186e9bd2961125c9fb101cba3c2b77e0fd72030 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0035c03b96cdfe4522437d7b3cfe18a04e45886f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20fe486ff812e2f05404f114bcb45e7743d0d06236eeaedf7e1f4b3dc5346b57 +size 920875 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0aeb64d937e74def5ebe62e0aa6e13c3f404b944 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4eceb90c2d382e732a915233f419b5edd7a75e29f684b8f26655116bf71dce6 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..101c97e3230e614f804592b5304a5c26c5499084 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:072ff5d810620e7a9a84b16fef3223b86a92cf8c634d2328aff6230b1c7e50fa +size 7109 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d21392b764c5e5b7c2186216e626448bc7b30581 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cd91c4cf79dc2cf068748116e76eae78c8317e9b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2691f2383b7b223391dbd7984075c95b969a75d2ce0911a54c10e074ca78bea9 +size 2270072 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1ba5954e9bc09680c36ef73d248ae55ca50afdef --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e2f6f39c3be2a0667873d31d9107c396b415d965502c408ee31e241be43f19 +size 3257344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dcc3494228a20f8b7998e468b7418084862a8006 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:411cfaac8a3e7ccf827f19fedf00727036fc15dc8d472b7ecb24236b727778d8 +size 778205 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bee518d412cd0aaf92481011aa04d6240ec94b78 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9b861c844e563e6b80a0ec64bd104ec3672f392baaa0742e89aabcae098358d +size 9247744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a2660130b51b2721e433496a380cfcd9a3dd5f9f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e307ef947b34fca66549c91f96b57d569289be2256903eff3b1a220bbfb6ac7 +size 9385688 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7af22d06dc69bf6d1975c2674500475899c169e0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59a607185ac8e1c9eb201959b1cd6e15e43d8678894f54a331dd268de52495bf +size 772276 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4c14640263604bee4cff58183cd1f735bb78c8c4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dab6e97937d459068207d265be26660df17d9cb2a803327b91b888dd557cd42 +size 2171904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..feb97e210b76c899d73c357b5b74f1acae9a08f8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5edf94ccc57de4922c5718137c281a6404eac0bef6ca5e9a78ad63f130fba276 +size 2310233 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff index faf5bb33780a7839180695dbf77eadfc2dcff7a0..ac8f92e69e46ce2c24354d0fb1c5a17f7e375b15 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4235889251db818829e9e820d76708d1a82e997e608b3e0ef124400e639979cf +oid sha256:03dda9ca3d3466e0d8b4b92f6ac8e41df9732a4783392dea51d095dcbe121407 size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3392d660be8743f611b20aa6a8c1e962695e7645 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfcc7bb7f8f112f0aa2e76043995e160b4448236c38cf01bb47e68d3c3319c45 +size 778205 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9825252df34bc903849d71694065f8006539e1c9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0bf2441989f2c8b4da70c2a48a8af2ad2709440121efbc94e5b89fc03fd8b8 +size 6994944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..425141e038208a72e3b5dcb6e81666caf30992b8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c5c5cbb467631d860a15+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e5517528758cd9a73d1fd4b2186fc45e5639d3e2ab00152fc7daaf2302eccda +size 7132888 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..151d018cb385a2c66474a7b08d9cf11dd024070a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b4cd3cb136cc0bf987f16dfb7409ca294275564fabe191c1e05a0ad061a6cd0 +size 1916098 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..713be4e9b7e6ee8ccb83711047927ea340d2a461 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05d1e4367d1bc117624e8ac2f95e65fc43b5e6a6202abf68217b2d0780f2945 +size 11787264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1b9e96f86c4d466e54c3b45c62b4749dc55da842 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf69fcb5750632201b661935888a311dce0c694b052fe263e4b3c04224de9e74 +size 12130962 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..babab798d113e425e9c7854bd8edcf310dd77534 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200d251291134ed265a4f3699f200becd08012f14f43322038dce8fcf5698090 +size 851463 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1834105fccd6bbfbc855c65bc5ce5fcf9f302450 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f422e5d1c382e6ddcbb9643552740134d5d5903e397052425b220d25ce4703f0 +size 32277504 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..382db3cec1b88161d47155fd5611802c02f1582d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82473f655e940f34378b426f6260339b353288bdbdd0a17c905b5b2934c56ed4 +size 2270072 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..831eef97b23c26aac1e851a92aef8e6d677e1a8e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032e02ccf18fb38a221032d169b23d8199b824b5d9fdc4d310c95fac02f06993 +size 3257344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff index 1ccf67cee3e52eae5250f1483b5f46a16ee5b26a..6866bfae6d1e54fd95823bcf923e0cae34dc8c51 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6fa44cd5fe7409ad484663acf9f8ddc760208de881c0135f2cb8fa1ff1a295be +oid sha256:4a0273eb7a77e8824a8b886166b9e9b154668894dee0f497492232975e640763 size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8a554e1b77b9a7df908fa74bdc87045aa82e3fae --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff9d6f42eae55702201bb0e47eb34ec59feb160a18d9cd679c5a34266c5256d +size 339944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a7a70ec57b7a12609826a08367ad551f796f0dfb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c411f72cfea09d1fd2e949afebcbd79d1b4bdcd1fb49a3cdc073c21b6e3972c +size 8059904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e4d157e7d1f33e1d3fdb+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e4d157e7d1f33e1d3fdb+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e4d157e7d1f33e1d3fdb+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e4d157e7d1f33e1d3fdb+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e4d157e7d1f33e1d3fdb+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5657789062feb0f4c892180983d927b8de1a5b56 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e4d157e7d1f33e1d3fdb+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a94db50c586dda0c6e6ff5b77a9b3fd73a337d193a8b37c5614e790db7f638f +size 375876 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..df3ee63e6b4a3c60f06ead2e8e1c5225ad083cc6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a519bfd64a19778e7e07e3bdb6db884a2c9e5d9012d2755db17deb84e86ba91 +size 920875 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..eeaa154db8f569971c3bd834d9cc8704eba8d0a8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b579ac82cd273c18439532dc38822c29a405a08938f9b47b3b56e6a1216b757d +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..80cf0e7d916f54d6b81059eedf32e3b33660f9e1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce44c3da60a141c45bf52aba45ee0284557f2a2333feb82bb153bcde724d4e5 +size 339944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f229c4e23aff36d346b3e804897b3ee0cac445c7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7835b966c4346d1c0d379f864e4868b0b770f67a8c74f18d00a28d7c5f8c5d41 +size 7742464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6b510d4be87ec49bf3c76155e191c68ae173e7c8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3408a18e64c05a03ec39c2b12533fbbe6bc69c67712e90697aa89abd20b18eee +size 778205 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ec3d719d1dc378ad3212155e623872c68ca2770f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ea9ca3fb605e724ac71ec24231b1601a808c0e93f6c2d45df4abe5f53369be +size 8285184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..98290131dfbd77806f18f9c07872124ae42d81b9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23c996060cec1f449494dd6ae3ac9d7e4f057de890b0ad1a640677e80e35f0a2 +size 8423128 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f8a0bbe4c09b370fbea0+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f8a0bbe4c09b370fbea0+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f8a0bbe4c09b370fbea0+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f8a0bbe4c09b370fbea0+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f8a0bbe4c09b370fbea0+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f8a0bbe4c09b370fbea0+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f8a0bbe4c09b370fbea0+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3fae42df210c7779ce2951fec04ef1fb09d385e5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f8a0bbe4c09b370fbea0+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1743a87c67b12f9b0f4f0652b180a421a911d35955e6184b46575ed5f2f56a60 +size 2268990 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f8a0bbe4c09b370fbea0+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f8a0bbe4c09b370fbea0+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..93fb67f998e55209b824321b8282d81af08a355d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f8a0bbe4c09b370fbea0+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7708f1f9eefeb672997914aeb5a17e57356055e868fc87f5d72c5e2fd8f22f1b +size 3257344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.neff index 5f03e3d6dc6fa7a166c884b0747a34aa9b40adad..c16d74d6b331f433f8ca43d79e3a115cc39b5eb8 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f72c56aa0bc4fc5b0c2fba0a4e4adbd7c0df0a03dfbbb7de7556e702910ad4da +oid sha256:b304168e9f32c2eb385c1e58472edb7e004bb6633814c997d09fd123b0aafac7 size 134144