diff --git "a/train.log" "b/train.log" new file mode 100644--- /dev/null +++ "b/train.log" @@ -0,0 +1,5995 @@ +[2025-05-10 07:02:22,858] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-05-10 07:02:22,961] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-05-10 07:02:22,990] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-05-10 07:02:22,994] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-05-10 07:02:23,057] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-05-10 07:02:23,122] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-05-10 07:02:23,123] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-05-10 07:03:03,382] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-05-10 07:03:03,385] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-05-10 07:03:03,391] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-05-10 07:03:03,393] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-05-10 07:03:03,397] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-05-10 07:03:03,401] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-05-10 07:03:03,401] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-05-10 07:03:03,404] [INFO] [comm.py:652:init_distributed] cdb=None +reward_funcs: [, ] +Loaded 178 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_179_vqarad_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_1691_mapqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_2152_vsr_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_8495_hateful_memes_cauldron_llava_format.json +reward_funcs: [, ] +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_21076_raven_cauldron.json +Loaded 178 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_179_vqarad_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_1691_mapqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_2152_vsr_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_8495_hateful_memes_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_21076_raven_cauldron.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_30728_clevr_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_30728_clevr_cauldron_llava_format.json +reward_funcs: [, ] +reward_funcs: [, ] +Loaded 178 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_179_vqarad_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_1691_mapqa_mathv360k.json +reward_funcs: [, ] +Loaded 178 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_179_vqarad_cauldron_llava_format.json +reward_funcs: [, ] +reward_funcs: [, ] +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_1691_mapqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_2152_vsr_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_2152_vsr_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_8495_hateful_memes_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_8495_hateful_memes_cauldron_llava_format.json +Loaded 178 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_179_vqarad_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_21076_raven_cauldron.json +Loaded 178 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_179_vqarad_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_1691_mapqa_mathv360k.json +Loaded 178 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_179_vqarad_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_21076_raven_cauldron.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_1691_mapqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_2152_vsr_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_1691_mapqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_2152_vsr_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_2152_vsr_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_8495_hateful_memes_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_8495_hateful_memes_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_8495_hateful_memes_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_30728_clevr_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_30728_clevr_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_21076_raven_cauldron.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_21076_raven_cauldron.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_21076_raven_cauldron.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_30728_clevr_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_30728_clevr_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_30728_clevr_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_99995_figureqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_99995_figureqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_99995_figureqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_99995_figureqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_99995_figureqa_cauldron_llava_format.json +Loaded 168 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_168_geos_mathv360k.jsonLoaded 168 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_168_geos_mathv360k.json + +Loaded 168 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_168_geos_mathv360k.json +Loaded 168 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_168_geos_mathv360k.json +Loaded 168 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_168_geos_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1275_intergps_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1275_intergps_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1275_intergps_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1275_intergps_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1275_intergps_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_99995_figureqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/yorn_99995_figureqa_cauldron_llava_format.json +Loaded 168 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_168_geos_mathv360k.json +Loaded 168 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_168_geos_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1908_tabmwp_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1908_tabmwp_mathv360k.jsonLoaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1908_tabmwp_mathv360k.json + +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1908_tabmwp_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1908_tabmwp_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1275_intergps_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1275_intergps_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1908_tabmwp_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_1908_tabmwp_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2091_geo3k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2091_geo3k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2091_geo3k.jsonLoaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2091_geo3k.json + +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2091_geo3k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2091_geo3k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2091_geo3k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2429_ai2d_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2429_ai2d_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2429_ai2d_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2429_ai2d_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2429_ai2d_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2429_ai2d_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_2429_ai2d_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_3536_geometry3k_mathv360k.jsonLoaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_3536_geometry3k_mathv360k.json + +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_3536_geometry3k_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_3536_geometry3k_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_3536_geometry3k_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_3536_geometry3k_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_3536_geometry3k_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4943_iconqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4943_iconqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4943_iconqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4943_iconqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4943_iconqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4943_iconqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4943_iconqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4971_scienceqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4971_scienceqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4971_scienceqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4971_scienceqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4971_scienceqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4971_scienceqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_4971_scienceqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5000_unigeo_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5000_unigeo_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5000_unigeo_mathv360k.jsonLoaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5000_unigeo_mathv360k.json + +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5000_unigeo_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5000_unigeo_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5000_unigeo_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5898_figureqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5898_figureqa_mathv360k.jsonLoaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5898_figureqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5898_figureqa_mathv360k.json + +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5898_figureqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5898_figureqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_5898_figureqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_6425_geoqa+_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_6425_geoqa+_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_6425_geoqa+_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_6425_geoqa+_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_6425_geoqa+_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_6425_geoqa+_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_6425_geoqa+_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_12723_pmc-vqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_12723_pmc-vqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_12723_pmc-vqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_12723_pmc-vqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_12723_pmc-vqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_12723_pmc-vqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_12723_pmc-vqa_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_14361_visual7w_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_14361_visual7w_cauldron_llava_format.jsonLoaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_14361_visual7w_cauldron_llava_format.json + +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_14361_visual7w_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_14361_visual7w_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_14361_visual7w_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_14361_visual7w_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_15713_iconqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_15713_iconqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_15713_iconqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_15713_iconqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_15713_iconqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_15713_iconqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_15713_iconqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_scienceqa_nona_context.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_scienceqa_nona_context.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_scienceqa_nona_context.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_scienceqa_nona_context.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_scienceqa_nona_context.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_scienceqa_nona_context.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_scienceqa_nona_context.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_tqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_tqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_tqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_tqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_tqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_tqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_18645_tqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_66488_mavis_math_rule_geo.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_66488_mavis_math_rule_geo.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_66488_mavis_math_rule_geo.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_66488_mavis_math_rule_geo.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_66488_mavis_math_rule_geo.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_66488_mavis_math_rule_geo.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_66488_mavis_math_rule_geo.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_67823_geo170k_qa.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_67823_geo170k_qa.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_67823_geo170k_qa.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_67823_geo170k_qa.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_67823_geo170k_qa.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_67823_geo170k_qa.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/mcq_67823_geo170k_qa.json +Loaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_18260_chartqa_cauldron_llava_format.json +Loaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_18260_chartqa_cauldron_llava_format.json +Loaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_18260_chartqa_cauldron_llava_format.json +Loaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_18260_chartqa_cauldron_llava_format.json +Loaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_18260_chartqa_cauldron_llava_format.jsonLoaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_18260_chartqa_cauldron_llava_format.json + +Loaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_18260_chartqa_cauldron_llava_format.json +Loaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_6694_multihiertt_cauldron.json +Loaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_6694_multihiertt_cauldron.json +Loaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_6694_multihiertt_cauldron.json +Loaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_6694_multihiertt_cauldron.json +Loaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_6694_multihiertt_cauldron.jsonLoaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_6694_multihiertt_cauldron.json + +Loaded 2000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/chart_6694_multihiertt_cauldron.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/5280_clevr-math_mathv360k.jsonLoaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/5280_clevr-math_mathv360k.json + +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/5280_clevr-math_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/5280_clevr-math_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/5280_clevr-math_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/5280_clevr-math_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/5280_clevr-math_mathv360k.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/98675_tallyqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/98675_tallyqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/98675_tallyqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/98675_tallyqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/98675_tallyqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/98675_tallyqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_2113_infographic_vqa_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_2113_infographic_vqa_llava_format.jsonLoaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_2113_infographic_vqa_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_2113_infographic_vqa_llava_format.json + +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_2113_infographic_vqa_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_2113_infographic_vqa_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/98675_tallyqa_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_2113_infographic_vqa_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_4394_infographic_vqa.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_4394_infographic_vqa.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_4394_infographic_vqa.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_4394_infographic_vqa.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_4394_infographic_vqa.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_4394_infographic_vqa.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_4394_infographic_vqa.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_8171_docvqa_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_8171_docvqa_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_8171_docvqa_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_8171_docvqa_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_8171_docvqa_llava_format.jsonLoaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_8171_docvqa_llava_format.json + +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_8171_docvqa_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_33010_robut_wtq_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_33010_robut_wtq_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_33010_robut_wtq_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_33010_robut_wtq_cauldron_llava_format.jsonLoaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_33010_robut_wtq_cauldron_llava_format.json + +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_33010_robut_wtq_cauldron_llava_format.json +Loaded 1000 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/doc_33010_robut_wtq_cauldron_llava_format.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/rec_jsons_processed/refcocog_train.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/rec_jsons_processed/refcocog_train.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/rec_jsons_processed/refcocog_train.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/rec_jsons_processed/refcocog_train.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/rec_jsons_processed/refcocog_train.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/rec_jsons_processed/refcocog_train.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_19990_allava_instruct_vflan4v.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_19990_allava_instruct_vflan4v.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_19990_allava_instruct_vflan4v.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_19990_allava_instruct_vflan4v.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/rec_jsons_processed/refcocog_train.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_19990_allava_instruct_vflan4v.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_19990_allava_instruct_vflan4v.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_19990_allava_instruct_vflan4v.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_4864_ai2d_gpt4v.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_4864_ai2d_gpt4v.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_4864_ai2d_gpt4v.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_4864_ai2d_gpt4v.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_4864_ai2d_gpt4v.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_4864_ai2d_gpt4v.json +Loaded 2500 samples from /mnt/bn/magellan-product-audit/xushilin/VLM-R1/data/grpo_processed/caption_4864_ai2d_gpt4v.json +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/5 [00:00 +n124-112-200:54095:54095 [0] NCCL INFO NET/Plugin: No plugin found (gcp-fastrak) +n124-112-200:54095:54095 [0] NCCL INFO NET/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54095:54095 [0] NCCL INFO NET/Plugin: Failed to find ncclNetPlugin_v8 symbol. +n124-112-200:54095:54095 [0] NCCL INFO NET/Plugin: Loaded net plugin FasTrak (v7) +Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`. +Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`. +Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`. +n124-112-200:54095:54095 [0] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54095:54095 [0] NCCL INFO Using network FasTrak +Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`. +n124-112-200:54095:54095 [0] NCCL INFO DMA-BUF is available on GPU device 0 +n124-112-200:54095:54095 [0] NCCL INFO ncclCommInitRank comm 0xc64e8530 rank 1 nranks 2 cudaDev 0 nvmlDev 1 busId 5000 commId 0xd39f30e972357747 - Init START +n124-112-200:54095:54095 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:54095 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:54095 [0] NCCL INFO NCCL_NET_GDR_LEVEL set by environment to PIX +n124-112-200:54095:54095 [0] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54095:54095 [0] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - GPU/0-4000 (0) +n124-112-200:54095:54095 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - GPU/0-5000 (1) +n124-112-200:54095:54095 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54095:54095 [0] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54095:54095 [0] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54095:54095 [0] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54095:54095 [0] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54095:54095 [0] NCCL INFO ========================================== +n124-112-200:54095:54095 [0] NCCL INFO GPU/4000 :GPU/0-4000 (0/5000.0/LOC) GPU/0-5000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54095:54095 [0] NCCL INFO GPU/5000 :GPU/0-4000 (2/370.8/NVL) GPU/0-5000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54095:54095 [0] NCCL INFO Setting affinity for GPU 1 to 0fffffff,ffffff00,00000000,000fffff,ffffffff +n124-112-200:54095:54095 [0] NCCL INFO NCCL_NVLS_ENABLE set by environment to 0. +n124-112-200:54095:54095 [0] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54095:54095 [0] NCCL INFO 0 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 1 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 2 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 3 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 4 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 5 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 6 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 7 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 8 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 9 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 10 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 11 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 60.000000/60.000000, type NVL/PIX, sameChannels 0 +n124-112-200:54095:54095 [0] NCCL INFO 0 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 1 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 2 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 3 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 4 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 5 : GPU/0 GPU/1 +n124-112-200:54095:54095 [0] NCCL INFO 6 : GPU/1 GPU/0 +n124-112-200:54095:54095 [0] NCCL INFO 7 : GPU/1 GPU/0 +n124-112-200:54095:54095 [0] NCCL INFO 8 : GPU/1 GPU/0 +n124-112-200:54095:54095 [0] NCCL INFO 9 : GPU/1 GPU/0 +n124-112-200:54095:54095 [0] NCCL INFO 10 : GPU/1 GPU/0 +n124-112-200:54095:54095 [0] NCCL INFO 11 : GPU/1 GPU/0 +n124-112-200:54095:54095 [0] NCCL INFO comm 0xc64e8530 rank 1 nRanks 2 nNodes 1 localRanks 2 localRank 1 MNNVL 0 +n124-112-200:54095:54095 [0] NCCL INFO Tree 0 : 0 -> 1 -> -1/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 12 : 0 -> 1 -> -1/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 1 : 0 -> 1 -> -1/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 13 : 0 -> 1 -> -1/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 2 : 0 -> 1 -> -1/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 14 : 0 -> 1 -> -1/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 3 : 0 -> 1 -> -1/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 15 : 0 -> 1 -> -1/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 4 : 0 -> 1 -> -1/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 16 : 0 -> 1 -> -1/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 5 : 0 -> 1 -> -1/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 17 : 0 -> 1 -> -1/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 6 : -1 -> 1 -> 0/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 18 : -1 -> 1 -> 0/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 7 : -1 -> 1 -> 0/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 19 : -1 -> 1 -> 0/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 8 : -1 -> 1 -> 0/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 20 : -1 -> 1 -> 0/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 9 : -1 -> 1 -> 0/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 21 : -1 -> 1 -> 0/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 10 : -1 -> 1 -> 0/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 22 : -1 -> 1 -> 0/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 11 : -1 -> 1 -> 0/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO Tree 23 : -1 -> 1 -> 0/-1/-1 +n124-112-200:54095:54095 [0] NCCL INFO NCCL_MIN_NCHANNELS set by environment to 4. +n124-112-200:54095:54095 [0] NCCL INFO Ring 00 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 01 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 02 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 03 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 04 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 05 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 06 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 07 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 08 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 09 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 10 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 11 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 12 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 13 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 14 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 15 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 16 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 17 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 18 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 19 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 20 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 21 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 22 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Ring 23 : 0 -> 1 -> 0 +n124-112-200:54095:54095 [0] NCCL INFO Trees [0] -1/-1/-1->1->0 [1] -1/-1/-1->1->0 [2] -1/-1/-1->1->0 [3] -1/-1/-1->1->0 [4] -1/-1/-1->1->0 [5] -1/-1/-1->1->0 [6] 0/-1/-1->1->-1 [7] 0/-1/-1->1->-1 [8] 0/-1/-1->1->-1 [9] 0/-1/-1->1->-1 [10] 0/-1/-1->1->-1 [11] 0/-1/-1->1->-1 [12] -1/-1/-1->1->0 [13] -1/-1/-1->1->0 [14] -1/-1/-1->1->0 [15] -1/-1/-1->1->0 [16] -1/-1/-1->1->0 [17] -1/-1/-1->1->0 [18] 0/-1/-1->1->-1 [19] 0/-1/-1->1->-1 [20] 0/-1/-1->1->-1 [21] 0/-1/-1->1->-1 [22] 0/-1/-1->1->-1 [23] 0/-1/-1->1->-1 +n124-112-200:54095:54095 [0] NCCL INFO NCCL_BUFFSIZE set by environment to 4194304. +n124-112-200:54095:54095 [0] NCCL INFO NCCL_P2P_NVL_CHUNKSIZE set by environment to 1048576. +n124-112-200:54095:54095 [0] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54095:54095 [0] NCCL INFO NCCL_CUMEM_ENABLE set by environment to 0. +n124-112-200:54095:54095 [0] NCCL INFO Channel 00/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 01/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 02/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 03/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 04/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 05/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 06/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 07/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 08/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 09/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 10/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 11/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 12/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 13/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 14/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 15/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 16/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 17/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 18/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 19/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 20/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 21/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 22/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Channel 23/0 : 1[1] -> 0[0] via P2P/IPC +n124-112-200:54095:54095 [0] NCCL INFO Connected all rings +n124-112-200:54095:54095 [0] NCCL INFO Connected all trees +n124-112-200:54095:54095 [0] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54095:54095 [0] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54095:54095 [0] NCCL INFO threadThresholds 8/8/64 | 16/8/64 | 512 | 512 +n124-112-200:54095:54095 [0] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54095:54095 [0] NCCL INFO TUNER/Plugin: NCCL_TUNER_PLUGIN set to libnccl-tuner.so +n124-112-200:54095:54095 [0] NCCL INFO TUNER/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54095:54095 [0] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2, using internal tuner instead. +n124-112-200:54095:54095 [0] NCCL INFO ncclCommInitRank comm 0xc64e8530 rank 1 nranks 2 cudaDev 0 nvmlDev 1 busId 5000 commId 0xd39f30e972357747 - Init COMPLETE +INFO 05-10 07:03:22 __init__.py:207] Automatically detected platform cuda. +[rank6]:[W510 07:03:22.351410957 ProcessGroupNCCL.cpp:4115] [PG ID 0 PG GUID 0 Rank 6] using GPU 6 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect.Specify device_ids in barrier() to force use of a particular device,or call init_process_group() with a device_id. +[rank0]:[W510 07:03:22.402076260 ProcessGroupNCCL.cpp:4115] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect.Specify device_ids in barrier() to force use of a particular device,or call init_process_group() with a device_id. +NCCL version 2.21.5+cuda12.4 +n124-112-200:54095:55804 [0] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54095:55804 [0] NCCL INFO Using network FasTrak +n124-112-200:54095:55804 [0] NCCL INFO DMA-BUF is available on GPU device 0 +n124-112-200:54101:54101 [6] NCCL INFO cudaDriverVersion 12020 +n124-112-200:54101:54101 [6] NCCL INFO NCCL_SOCKET_FAMILY set by environment to AF_INET6 +n124-112-200:54101:54101 [6] NCCL INFO NCCL_SOCKET_IFNAME set by environment to =eth0 +n124-112-200:54101:54101 [6] NCCL INFO Bootstrap : Using eth0:fdbd:dccd:cdc2:1234:0:45::<0> +n124-112-200:54098:54098 [3] NCCL INFO cudaDriverVersion 12020 +n124-112-200:54098:54098 [3] NCCL INFO NCCL_SOCKET_FAMILY set by environment to AF_INET6 +n124-112-200:54098:54098 [3] NCCL INFO NCCL_SOCKET_IFNAME set by environment to =eth0 +n124-112-200:54098:54098 [3] NCCL INFO Bootstrap : Using eth0:fdbd:dccd:cdc2:1234:0:45::<0> +n124-112-200:54100:54100 [5] NCCL INFO cudaDriverVersion 12020 +n124-112-200:54100:54100 [5] NCCL INFO NCCL_SOCKET_FAMILY set by environment to AF_INET6 +n124-112-200:54100:54100 [5] NCCL INFO NCCL_SOCKET_IFNAME set by environment to =eth0 +n124-112-200:54100:54100 [5] NCCL INFO Bootstrap : Using eth0:fdbd:dccd:cdc2:1234:0:45::<0> +n124-112-200:54101:54101 [6] NCCL INFO NET/Plugin: No plugin found (gcp-fastrak) +n124-112-200:54098:54098 [3] NCCL INFO NET/Plugin: No plugin found (gcp-fastrak) +n124-112-200:54100:54100 [5] NCCL INFO NET/Plugin: No plugin found (gcp-fastrak) +n124-112-200:54101:54101 [6] NCCL INFO NET/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54101:54101 [6] NCCL INFO NET/Plugin: Failed to find ncclNetPlugin_v8 symbol. +n124-112-200:54101:54101 [6] NCCL INFO NET/Plugin: Loaded net plugin FasTrak (v7) +n124-112-200:54098:54098 [3] NCCL INFO NET/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54098:54098 [3] NCCL INFO NET/Plugin: Failed to find ncclNetPlugin_v8 symbol. +n124-112-200:54098:54098 [3] NCCL INFO NET/Plugin: Loaded net plugin FasTrak (v7) +n124-112-200:54100:54100 [5] NCCL INFO NET/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54100:54100 [5] NCCL INFO NET/Plugin: Failed to find ncclNetPlugin_v8 symbol. +n124-112-200:54100:54100 [5] NCCL INFO NET/Plugin: Loaded net plugin FasTrak (v7) +[rank4]:[W510 07:03:23.553885793 ProcessGroupNCCL.cpp:4115] [PG ID 0 PG GUID 0 Rank 4] using GPU 4 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect.Specify device_ids in barrier() to force use of a particular device,or call init_process_group() with a device_id. +n124-112-200:54099:54099 [4] NCCL INFO cudaDriverVersion 12020 +n124-112-200:54099:54099 [4] NCCL INFO NCCL_SOCKET_FAMILY set by environment to AF_INET6 +n124-112-200:54099:54099 [4] NCCL INFO NCCL_SOCKET_IFNAME set by environment to =eth0 +n124-112-200:54099:54099 [4] NCCL INFO Bootstrap : Using eth0:fdbd:dccd:cdc2:1234:0:45::<0> +[rank2]:[W510 07:03:23.600727422 ProcessGroupNCCL.cpp:4115] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect.Specify device_ids in barrier() to force use of a particular device,or call init_process_group() with a device_id. +n124-112-200:54097:54097 [2] NCCL INFO cudaDriverVersion 12020 +n124-112-200:54097:54097 [2] NCCL INFO NCCL_SOCKET_FAMILY set by environment to AF_INET6 +n124-112-200:54097:54097 [2] NCCL INFO NCCL_SOCKET_IFNAME set by environment to =eth0 +n124-112-200:54097:54097 [2] NCCL INFO Bootstrap : Using eth0:fdbd:dccd:cdc2:1234:0:45::<0> +n124-112-200:54099:54099 [4] NCCL INFO NET/Plugin: No plugin found (gcp-fastrak) +n124-112-200:54097:54097 [2] NCCL INFO NET/Plugin: No plugin found (gcp-fastrak) +n124-112-200:54099:54099 [4] NCCL INFO NET/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54099:54099 [4] NCCL INFO NET/Plugin: Failed to find ncclNetPlugin_v8 symbol. +n124-112-200:54099:54099 [4] NCCL INFO NET/Plugin: Loaded net plugin FasTrak (v7) +n124-112-200:54097:54097 [2] NCCL INFO NET/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54097:54097 [2] NCCL INFO NET/Plugin: Failed to find ncclNetPlugin_v8 symbol. +n124-112-200:54097:54097 [2] NCCL INFO NET/Plugin: Loaded net plugin FasTrak (v7) +n124-112-200:54097:55810 [2] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54097:55810 [2] NCCL INFO Using network FasTrak +n124-112-200:54097:55810 [2] NCCL INFO DMA-BUF is available on GPU device 2 +[rank1]:[W510 07:03:23.236233770 ProcessGroupNCCL.cpp:4115] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect.Specify device_ids in barrier() to force use of a particular device,or call init_process_group() with a device_id. +n124-112-200:54096:54096 [1] NCCL INFO cudaDriverVersion 12020 +n124-112-200:54096:54096 [1] NCCL INFO NCCL_SOCKET_FAMILY set by environment to AF_INET6 +n124-112-200:54096:54096 [1] NCCL INFO NCCL_SOCKET_IFNAME set by environment to =eth0 +n124-112-200:54096:54096 [1] NCCL INFO Bootstrap : Using eth0:fdbd:dccd:cdc2:1234:0:45::<0> +n124-112-200:54101:55805 [6] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54101:55805 [6] NCCL INFO Using network FasTrak +n124-112-200:54096:54096 [1] NCCL INFO NET/Plugin: No plugin found (gcp-fastrak) +n124-112-200:54100:55807 [5] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54100:55807 [5] NCCL INFO Using network FasTrak +n124-112-200:54098:55806 [3] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54098:55806 [3] NCCL INFO Using network FasTrak +n124-112-200:54099:55809 [4] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54099:55809 [4] NCCL INFO Using network FasTrak +n124-112-200:54096:54096 [1] NCCL INFO NET/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54096:54096 [1] NCCL INFO NET/Plugin: Failed to find ncclNetPlugin_v8 symbol. +n124-112-200:54096:54096 [1] NCCL INFO NET/Plugin: Loaded net plugin FasTrak (v7) +n124-112-200:54101:55805 [6] NCCL INFO DMA-BUF is available on GPU device 6 +n124-112-200:54099:55809 [4] NCCL INFO DMA-BUF is available on GPU device 4 +n124-112-200:54100:55807 [5] NCCL INFO DMA-BUF is available on GPU device 5 +n124-112-200:54098:55806 [3] NCCL INFO DMA-BUF is available on GPU device 3 +n124-112-200:54096:55836 [1] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54096:55836 [1] NCCL INFO Using network FasTrak +n124-112-200:54096:55836 [1] NCCL INFO DMA-BUF is available on GPU device 1 +n124-112-200:54096:55836 [1] NCCL INFO ncclCommInitRank comm 0xc406a3a0 rank 1 nranks 7 cudaDev 1 nvmlDev 2 busId b000 commId 0xc9abc278b27d692f - Init START +n124-112-200:54095:55804 [0] NCCL INFO ncclCommInitRank comm 0xcad968b0 rank 0 nranks 7 cudaDev 0 nvmlDev 1 busId 5000 commId 0xc9abc278b27d692f - Init START +n124-112-200:54097:55810 [2] NCCL INFO ncclCommInitRank comm 0x23947350 rank 2 nranks 7 cudaDev 2 nvmlDev 3 busId c000 commId 0xc9abc278b27d692f - Init START +n124-112-200:54100:55807 [5] NCCL INFO ncclCommInitRank comm 0xc6db1170 rank 5 nranks 7 cudaDev 5 nvmlDev 6 busId 8b000 commId 0xc9abc278b27d692f - Init START +n124-112-200:54098:55806 [3] NCCL INFO ncclCommInitRank comm 0x4f6041b0 rank 3 nranks 7 cudaDev 3 nvmlDev 4 busId 84000 commId 0xc9abc278b27d692f - Init START +n124-112-200:54099:55809 [4] NCCL INFO ncclCommInitRank comm 0x37e31620 rank 4 nranks 7 cudaDev 4 nvmlDev 5 busId 85000 commId 0xc9abc278b27d692f - Init START +n124-112-200:54101:55805 [6] NCCL INFO ncclCommInitRank comm 0xc409e9d0 rank 6 nranks 7 cudaDev 6 nvmlDev 7 busId 8c000 commId 0xc9abc278b27d692f - Init START +n124-112-200:54096:55836 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:55805 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:55804 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54097:55810 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54098:55806 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:55807 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:55809 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54096:55836 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:55805 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:55804 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54097:55810 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54098:55806 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:55807 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:55809 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54097:55810 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:55809 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:55804 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:55805 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54096:55836 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54098:55806 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:55807 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:55809 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:55804 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:55805 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54096:55836 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:55807 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54097:55810 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54098:55806 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:55809 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:55804 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54096:55836 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:55805 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:55807 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54097:55810 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54098:55806 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:55809 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:55804 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54096:55836 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:55805 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:55807 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54097:55810 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54098:55806 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:55809 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:55809 [4] NCCL INFO NCCL_CUMEM_ENABLE set by environment to 0. +n124-112-200:54101:55805 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:55805 [6] NCCL INFO NCCL_CUMEM_ENABLE set by environment to 0. +n124-112-200:54099:55809 [4] NCCL INFO NCCL_NET_GDR_LEVEL set by environment to PIX +n124-112-200:54099:55809 [4] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54099:55809 [4] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54099:55809 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54099:55809 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54099:55809 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54099:55809 [4] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54099:55809 [4] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54099:55809 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54099:55809 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54099:55809 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54099:55809 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54099:55809 [4] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54099:55809 [4] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54099:55809 [4] NCCL INFO ========================================== +n124-112-200:54099:55809 [4] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54099:55809 [4] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54099:55809 [4] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54099:55809 [4] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54099:55809 [4] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54099:55809 [4] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54099:55809 [4] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54099:55809 [4] NCCL INFO Setting affinity for GPU 5 to ffff,ffffffff,f0000000,000000ff,ffffffff,fff00000,00000000 +n124-112-200:54099:55809 [4] NCCL INFO NCCL_NVLS_ENABLE set by environment to 0. +n124-112-200:54096:55836 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:55809 [4] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54099:55809 [4] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54099:55809 [4] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:55809 [4] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO NCCL_CUMEM_ENABLE set by environment to 0. +n124-112-200:54100:55807 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:55807 [5] NCCL INFO NCCL_CUMEM_ENABLE set by environment to 0. +n124-112-200:54095:55804 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:55805 [6] NCCL INFO NCCL_NET_GDR_LEVEL set by environment to PIX +n124-112-200:54096:55836 [1] NCCL INFO NCCL_NET_GDR_LEVEL set by environment to PIX +n124-112-200:54096:55836 [1] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54096:55836 [1] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54096:55836 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54096:55836 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54096:55836 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54096:55836 [1] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54096:55836 [1] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54096:55836 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54096:55836 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54096:55836 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54096:55836 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54096:55836 [1] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54096:55836 [1] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54096:55836 [1] NCCL INFO ========================================== +n124-112-200:54096:55836 [1] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54096:55836 [1] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54101:55805 [6] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54096:55836 [1] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54101:55805 [6] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54096:55836 [1] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54101:55805 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:55836 [1] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54096:55836 [1] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54101:55805 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54096:55836 [1] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54101:55805 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54101:55805 [6] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54101:55805 [6] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54096:55836 [1] NCCL INFO Setting affinity for GPU 2 to 0fffffff,ffffff00,00000000,000fffff,ffffffff +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54101:55805 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54101:55805 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54096:55836 [1] NCCL INFO NCCL_NVLS_ENABLE set by environment to 0. +n124-112-200:54101:55805 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54101:55805 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54101:55805 [6] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54101:55805 [6] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54101:55805 [6] NCCL INFO ========================================== +n124-112-200:54101:55805 [6] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54101:55805 [6] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54101:55805 [6] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54101:55805 [6] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54101:55805 [6] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54101:55805 [6] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54101:55805 [6] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54101:55805 [6] NCCL INFO Setting affinity for GPU 7 to ffff,ffffffff,f0000000,000000ff,ffffffff,fff00000,00000000 +n124-112-200:54101:55805 [6] NCCL INFO NCCL_NVLS_ENABLE set by environment to 0. +n124-112-200:54096:55836 [1] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54096:55836 [1] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54096:55836 [1] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:55836 [1] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54101:55805 [6] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54101:55805 [6] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:55805 [6] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54098:55806 [3] NCCL INFO NCCL_CUMEM_ENABLE set by environment to 0. +n124-112-200:54100:55807 [5] NCCL INFO NCCL_NET_GDR_LEVEL set by environment to PIX +n124-112-200:54097:55810 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:55807 [5] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54100:55807 [5] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54100:55807 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54100:55807 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54100:55807 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54100:55807 [5] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54100:55807 [5] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54100:55807 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54100:55807 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54100:55807 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54100:55807 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54100:55807 [5] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54100:55807 [5] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54100:55807 [5] NCCL INFO ========================================== +n124-112-200:54100:55807 [5] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54100:55807 [5] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54100:55807 [5] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54100:55807 [5] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54100:55807 [5] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54100:55807 [5] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54100:55807 [5] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54098:55806 [3] NCCL INFO NCCL_NET_GDR_LEVEL set by environment to PIX +n124-112-200:54095:55804 [0] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54100:55807 [5] NCCL INFO Setting affinity for GPU 6 to ffff,ffffffff,f0000000,000000ff,ffffffff,fff00000,00000000 +n124-112-200:54095:55804 [0] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54095:55804 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54095:55804 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54095:55804 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54095:55804 [0] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54095:55804 [0] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54095:55804 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54095:55804 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54095:55804 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54095:55804 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54095:55804 [0] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54095:55804 [0] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54095:55804 [0] NCCL INFO ========================================== +n124-112-200:54100:55807 [5] NCCL INFO NCCL_NVLS_ENABLE set by environment to 0. +n124-112-200:54095:55804 [0] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54095:55804 [0] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54095:55804 [0] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54095:55804 [0] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54095:55804 [0] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54095:55804 [0] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54095:55804 [0] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54095:55804 [0] NCCL INFO Setting affinity for GPU 1 to 0fffffff,ffffff00,00000000,000fffff,ffffffff +n124-112-200:54098:55806 [3] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54098:55806 [3] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54098:55806 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54098:55806 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54098:55806 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54098:55806 [3] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54098:55806 [3] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54098:55806 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54098:55806 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54098:55806 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54098:55806 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54098:55806 [3] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54098:55806 [3] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54098:55806 [3] NCCL INFO ========================================== +n124-112-200:54098:55806 [3] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54098:55806 [3] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54098:55806 [3] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54098:55806 [3] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54098:55806 [3] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54098:55806 [3] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54098:55806 [3] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54098:55806 [3] NCCL INFO Setting affinity for GPU 4 to ffff,ffffffff,f0000000,000000ff,ffffffff,fff00000,00000000 +n124-112-200:54098:55806 [3] NCCL INFO NCCL_NVLS_ENABLE set by environment to 0. +n124-112-200:54097:55810 [2] NCCL INFO NCCL_CUMEM_ENABLE set by environment to 0. +n124-112-200:54097:55810 [2] NCCL INFO NCCL_NET_GDR_LEVEL set by environment to PIX +n124-112-200:54097:55810 [2] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54097:55810 [2] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54097:55810 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54097:55810 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54097:55810 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54097:55810 [2] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54097:55810 [2] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54097:55810 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54097:55810 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54097:55810 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54097:55810 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54097:55810 [2] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54097:55810 [2] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54097:55810 [2] NCCL INFO ========================================== +n124-112-200:54097:55810 [2] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54097:55810 [2] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54097:55810 [2] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54097:55810 [2] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54097:55810 [2] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54097:55810 [2] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54097:55810 [2] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54097:55810 [2] NCCL INFO Setting affinity for GPU 3 to 0fffffff,ffffff00,00000000,000fffff,ffffffff +n124-112-200:54097:55810 [2] NCCL INFO NCCL_NVLS_ENABLE set by environment to 0. +n124-112-200:54095:55804 [0] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54095:55804 [0] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54095:55804 [0] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:55804 [0] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54100:55807 [5] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54100:55807 [5] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54098:55806 [3] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54098:55806 [3] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:55806 [3] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54097:55810 [2] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54097:55810 [2] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:55810 [2] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:55807 [5] NCCL INFO comm 0xc6db1170 rank 5 nRanks 7 nNodes 1 localRanks 7 localRank 5 MNNVL 0 +n124-112-200:54099:55809 [4] NCCL INFO comm 0x37e31620 rank 4 nRanks 7 nNodes 1 localRanks 7 localRank 4 MNNVL 0 +n124-112-200:54097:55810 [2] NCCL INFO comm 0x23947350 rank 2 nRanks 7 nNodes 1 localRanks 7 localRank 2 MNNVL 0 +n124-112-200:54100:55807 [5] NCCL INFO NCCL_MIN_NCHANNELS set by environment to 4. +n124-112-200:54098:55806 [3] NCCL INFO comm 0x4f6041b0 rank 3 nRanks 7 nNodes 1 localRanks 7 localRank 3 MNNVL 0 +n124-112-200:54096:55836 [1] NCCL INFO comm 0xc406a3a0 rank 1 nRanks 7 nNodes 1 localRanks 7 localRank 1 MNNVL 0 +n124-112-200:54097:55810 [2] NCCL INFO NCCL_MIN_NCHANNELS set by environment to 4. +n124-112-200:54099:55809 [4] NCCL INFO NCCL_MIN_NCHANNELS set by environment to 4. +n124-112-200:54101:55805 [6] NCCL INFO comm 0xc409e9d0 rank 6 nRanks 7 nNodes 1 localRanks 7 localRank 6 MNNVL 0 +n124-112-200:54096:55836 [1] NCCL INFO Tree 0 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54095:55804 [0] NCCL INFO comm 0xcad968b0 rank 0 nRanks 7 nNodes 1 localRanks 7 localRank 0 MNNVL 0 +n124-112-200:54098:55806 [3] NCCL INFO NCCL_MIN_NCHANNELS set by environment to 4. +n124-112-200:54096:55836 [1] NCCL INFO Tree 12 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54096:55836 [1] NCCL INFO Tree 1 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54100:55807 [5] NCCL INFO Ring 00 : 4 -> 5 -> 6 +n124-112-200:54096:55836 [1] NCCL INFO Tree 13 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54095:55804 [0] NCCL INFO Tree 0 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54096:55836 [1] NCCL INFO Tree 2 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54097:55810 [2] NCCL INFO Ring 00 : 1 -> 2 -> 3 +n124-112-200:54099:55809 [4] NCCL INFO Ring 00 : 3 -> 4 -> 5 +n124-112-200:54096:55836 [1] NCCL INFO Tree 14 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54095:55804 [0] NCCL INFO Tree 12 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54100:55807 [5] NCCL INFO Ring 01 : 4 -> 5 -> 6 +n124-112-200:54096:55836 [1] NCCL INFO Tree 3 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54095:55804 [0] NCCL INFO Tree 1 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54098:55806 [3] NCCL INFO Ring 00 : 2 -> 3 -> 4 +n124-112-200:54096:55836 [1] NCCL INFO Tree 15 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54097:55810 [2] NCCL INFO Ring 01 : 1 -> 2 -> 3 +n124-112-200:54096:55836 [1] NCCL INFO Tree 4 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54099:55809 [4] NCCL INFO Ring 01 : 3 -> 4 -> 5 +n124-112-200:54100:55807 [5] NCCL INFO Ring 02 : 4 -> 5 -> 6 +n124-112-200:54101:55805 [6] NCCL INFO NCCL_MIN_NCHANNELS set by environment to 4. +n124-112-200:54099:55809 [4] NCCL INFO Ring 02 : 3 -> 4 -> 5 +n124-112-200:54095:55804 [0] NCCL INFO Tree 13 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54098:55806 [3] NCCL INFO Ring 01 : 2 -> 3 -> 4 +n124-112-200:54097:55810 [2] NCCL INFO Ring 02 : 1 -> 2 -> 3 +n124-112-200:54096:55836 [1] NCCL INFO Tree 16 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54100:55807 [5] NCCL INFO Ring 03 : 4 -> 5 -> 6 +n124-112-200:54099:55809 [4] NCCL INFO Ring 03 : 3 -> 4 -> 5 +n124-112-200:54095:55804 [0] NCCL INFO Tree 2 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54098:55806 [3] NCCL INFO Ring 02 : 2 -> 3 -> 4 +n124-112-200:54099:55809 [4] NCCL INFO Ring 04 : 3 -> 4 -> 5 +n124-112-200:54097:55810 [2] NCCL INFO Ring 03 : 1 -> 2 -> 3 +n124-112-200:54096:55836 [1] NCCL INFO Tree 5 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54098:55806 [3] NCCL INFO Ring 03 : 2 -> 3 -> 4 +n124-112-200:54100:55807 [5] NCCL INFO Ring 04 : 4 -> 5 -> 6 +n124-112-200:54095:55804 [0] NCCL INFO Tree 14 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54098:55806 [3] NCCL INFO Ring 04 : 2 -> 3 -> 4 +n124-112-200:54097:55810 [2] NCCL INFO Ring 04 : 1 -> 2 -> 3 +n124-112-200:54100:55807 [5] NCCL INFO Ring 05 : 4 -> 5 -> 6 +n124-112-200:54101:55805 [6] NCCL INFO Ring 00 : 5 -> 6 -> 0 +n124-112-200:54097:55810 [2] NCCL INFO Ring 05 : 1 -> 2 -> 3 +n124-112-200:54099:55809 [4] NCCL INFO Ring 05 : 3 -> 4 -> 5 +n124-112-200:54101:55805 [6] NCCL INFO Ring 01 : 5 -> 6 -> 0 +n124-112-200:54096:55836 [1] NCCL INFO Tree 17 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54095:55804 [0] NCCL INFO Tree 3 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54098:55806 [3] NCCL INFO Ring 05 : 2 -> 3 -> 4 +n124-112-200:54096:55836 [1] NCCL INFO Tree 6 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54100:55807 [5] NCCL INFO Ring 06 : 4 -> 5 -> 6 +n124-112-200:54096:55836 [1] NCCL INFO Tree 18 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54097:55810 [2] NCCL INFO Ring 06 : 1 -> 2 -> 3 +n124-112-200:54099:55809 [4] NCCL INFO Ring 06 : 3 -> 4 -> 5 +n124-112-200:54097:55810 [2] NCCL INFO Ring 07 : 1 -> 2 -> 3 +n124-112-200:54101:55805 [6] NCCL INFO Ring 02 : 5 -> 6 -> 0 +n124-112-200:54097:55810 [2] NCCL INFO Ring 08 : 1 -> 2 -> 3 +n124-112-200:54095:55804 [0] NCCL INFO Tree 15 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54098:55806 [3] NCCL INFO Ring 06 : 2 -> 3 -> 4 +n124-112-200:54095:55804 [0] NCCL INFO Tree 4 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54100:55807 [5] NCCL INFO Ring 07 : 4 -> 5 -> 6 +n124-112-200:54096:55836 [1] NCCL INFO Tree 7 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54095:55804 [0] NCCL INFO Tree 16 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54100:55807 [5] NCCL INFO Ring 08 : 4 -> 5 -> 6 +n124-112-200:54099:55809 [4] NCCL INFO Ring 07 : 3 -> 4 -> 5 +n124-112-200:54101:55805 [6] NCCL INFO Ring 03 : 5 -> 6 -> 0 +n124-112-200:54097:55810 [2] NCCL INFO Ring 09 : 1 -> 2 -> 3 +n124-112-200:54098:55806 [3] NCCL INFO Ring 07 : 2 -> 3 -> 4 +n124-112-200:54096:55836 [1] NCCL INFO Tree 19 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54097:55810 [2] NCCL INFO Ring 10 : 1 -> 2 -> 3 +n124-112-200:54095:55804 [0] NCCL INFO Tree 5 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54100:55807 [5] NCCL INFO Ring 09 : 4 -> 5 -> 6 +n124-112-200:54095:55804 [0] NCCL INFO Tree 17 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54099:55809 [4] NCCL INFO Ring 08 : 3 -> 4 -> 5 +n124-112-200:54101:55805 [6] NCCL INFO Ring 04 : 5 -> 6 -> 0 +n124-112-200:54096:55836 [1] NCCL INFO Tree 8 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54098:55806 [3] NCCL INFO Ring 08 : 2 -> 3 -> 4 +n124-112-200:54101:55805 [6] NCCL INFO Ring 05 : 5 -> 6 -> 0 +n124-112-200:54097:55810 [2] NCCL INFO Ring 11 : 1 -> 2 -> 3 +n124-112-200:54100:55807 [5] NCCL INFO Ring 10 : 4 -> 5 -> 6 +n124-112-200:54098:55806 [3] NCCL INFO Ring 09 : 2 -> 3 -> 4 +n124-112-200:54095:55804 [0] NCCL INFO Tree 6 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54100:55807 [5] NCCL INFO Ring 11 : 4 -> 5 -> 6 +n124-112-200:54095:55804 [0] NCCL INFO Tree 18 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54099:55809 [4] NCCL INFO Ring 09 : 3 -> 4 -> 5 +n124-112-200:54096:55836 [1] NCCL INFO Tree 20 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54101:55805 [6] NCCL INFO Ring 06 : 5 -> 6 -> 0 +n124-112-200:54096:55836 [1] NCCL INFO Tree 9 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54097:55810 [2] NCCL INFO Ring 12 : 1 -> 2 -> 3 +n124-112-200:54101:55805 [6] NCCL INFO Ring 07 : 5 -> 6 -> 0 +n124-112-200:54096:55836 [1] NCCL INFO Tree 21 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54098:55806 [3] NCCL INFO Ring 10 : 2 -> 3 -> 4 +n124-112-200:54096:55836 [1] NCCL INFO Tree 10 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54100:55807 [5] NCCL INFO Ring 12 : 4 -> 5 -> 6 +n124-112-200:54095:55804 [0] NCCL INFO Tree 7 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54099:55809 [4] NCCL INFO Ring 10 : 3 -> 4 -> 5 +n124-112-200:54097:55810 [2] NCCL INFO Ring 13 : 1 -> 2 -> 3 +n124-112-200:54101:55805 [6] NCCL INFO Ring 08 : 5 -> 6 -> 0 +n124-112-200:54098:55806 [3] NCCL INFO Ring 11 : 2 -> 3 -> 4 +n124-112-200:54096:55836 [1] NCCL INFO Tree 22 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54100:55807 [5] NCCL INFO Ring 13 : 4 -> 5 -> 6 +n124-112-200:54095:55804 [0] NCCL INFO Tree 19 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54099:55809 [4] NCCL INFO Ring 11 : 3 -> 4 -> 5 +n124-112-200:54097:55810 [2] NCCL INFO Ring 14 : 1 -> 2 -> 3 +n124-112-200:54101:55805 [6] NCCL INFO Ring 09 : 5 -> 6 -> 0 +n124-112-200:54098:55806 [3] NCCL INFO Ring 12 : 2 -> 3 -> 4 +n124-112-200:54096:55836 [1] NCCL INFO Tree 11 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54101:55805 [6] NCCL INFO Ring 10 : 5 -> 6 -> 0 +n124-112-200:54100:55807 [5] NCCL INFO Ring 14 : 4 -> 5 -> 6 +n124-112-200:54096:55836 [1] NCCL INFO Tree 23 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54095:55804 [0] NCCL INFO Tree 8 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54100:55807 [5] NCCL INFO Ring 15 : 4 -> 5 -> 6 +n124-112-200:54099:55809 [4] NCCL INFO Ring 12 : 3 -> 4 -> 5 +n124-112-200:54097:55810 [2] NCCL INFO Ring 15 : 1 -> 2 -> 3 +n124-112-200:54098:55806 [3] NCCL INFO Ring 13 : 2 -> 3 -> 4 +n124-112-200:54101:55805 [6] NCCL INFO Ring 11 : 5 -> 6 -> 0 +n124-112-200:54097:55810 [2] NCCL INFO Ring 16 : 1 -> 2 -> 3 +n124-112-200:54097:55810 [2] NCCL INFO Ring 17 : 1 -> 2 -> 3 +n124-112-200:54097:55810 [2] NCCL INFO Ring 18 : 1 -> 2 -> 3 +n124-112-200:54097:55810 [2] NCCL INFO Ring 19 : 1 -> 2 -> 3 +n124-112-200:54097:55810 [2] NCCL INFO Ring 20 : 1 -> 2 -> 3 +n124-112-200:54097:55810 [2] NCCL INFO Ring 21 : 1 -> 2 -> 3 +n124-112-200:54097:55810 [2] NCCL INFO Ring 22 : 1 -> 2 -> 3 +n124-112-200:54097:55810 [2] NCCL INFO Ring 23 : 1 -> 2 -> 3 +n124-112-200:54097:55810 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 [2] 3/-1/-1->2->1 [3] 3/-1/-1->2->1 [4] 3/-1/-1->2->1 [5] 3/-1/-1->2->1 [6] 3/-1/-1->2->1 [7] 3/-1/-1->2->1 [8] 3/-1/-1->2->1 [9] 3/-1/-1->2->1 [10] 3/-1/-1->2->1 [11] 3/-1/-1->2->1 [12] 3/-1/-1->2->1 [13] 3/-1/-1->2->1 [14] 3/-1/-1->2->1 [15] 3/-1/-1->2->1 [16] 3/-1/-1->2->1 [17] 3/-1/-1->2->1 [18] 3/-1/-1->2->1 [19] 3/-1/-1->2->1 [20] 3/-1/-1->2->1 [21] 3/-1/-1->2->1 [22] 3/-1/-1->2->1 [23] 3/-1/-1->2->1 +n124-112-200:54097:55810 [2] NCCL INFO NCCL_BUFFSIZE set by environment to 4194304. +n124-112-200:54095:55804 [0] NCCL INFO Tree 20 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54097:55810 [2] NCCL INFO NCCL_P2P_NVL_CHUNKSIZE set by environment to 1048576. +n124-112-200:54097:55810 [2] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54095:55804 [0] NCCL INFO Tree 9 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:55804 [0] NCCL INFO Tree 21 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:55804 [0] NCCL INFO Tree 10 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:55804 [0] NCCL INFO Tree 22 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:55804 [0] NCCL INFO Tree 11 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:55804 [0] NCCL INFO Tree 23 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:55804 [0] NCCL INFO Channel 00/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 01/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 02/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 03/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 04/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 05/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 06/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 07/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 08/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 09/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 10/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 11/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 12/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 13/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 14/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 15/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 16/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 17/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 18/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 19/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 20/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 21/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 22/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Channel 23/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:55804 [0] NCCL INFO Ring 00 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 01 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 02 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 03 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 04 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 05 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 06 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 07 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 08 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 09 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 10 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 11 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 12 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 13 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 14 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 15 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 16 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 17 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 18 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 19 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 20 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 21 : 6 -> 0 -> 1 +n124-112-200:54095:55804 [0] NCCL INFO Ring 22 : 6 -> 0 -> 1 +n124-112-200:54096:55836 [1] NCCL INFO NCCL_MIN_NCHANNELS set by environment to 4. +n124-112-200:54100:55807 [5] NCCL INFO Ring 16 : 4 -> 5 -> 6 +n124-112-200:54096:55836 [1] NCCL INFO Ring 00 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 01 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 02 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 03 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 04 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 05 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 06 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 07 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 08 : 0 -> 1 -> 2 +n124-112-200:54100:55807 [5] NCCL INFO Ring 17 : 4 -> 5 -> 6 +n124-112-200:54096:55836 [1] NCCL INFO Ring 09 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 10 : 0 -> 1 -> 2 +n124-112-200:54100:55807 [5] NCCL INFO Ring 18 : 4 -> 5 -> 6 +n124-112-200:54096:55836 [1] NCCL INFO Ring 11 : 0 -> 1 -> 2 +n124-112-200:54100:55807 [5] NCCL INFO Ring 19 : 4 -> 5 -> 6 +n124-112-200:54096:55836 [1] NCCL INFO Ring 12 : 0 -> 1 -> 2 +n124-112-200:54100:55807 [5] NCCL INFO Ring 20 : 4 -> 5 -> 6 +n124-112-200:54096:55836 [1] NCCL INFO Ring 13 : 0 -> 1 -> 2 +n124-112-200:54100:55807 [5] NCCL INFO Ring 21 : 4 -> 5 -> 6 +n124-112-200:54100:55807 [5] NCCL INFO Ring 22 : 4 -> 5 -> 6 +n124-112-200:54096:55836 [1] NCCL INFO Ring 14 : 0 -> 1 -> 2 +n124-112-200:54100:55807 [5] NCCL INFO Ring 23 : 4 -> 5 -> 6 +n124-112-200:54096:55836 [1] NCCL INFO Ring 15 : 0 -> 1 -> 2 +n124-112-200:54100:55807 [5] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/-1/-1->5->4 [2] 6/-1/-1->5->4 [3] 6/-1/-1->5->4 [4] 6/-1/-1->5->4 [5] 6/-1/-1->5->4 [6] 6/-1/-1->5->4 [7] 6/-1/-1->5->4 [8] 6/-1/-1->5->4 [9] 6/-1/-1->5->4 [10] 6/-1/-1->5->4 [11] 6/-1/-1->5->4 [12] 6/-1/-1->5->4 [13] 6/-1/-1->5->4 [14] 6/-1/-1->5->4 [15] 6/-1/-1->5->4 [16] 6/-1/-1->5->4 [17] 6/-1/-1->5->4 [18] 6/-1/-1->5->4 [19] 6/-1/-1->5->4 [20] 6/-1/-1->5->4 [21] 6/-1/-1->5->4 [22] 6/-1/-1->5->4 [23] 6/-1/-1->5->4 +n124-112-200:54096:55836 [1] NCCL INFO Ring 16 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 17 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 18 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 19 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 20 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 21 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 22 : 0 -> 1 -> 2 +n124-112-200:54096:55836 [1] NCCL INFO Ring 23 : 0 -> 1 -> 2 +n124-112-200:54100:55807 [5] NCCL INFO NCCL_BUFFSIZE set by environment to 4194304. +n124-112-200:54096:55836 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 [2] 2/-1/-1->1->0 [3] 2/-1/-1->1->0 [4] 2/-1/-1->1->0 [5] 2/-1/-1->1->0 [6] 2/-1/-1->1->0 [7] 2/-1/-1->1->0 [8] 2/-1/-1->1->0 [9] 2/-1/-1->1->0 [10] 2/-1/-1->1->0 [11] 2/-1/-1->1->0 [12] 2/-1/-1->1->0 [13] 2/-1/-1->1->0 [14] 2/-1/-1->1->0 [15] 2/-1/-1->1->0 [16] 2/-1/-1->1->0 [17] 2/-1/-1->1->0 [18] 2/-1/-1->1->0 [19] 2/-1/-1->1->0 [20] 2/-1/-1->1->0 [21] 2/-1/-1->1->0 [22] 2/-1/-1->1->0 [23] 2/-1/-1->1->0 +n124-112-200:54100:55807 [5] NCCL INFO NCCL_P2P_NVL_CHUNKSIZE set by environment to 1048576. +n124-112-200:54100:55807 [5] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54096:55836 [1] NCCL INFO NCCL_BUFFSIZE set by environment to 4194304. +n124-112-200:54096:55836 [1] NCCL INFO NCCL_P2P_NVL_CHUNKSIZE set by environment to 1048576. +n124-112-200:54096:55836 [1] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54099:55809 [4] NCCL INFO Ring 13 : 3 -> 4 -> 5 +n124-112-200:54098:55806 [3] NCCL INFO Ring 14 : 2 -> 3 -> 4 +n124-112-200:54099:55809 [4] NCCL INFO Ring 14 : 3 -> 4 -> 5 +n124-112-200:54099:55809 [4] NCCL INFO Ring 15 : 3 -> 4 -> 5 +n124-112-200:54099:55809 [4] NCCL INFO Ring 16 : 3 -> 4 -> 5 +n124-112-200:54099:55809 [4] NCCL INFO Ring 17 : 3 -> 4 -> 5 +n124-112-200:54099:55809 [4] NCCL INFO Ring 18 : 3 -> 4 -> 5 +n124-112-200:54099:55809 [4] NCCL INFO Ring 19 : 3 -> 4 -> 5 +n124-112-200:54099:55809 [4] NCCL INFO Ring 20 : 3 -> 4 -> 5 +n124-112-200:54099:55809 [4] NCCL INFO Ring 21 : 3 -> 4 -> 5 +n124-112-200:54099:55809 [4] NCCL INFO Ring 22 : 3 -> 4 -> 5 +n124-112-200:54099:55809 [4] NCCL INFO Ring 23 : 3 -> 4 -> 5 +n124-112-200:54099:55809 [4] NCCL INFO Trees [0] 5/-1/-1->4->3 [1] 5/-1/-1->4->3 [2] 5/-1/-1->4->3 [3] 5/-1/-1->4->3 [4] 5/-1/-1->4->3 [5] 5/-1/-1->4->3 [6] 5/-1/-1->4->3 [7] 5/-1/-1->4->3 [8] 5/-1/-1->4->3 [9] 5/-1/-1->4->3 [10] 5/-1/-1->4->3 [11] 5/-1/-1->4->3 [12] 5/-1/-1->4->3 [13] 5/-1/-1->4->3 [14] 5/-1/-1->4->3 [15] 5/-1/-1->4->3 [16] 5/-1/-1->4->3 [17] 5/-1/-1->4->3 [18] 5/-1/-1->4->3 [19] 5/-1/-1->4->3 [20] 5/-1/-1->4->3 [21] 5/-1/-1->4->3 [22] 5/-1/-1->4->3 [23] 5/-1/-1->4->3 +n124-112-200:54098:55806 [3] NCCL INFO Ring 15 : 2 -> 3 -> 4 +n124-112-200:54098:55806 [3] NCCL INFO Ring 16 : 2 -> 3 -> 4 +n124-112-200:54098:55806 [3] NCCL INFO Ring 17 : 2 -> 3 -> 4 +n124-112-200:54098:55806 [3] NCCL INFO Ring 18 : 2 -> 3 -> 4 +n124-112-200:54098:55806 [3] NCCL INFO Ring 19 : 2 -> 3 -> 4 +n124-112-200:54098:55806 [3] NCCL INFO Ring 20 : 2 -> 3 -> 4 +n124-112-200:54098:55806 [3] NCCL INFO Ring 21 : 2 -> 3 -> 4 +n124-112-200:54098:55806 [3] NCCL INFO Ring 22 : 2 -> 3 -> 4 +n124-112-200:54098:55806 [3] NCCL INFO Ring 23 : 2 -> 3 -> 4 +n124-112-200:54098:55806 [3] NCCL INFO Trees [0] 4/-1/-1->3->2 [1] 4/-1/-1->3->2 [2] 4/-1/-1->3->2 [3] 4/-1/-1->3->2 [4] 4/-1/-1->3->2 [5] 4/-1/-1->3->2 [6] 4/-1/-1->3->2 [7] 4/-1/-1->3->2 [8] 4/-1/-1->3->2 [9] 4/-1/-1->3->2 [10] 4/-1/-1->3->2 [11] 4/-1/-1->3->2 [12] 4/-1/-1->3->2 [13] 4/-1/-1->3->2 [14] 4/-1/-1->3->2 [15] 4/-1/-1->3->2 [16] 4/-1/-1->3->2 [17] 4/-1/-1->3->2 [18] 4/-1/-1->3->2 [19] 4/-1/-1->3->2 [20] 4/-1/-1->3->2 [21] 4/-1/-1->3->2 [22] 4/-1/-1->3->2 [23] 4/-1/-1->3->2 +n124-112-200:54099:55809 [4] NCCL INFO NCCL_BUFFSIZE set by environment to 4194304. +n124-112-200:54099:55809 [4] NCCL INFO NCCL_P2P_NVL_CHUNKSIZE set by environment to 1048576. +n124-112-200:54099:55809 [4] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54098:55806 [3] NCCL INFO NCCL_BUFFSIZE set by environment to 4194304. +n124-112-200:54098:55806 [3] NCCL INFO NCCL_P2P_NVL_CHUNKSIZE set by environment to 1048576. +n124-112-200:54098:55806 [3] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54097:55810 [2] NCCL INFO Channel 00/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 01/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 02/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 03/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 04/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 05/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 06/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 07/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 08/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 09/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Ring 12 : 5 -> 6 -> 0 +n124-112-200:54095:55804 [0] NCCL INFO Ring 23 : 6 -> 0 -> 1 +n124-112-200:54101:55805 [6] NCCL INFO Ring 13 : 5 -> 6 -> 0 +n124-112-200:54101:55805 [6] NCCL INFO Ring 14 : 5 -> 6 -> 0 +n124-112-200:54101:55805 [6] NCCL INFO Ring 15 : 5 -> 6 -> 0 +n124-112-200:54101:55805 [6] NCCL INFO Ring 16 : 5 -> 6 -> 0 +n124-112-200:54101:55805 [6] NCCL INFO Ring 17 : 5 -> 6 -> 0 +n124-112-200:54101:55805 [6] NCCL INFO Ring 18 : 5 -> 6 -> 0 +n124-112-200:54101:55805 [6] NCCL INFO Ring 19 : 5 -> 6 -> 0 +n124-112-200:54101:55805 [6] NCCL INFO Ring 20 : 5 -> 6 -> 0 +n124-112-200:54095:55804 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] 1/-1/-1->0->-1 [2] 1/-1/-1->0->-1 [3] 1/-1/-1->0->-1 [4] 1/-1/-1->0->-1 [5] 1/-1/-1->0->-1 [6] 1/-1/-1->0->-1 [7] 1/-1/-1->0->-1 [8] 1/-1/-1->0->-1 [9] 1/-1/-1->0->-1 [10] 1/-1/-1->0->-1 [11] 1/-1/-1->0->-1 [12] 1/-1/-1->0->-1 [13] 1/-1/-1->0->-1 [14] 1/-1/-1->0->-1 [15] 1/-1/-1->0->-1 [16] 1/-1/-1->0->-1 [17] 1/-1/-1->0->-1 [18] 1/-1/-1->0->-1 [19] 1/-1/-1->0->-1 [20] 1/-1/-1->0->-1 [21] 1/-1/-1->0->-1 [22] 1/-1/-1->0->-1 [23] 1/-1/-1->0->-1 +n124-112-200:54101:55805 [6] NCCL INFO Ring 21 : 5 -> 6 -> 0 +n124-112-200:54101:55805 [6] NCCL INFO Ring 22 : 5 -> 6 -> 0 +n124-112-200:54101:55805 [6] NCCL INFO Ring 23 : 5 -> 6 -> 0 +n124-112-200:54095:55804 [0] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54101:55805 [6] NCCL INFO Trees [0] -1/-1/-1->6->5 [1] -1/-1/-1->6->5 [2] -1/-1/-1->6->5 [3] -1/-1/-1->6->5 [4] -1/-1/-1->6->5 [5] -1/-1/-1->6->5 [6] -1/-1/-1->6->5 [7] -1/-1/-1->6->5 [8] -1/-1/-1->6->5 [9] -1/-1/-1->6->5 [10] -1/-1/-1->6->5 [11] -1/-1/-1->6->5 [12] -1/-1/-1->6->5 [13] -1/-1/-1->6->5 [14] -1/-1/-1->6->5 [15] -1/-1/-1->6->5 [16] -1/-1/-1->6->5 [17] -1/-1/-1->6->5 [18] -1/-1/-1->6->5 [19] -1/-1/-1->6->5 [20] -1/-1/-1->6->5 [21] -1/-1/-1->6->5 [22] -1/-1/-1->6->5 [23] -1/-1/-1->6->5 +n124-112-200:54101:55805 [6] NCCL INFO NCCL_BUFFSIZE set by environment to 4194304. +n124-112-200:54101:55805 [6] NCCL INFO NCCL_P2P_NVL_CHUNKSIZE set by environment to 1048576. +n124-112-200:54101:55805 [6] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54097:55810 [2] NCCL INFO Channel 10/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 11/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 12/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 13/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 14/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 15/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 16/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 17/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 18/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 19/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 20/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 21/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 22/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 00/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 23/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 01/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 02/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 03/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 04/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 05/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 06/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 07/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 08/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 09/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 10/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 11/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 12/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 13/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 14/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 15/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 16/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 17/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 18/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 00/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 19/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 01/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 20/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 02/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 21/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 03/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 22/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 04/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 23/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 05/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 06/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 07/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 08/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 09/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 10/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 11/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 12/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 13/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 14/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 15/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 16/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 17/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 18/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 19/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 20/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 21/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 22/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Channel 23/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 00/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 01/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 02/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 03/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 04/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 05/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 00/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 01/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 02/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 06/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 03/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 04/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 05/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 00/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 06/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 07/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 08/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 09/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 10/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 11/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 12/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 13/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 14/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 15/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 07/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 16/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 17/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 01/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 18/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 19/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 20/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 21/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 22/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 23/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 00/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 08/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 02/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 01/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 03/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 02/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 09/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 04/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 03/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 10/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 04/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 11/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 05/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 12/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 05/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 06/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 06/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 07/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 13/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 07/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 08/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 14/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 09/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 08/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 15/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 10/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 11/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 09/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 16/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 10/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 17/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 12/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 18/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 11/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 13/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 19/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 12/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 14/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 20/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 21/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 22/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 13/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 15/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 23/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 16/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 14/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 17/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 15/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 18/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 16/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 19/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 17/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 20/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 18/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 19/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 21/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 22/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 20/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 23/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 21/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 22/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 23/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Connected all rings +n124-112-200:54101:55805 [6] NCCL INFO Connected all rings +n124-112-200:54101:55805 [6] NCCL INFO Channel 00/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 01/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 02/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 03/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 04/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 05/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 06/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Connected all rings +n124-112-200:54096:55836 [1] NCCL INFO Connected all rings +n124-112-200:54101:55805 [6] NCCL INFO Channel 07/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Connected all rings +n124-112-200:54098:55806 [3] NCCL INFO Connected all rings +n124-112-200:54099:55809 [4] NCCL INFO Connected all rings +n124-112-200:54101:55805 [6] NCCL INFO Channel 08/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 09/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 10/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 11/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 12/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 13/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 14/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 15/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 16/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 17/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 00/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 01/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 02/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 03/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 04/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 05/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 06/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 18/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 07/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 08/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 09/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 10/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 11/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 12/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 13/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 14/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 15/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 16/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 17/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 19/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 18/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 19/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 20/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 21/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 22/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:55807 [5] NCCL INFO Channel 23/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 20/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 21/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 22/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 00/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 01/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54101:55805 [6] NCCL INFO Channel 23/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 02/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 03/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 04/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 05/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 06/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 07/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 08/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 09/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 10/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 00/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 11/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 01/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 12/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 02/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 00/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 00/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 01/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 13/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 02/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 03/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 03/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 04/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 05/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 06/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 07/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 08/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 01/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 09/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 10/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 11/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 14/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 12/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 13/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 04/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 14/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 15/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 16/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 17/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 18/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 02/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 19/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 20/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 21/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 22/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 15/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54096:55836 [1] NCCL INFO Channel 23/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 05/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 16/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 06/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 03/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 04/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 17/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 05/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 07/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 18/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 08/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 06/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 19/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 07/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 09/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 20/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 10/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 08/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 21/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 22/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 11/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 09/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:55810 [2] NCCL INFO Channel 23/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 12/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 10/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 13/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 14/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 11/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 15/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 12/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 16/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 13/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 17/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 14/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 18/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 19/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 15/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 20/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 16/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 17/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 21/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 18/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 19/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 22/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 20/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 21/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:55806 [3] NCCL INFO Channel 23/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 22/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54099:55809 [4] NCCL INFO Channel 23/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54095:55804 [0] NCCL INFO Connected all trees +n124-112-200:54095:55804 [0] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54095:55804 [0] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54095:55804 [0] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54095:55804 [0] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54096:55836 [1] NCCL INFO Connected all trees +n124-112-200:54096:55836 [1] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54096:55836 [1] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54096:55836 [1] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54096:55836 [1] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54101:55805 [6] NCCL INFO Connected all trees +n124-112-200:54100:55807 [5] NCCL INFO Connected all trees +n124-112-200:54100:55807 [5] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54101:55805 [6] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54100:55807 [5] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54101:55805 [6] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54100:55807 [5] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54101:55805 [6] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54100:55807 [5] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54101:55805 [6] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54097:55810 [2] NCCL INFO Connected all trees +n124-112-200:54097:55810 [2] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54097:55810 [2] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54097:55810 [2] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54097:55810 [2] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54099:55809 [4] NCCL INFO Connected all trees +n124-112-200:54098:55806 [3] NCCL INFO Connected all trees +n124-112-200:54099:55809 [4] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54099:55809 [4] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54098:55806 [3] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54099:55809 [4] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54098:55806 [3] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54099:55809 [4] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54098:55806 [3] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54098:55806 [3] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54099:55809 [4] NCCL INFO TUNER/Plugin: NCCL_TUNER_PLUGIN set to libnccl-tuner.so +n124-112-200:54095:55804 [0] NCCL INFO ncclCommInitRank comm 0xcad968b0 rank 0 nranks 7 cudaDev 0 nvmlDev 1 busId 5000 commId 0xc9abc278b27d692f - Init COMPLETE +n124-112-200:54096:55836 [1] NCCL INFO TUNER/Plugin: NCCL_TUNER_PLUGIN set to libnccl-tuner.so +n124-112-200:54101:55805 [6] NCCL INFO TUNER/Plugin: NCCL_TUNER_PLUGIN set to libnccl-tuner.so +n124-112-200:54100:55807 [5] NCCL INFO TUNER/Plugin: NCCL_TUNER_PLUGIN set to libnccl-tuner.so +n124-112-200:54097:55810 [2] NCCL INFO TUNER/Plugin: NCCL_TUNER_PLUGIN set to libnccl-tuner.so +n124-112-200:54098:55806 [3] NCCL INFO TUNER/Plugin: NCCL_TUNER_PLUGIN set to libnccl-tuner.so +n124-112-200:54097:55810 [2] NCCL INFO TUNER/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54097:55810 [2] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2, using internal tuner instead. +n124-112-200:54097:55810 [2] NCCL INFO ncclCommInitRank comm 0x23947350 rank 2 nranks 7 cudaDev 2 nvmlDev 3 busId c000 commId 0xc9abc278b27d692f - Init COMPLETE +n124-112-200:54101:55805 [6] NCCL INFO TUNER/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54101:55805 [6] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2, using internal tuner instead. +n124-112-200:54101:55805 [6] NCCL INFO ncclCommInitRank comm 0xc409e9d0 rank 6 nranks 7 cudaDev 6 nvmlDev 7 busId 8c000 commId 0xc9abc278b27d692f - Init COMPLETE +n124-112-200:54099:55809 [4] NCCL INFO TUNER/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54099:55809 [4] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2, using internal tuner instead. +n124-112-200:54099:55809 [4] NCCL INFO ncclCommInitRank comm 0x37e31620 rank 4 nranks 7 cudaDev 4 nvmlDev 5 busId 85000 commId 0xc9abc278b27d692f - Init COMPLETE +n124-112-200:54098:55806 [3] NCCL INFO TUNER/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54098:55806 [3] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2, using internal tuner instead. +n124-112-200:54098:55806 [3] NCCL INFO ncclCommInitRank comm 0x4f6041b0 rank 3 nranks 7 cudaDev 3 nvmlDev 4 busId 84000 commId 0xc9abc278b27d692f - Init COMPLETE +n124-112-200:54096:55836 [1] NCCL INFO TUNER/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54096:55836 [1] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2, using internal tuner instead. +n124-112-200:54096:55836 [1] NCCL INFO ncclCommInitRank comm 0xc406a3a0 rank 1 nranks 7 cudaDev 1 nvmlDev 2 busId b000 commId 0xc9abc278b27d692f - Init COMPLETE +n124-112-200:54100:55807 [5] NCCL INFO TUNER/Plugin: Plugin name set by env to libnccl-net-gcp-fastrak.so +n124-112-200:54100:55807 [5] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2, using internal tuner instead. +n124-112-200:54100:55807 [5] NCCL INFO ncclCommInitRank comm 0xc6db1170 rank 5 nranks 7 cudaDev 5 nvmlDev 6 busId 8b000 commId 0xc9abc278b27d692f - Init COMPLETE +n124-112-200:54095:56100 [0] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54095:56100 [0] NCCL INFO Using network FasTrak +n124-112-200:54100:56102 [5] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54101:56106 [6] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54096:56105 [1] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54098:56104 [3] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54100:56102 [5] NCCL INFO Using network FasTrak +n124-112-200:54099:56101 [4] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54097:56103 [2] NCCL INFO Using non-device net plugin version 7 +n124-112-200:54101:56106 [6] NCCL INFO Using network FasTrak +n124-112-200:54096:56105 [1] NCCL INFO Using network FasTrak +n124-112-200:54098:56104 [3] NCCL INFO Using network FasTrak +n124-112-200:54099:56101 [4] NCCL INFO Using network FasTrak +n124-112-200:54097:56103 [2] NCCL INFO Using network FasTrak +n124-112-200:54095:56100 [0] NCCL INFO DMA-BUF is available on GPU device 0 +n124-112-200:54100:56102 [5] NCCL INFO DMA-BUF is available on GPU device 5 +n124-112-200:54099:56101 [4] NCCL INFO DMA-BUF is available on GPU device 4 +n124-112-200:54101:56106 [6] NCCL INFO DMA-BUF is available on GPU device 6 +n124-112-200:54098:56104 [3] NCCL INFO DMA-BUF is available on GPU device 3 +n124-112-200:54096:56105 [1] NCCL INFO DMA-BUF is available on GPU device 1 +n124-112-200:54097:56103 [2] NCCL INFO DMA-BUF is available on GPU device 2 +n124-112-200:54101:56106 [6] NCCL INFO bootstrapSplit: comm 0xc1698160 parent 0xc409e9d0 rank 6 nranks 7 color -1326228412 key 6 prev 5 next 0 - DONE +n124-112-200:54100:56102 [5] NCCL INFO bootstrapSplit: comm 0x102f54c0 parent 0xc6db1170 rank 5 nranks 7 color -1326228412 key 5 prev 4 next 6 - DONE +n124-112-200:54096:56105 [1] NCCL INFO bootstrapSplit: comm 0xc408b450 parent 0xc406a3a0 rank 1 nranks 7 color -1326228412 key 1 prev 0 next 2 - DONE +n124-112-200:54095:56100 [0] NCCL INFO bootstrapSplit: comm 0x3e453450 parent 0xcad968b0 rank 0 nranks 7 color -1326228412 key 0 prev 6 next 1 - DONE +n124-112-200:54096:56105 [1] NCCL INFO ncclCommSplit comm 0xc408b450 rank 1 nranks 7 cudaDev 1 nvmlDev 2 busId b000 parent 0xc406a3a0 color -1326228412 key 1 commId 0xfd728d2be815c138 - Init START +n124-112-200:54095:56100 [0] NCCL INFO ncclCommSplit comm 0x3e453450 rank 0 nranks 7 cudaDev 0 nvmlDev 1 busId 5000 parent 0xcad968b0 color -1326228412 key 0 commId 0xfd728d2be815c138 - Init START +n124-112-200:54100:56102 [5] NCCL INFO ncclCommSplit comm 0x102f54c0 rank 5 nranks 7 cudaDev 5 nvmlDev 6 busId 8b000 parent 0xc6db1170 color -1326228412 key 5 commId 0xfd728d2be815c138 - Init START +n124-112-200:54101:56106 [6] NCCL INFO ncclCommSplit comm 0xc1698160 rank 6 nranks 7 cudaDev 6 nvmlDev 7 busId 8c000 parent 0xc409e9d0 color -1326228412 key 6 commId 0xfd728d2be815c138 - Init START +n124-112-200:54099:56101 [4] NCCL INFO bootstrapSplit: comm 0xeff4ee0 parent 0x37e31620 rank 4 nranks 7 color -1326228412 key 4 prev 3 next 5 - DONE +n124-112-200:54099:56101 [4] NCCL INFO ncclCommSplit comm 0xeff4ee0 rank 4 nranks 7 cudaDev 4 nvmlDev 5 busId 85000 parent 0x37e31620 color -1326228412 key 4 commId 0xfd728d2be815c138 - Init START +n124-112-200:54097:56103 [2] NCCL INFO bootstrapSplit: comm 0x384d8de0 parent 0x23947350 rank 2 nranks 7 color -1326228412 key 2 prev 1 next 3 - DONE +n124-112-200:54097:56103 [2] NCCL INFO ncclCommSplit comm 0x384d8de0 rank 2 nranks 7 cudaDev 2 nvmlDev 3 busId c000 parent 0x23947350 color -1326228412 key 2 commId 0xfd728d2be815c138 - Init START +n124-112-200:54098:56104 [3] NCCL INFO bootstrapSplit: comm 0x3cb19f80 parent 0x4f6041b0 rank 3 nranks 7 color -1326228412 key 3 prev 2 next 4 - DONE +n124-112-200:54098:56104 [3] NCCL INFO ncclCommSplit comm 0x3cb19f80 rank 3 nranks 7 cudaDev 3 nvmlDev 4 busId 84000 parent 0x4f6041b0 color -1326228412 key 3 commId 0xfd728d2be815c138 - Init START +n124-112-200:54100:56102 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:56106 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:56100 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:56101 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54096:56105 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54098:56104 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54097:56103 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:56102 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:56106 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:56100 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:56101 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54096:56105 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54098:56104 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54097:56103 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:56102 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:56106 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:56100 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:56101 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54096:56105 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54098:56104 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54097:56103 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:56102 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:56106 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:56100 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:56101 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54096:56105 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54098:56104 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54097:56103 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:56102 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:56106 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54097:56103 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:56101 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:56100 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54098:56104 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54096:56105 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:56102 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:56106 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54097:56103 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:56101 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:56100 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54098:56104 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54096:56105 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:56102 [5] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54100:56102 [5] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54100:56102 [5] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54100:56102 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54100:56102 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54100:56102 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54100:56102 [5] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54100:56102 [5] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54100:56102 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54100:56102 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54100:56102 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54100:56102 [5] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54100:56102 [5] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54100:56102 [5] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54100:56102 [5] NCCL INFO ========================================== +n124-112-200:54100:56102 [5] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54100:56102 [5] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54100:56102 [5] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54100:56102 [5] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54100:56102 [5] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54100:56102 [5] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54100:56102 [5] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54100:56102 [5] NCCL INFO Setting affinity for GPU 6 to ffff,ffffffff,f0000000,000000ff,ffffffff,fff00000,00000000 +n124-112-200:54100:56102 [5] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54100:56102 [5] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54100:56102 [5] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54100:56102 [5] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:56106 [6] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54101:56106 [6] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54101:56106 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54101:56106 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54101:56106 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54101:56106 [6] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54101:56106 [6] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54101:56106 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54101:56106 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54101:56106 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54101:56106 [6] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54101:56106 [6] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54101:56106 [6] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54101:56106 [6] NCCL INFO ========================================== +n124-112-200:54101:56106 [6] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54101:56106 [6] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54101:56106 [6] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54101:56106 [6] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54101:56106 [6] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54101:56106 [6] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54101:56106 [6] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54101:56106 [6] NCCL INFO Setting affinity for GPU 7 to ffff,ffffffff,f0000000,000000ff,ffffffff,fff00000,00000000 +n124-112-200:54097:56103 [2] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54101:56106 [6] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54101:56106 [6] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54101:56106 [6] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54097:56103 [2] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54097:56103 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54097:56103 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54097:56103 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54097:56103 [2] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54097:56103 [2] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54097:56103 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54097:56103 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54097:56103 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54097:56103 [2] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54097:56103 [2] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54097:56103 [2] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54097:56103 [2] NCCL INFO ========================================== +n124-112-200:54097:56103 [2] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54097:56103 [2] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54097:56103 [2] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54097:56103 [2] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54097:56103 [2] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54097:56103 [2] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54097:56103 [2] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54097:56103 [2] NCCL INFO Setting affinity for GPU 3 to 0fffffff,ffffff00,00000000,000fffff,ffffffff +n124-112-200:54099:56101 [4] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54095:56100 [0] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54097:56103 [2] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54097:56103 [2] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54097:56103 [2] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54097:56103 [2] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54099:56101 [4] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54099:56101 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54099:56101 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54099:56101 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54099:56101 [4] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54099:56101 [4] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54099:56101 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54099:56101 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54099:56101 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54099:56101 [4] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54099:56101 [4] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54099:56101 [4] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54099:56101 [4] NCCL INFO ========================================== +n124-112-200:54099:56101 [4] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54099:56101 [4] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54099:56101 [4] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54099:56101 [4] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54099:56101 [4] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54099:56101 [4] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54099:56101 [4] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54099:56101 [4] NCCL INFO Setting affinity for GPU 5 to ffff,ffffffff,f0000000,000000ff,ffffffff,fff00000,00000000 +n124-112-200:54095:56100 [0] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54095:56100 [0] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54095:56100 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54095:56100 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54095:56100 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54095:56100 [0] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54095:56100 [0] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54095:56100 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54095:56100 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54095:56100 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54095:56100 [0] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54095:56100 [0] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54095:56100 [0] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54095:56100 [0] NCCL INFO ========================================== +n124-112-200:54095:56100 [0] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54095:56100 [0] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54095:56100 [0] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54095:56100 [0] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54095:56100 [0] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54095:56100 [0] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54095:56100 [0] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54095:56100 [0] NCCL INFO Setting affinity for GPU 1 to 0fffffff,ffffff00,00000000,000fffff,ffffffff +n124-112-200:54098:56104 [3] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54096:56105 [1] NCCL INFO Could not find real path of /sys/class/pci_bus/fffffff/../../fffffff:ff:f +n124-112-200:54099:56101 [4] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54099:56101 [4] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54099:56101 [4] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54099:56101 [4] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54098:56104 [3] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54098:56104 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54098:56104 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54098:56104 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54098:56104 [3] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54098:56104 [3] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54098:56104 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54098:56104 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54098:56104 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54098:56104 [3] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54098:56104 [3] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54098:56104 [3] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54098:56104 [3] NCCL INFO ========================================== +n124-112-200:54098:56104 [3] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54098:56104 [3] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54098:56104 [3] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54098:56104 [3] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54098:56104 [3] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54098:56104 [3] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54098:56104 [3] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54098:56104 [3] NCCL INFO Setting affinity for GPU 4 to ffff,ffffffff,f0000000,000000ff,ffffffff,fff00000,00000000 +n124-112-200:54096:56105 [1] NCCL INFO === System : maxBw 370.8 totalBw 370.8 === +n124-112-200:54096:56105 [1] NCCL INFO CPU/0-0 (1/1/2) +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - PCI/0-2000 (10b5879610b58796) +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - GPU/0-5000 (0) +n124-112-200:54096:56105 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - NIC/0-6000 +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - NIC/0-7000 +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - PCI/0-9000 (10b5879610b58796) +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - GPU/0-b000 (1) +n124-112-200:54096:56105 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - GPU/0-c000 (2) +n124-112-200:54096:56105 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - NIC/0-d000 +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - NIC/0-e000 +n124-112-200:54096:56105 [1] NCCL INFO + SYS[10.0] - CPU/1 +n124-112-200:54096:56105 [1] NCCL INFO CPU/0-1 (1/1/2) +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - PCI/0-82000 (10b5879610b58796) +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - GPU/0-84000 (3) +n124-112-200:54096:56105 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - GPU/0-85000 (4) +n124-112-200:54096:56105 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - NIC/0-86000 +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - NIC/0-87000 +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - PCI/0-89000 (10b5879610b58796) +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - GPU/0-8b000 (5) +n124-112-200:54096:56105 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - GPU/0-8c000 (6) +n124-112-200:54096:56105 [1] NCCL INFO + NVL[370.8] - NVS/0 +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - NIC/0-8d000 +n124-112-200:54096:56105 [1] NCCL INFO + PCI[24.0] - NIC/0-8e000 +n124-112-200:54096:56105 [1] NCCL INFO + SYS[10.0] - CPU/0 +n124-112-200:54096:56105 [1] NCCL INFO ========================================== +n124-112-200:54096:56105 [1] NCCL INFO GPU/5000 :GPU/0-5000 (0/5000.0/LOC) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54096:56105 [1] NCCL INFO GPU/B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (0/5000.0/LOC) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54096:56105 [1] NCCL INFO GPU/C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (0/5000.0/LOC) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (2/24.0/PHB) CPU/0-1 (3/10.0/SYS) +n124-112-200:54096:56105 [1] NCCL INFO GPU/84000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (0/5000.0/LOC) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54096:56105 [1] NCCL INFO GPU/85000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (0/5000.0/LOC) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54096:56105 [1] NCCL INFO GPU/8B000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (0/5000.0/LOC) GPU/0-8c000 (2/370.8/NVL) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54096:56105 [1] NCCL INFO GPU/8C000 :GPU/0-5000 (2/370.8/NVL) GPU/0-b000 (2/370.8/NVL) GPU/0-c000 (2/370.8/NVL) GPU/0-84000 (2/370.8/NVL) GPU/0-85000 (2/370.8/NVL) GPU/0-8b000 (2/370.8/NVL) GPU/0-8c000 (0/5000.0/LOC) NVS/0-0 (1/370.8/NVL) CPU/0-0 (3/10.0/SYS) CPU/0-1 (2/24.0/PHB) +n124-112-200:54096:56105 [1] NCCL INFO Setting affinity for GPU 2 to 0fffffff,ffffff00,00000000,000fffff,ffffffff +n124-112-200:54095:56100 [0] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54095:56100 [0] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54095:56100 [0] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54095:56100 [0] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54096:56105 [1] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54096:56105 [1] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54096:56105 [1] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO Pattern 4, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54098:56104 [3] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO Pattern 1, crossNic 0, nChannels 12, bw 30.000000/30.000000, type NVL/PIX, sameChannels 1 +n124-112-200:54098:56104 [3] NCCL INFO 0 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 1 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 2 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 3 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 4 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 5 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 6 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 7 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 8 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 9 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 10 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54098:56104 [3] NCCL INFO 11 : GPU/0 GPU/1 GPU/2 GPU/3 GPU/4 GPU/5 GPU/6 +n124-112-200:54101:56106 [6] NCCL INFO comm 0xc1698160 rank 6 nRanks 7 nNodes 1 localRanks 7 localRank 6 MNNVL 0 +n124-112-200:54100:56102 [5] NCCL INFO comm 0x102f54c0 rank 5 nRanks 7 nNodes 1 localRanks 7 localRank 5 MNNVL 0 +n124-112-200:54101:56106 [6] NCCL INFO Ring 00 : 5 -> 6 -> 0 +n124-112-200:54101:56106 [6] NCCL INFO Ring 01 : 5 -> 6 -> 0 +n124-112-200:54101:56106 [6] NCCL INFO Ring 02 : 5 -> 6 -> 0 +n124-112-200:54100:56102 [5] NCCL INFO Ring 00 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 03 : 5 -> 6 -> 0 +n124-112-200:54100:56102 [5] NCCL INFO Ring 01 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 04 : 5 -> 6 -> 0 +n124-112-200:54100:56102 [5] NCCL INFO Ring 02 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 05 : 5 -> 6 -> 0 +n124-112-200:54100:56102 [5] NCCL INFO Ring 03 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 06 : 5 -> 6 -> 0 +n124-112-200:54100:56102 [5] NCCL INFO Ring 04 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 07 : 5 -> 6 -> 0 +n124-112-200:54099:56101 [4] NCCL INFO comm 0xeff4ee0 rank 4 nRanks 7 nNodes 1 localRanks 7 localRank 4 MNNVL 0 +n124-112-200:54100:56102 [5] NCCL INFO Ring 05 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 08 : 5 -> 6 -> 0 +n124-112-200:54095:56100 [0] NCCL INFO comm 0x3e453450 rank 0 nRanks 7 nNodes 1 localRanks 7 localRank 0 MNNVL 0 +n124-112-200:54096:56105 [1] NCCL INFO comm 0xc408b450 rank 1 nRanks 7 nNodes 1 localRanks 7 localRank 1 MNNVL 0 +n124-112-200:54100:56102 [5] NCCL INFO Ring 06 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 09 : 5 -> 6 -> 0 +n124-112-200:54098:56104 [3] NCCL INFO comm 0x3cb19f80 rank 3 nRanks 7 nNodes 1 localRanks 7 localRank 3 MNNVL 0 +n124-112-200:54100:56102 [5] NCCL INFO Ring 07 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 10 : 5 -> 6 -> 0 +n124-112-200:54100:56102 [5] NCCL INFO Ring 08 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 11 : 5 -> 6 -> 0 +n124-112-200:54097:56103 [2] NCCL INFO comm 0x384d8de0 rank 2 nRanks 7 nNodes 1 localRanks 7 localRank 2 MNNVL 0 +n124-112-200:54100:56102 [5] NCCL INFO Ring 09 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 12 : 5 -> 6 -> 0 +n124-112-200:54096:56105 [1] NCCL INFO Tree 0 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 0 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54100:56102 [5] NCCL INFO Ring 10 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 13 : 5 -> 6 -> 0 +n124-112-200:54096:56105 [1] NCCL INFO Tree 12 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54099:56101 [4] NCCL INFO Ring 00 : 3 -> 4 -> 5 +n124-112-200:54095:56100 [0] NCCL INFO Tree 12 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54098:56104 [3] NCCL INFO Ring 00 : 2 -> 3 -> 4 +n124-112-200:54097:56103 [2] NCCL INFO Ring 00 : 1 -> 2 -> 3 +n124-112-200:54100:56102 [5] NCCL INFO Ring 11 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 14 : 5 -> 6 -> 0 +n124-112-200:54096:56105 [1] NCCL INFO Tree 1 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54101:56106 [6] NCCL INFO Ring 15 : 5 -> 6 -> 0 +n124-112-200:54099:56101 [4] NCCL INFO Ring 01 : 3 -> 4 -> 5 +n124-112-200:54095:56100 [0] NCCL INFO Tree 1 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54098:56104 [3] NCCL INFO Ring 01 : 2 -> 3 -> 4 +n124-112-200:54095:56100 [0] NCCL INFO Tree 13 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54097:56103 [2] NCCL INFO Ring 01 : 1 -> 2 -> 3 +n124-112-200:54100:56102 [5] NCCL INFO Ring 12 : 4 -> 5 -> 6 +n124-112-200:54096:56105 [1] NCCL INFO Tree 13 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54097:56103 [2] NCCL INFO Ring 02 : 1 -> 2 -> 3 +n124-112-200:54100:56102 [5] NCCL INFO Ring 13 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 16 : 5 -> 6 -> 0 +n124-112-200:54099:56101 [4] NCCL INFO Ring 02 : 3 -> 4 -> 5 +n124-112-200:54098:56104 [3] NCCL INFO Ring 02 : 2 -> 3 -> 4 +n124-112-200:54095:56100 [0] NCCL INFO Tree 2 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54096:56105 [1] NCCL INFO Tree 2 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54097:56103 [2] NCCL INFO Ring 03 : 1 -> 2 -> 3 +n124-112-200:54100:56102 [5] NCCL INFO Ring 14 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 17 : 5 -> 6 -> 0 +n124-112-200:54097:56103 [2] NCCL INFO Ring 04 : 1 -> 2 -> 3 +n124-112-200:54100:56102 [5] NCCL INFO Ring 15 : 4 -> 5 -> 6 +n124-112-200:54101:56106 [6] NCCL INFO Ring 18 : 5 -> 6 -> 0 +n124-112-200:54099:56101 [4] NCCL INFO Ring 03 : 3 -> 4 -> 5 +n124-112-200:54101:56106 [6] NCCL INFO Ring 19 : 5 -> 6 -> 0 +n124-112-200:54098:56104 [3] NCCL INFO Ring 03 : 2 -> 3 -> 4 +n124-112-200:54095:56100 [0] NCCL INFO Tree 14 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54096:56105 [1] NCCL INFO Tree 14 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54098:56104 [3] NCCL INFO Ring 04 : 2 -> 3 -> 4 +n124-112-200:54095:56100 [0] NCCL INFO Tree 3 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54097:56103 [2] NCCL INFO Ring 05 : 1 -> 2 -> 3 +n124-112-200:54098:56104 [3] NCCL INFO Ring 05 : 2 -> 3 -> 4 +n124-112-200:54097:56103 [2] NCCL INFO Ring 06 : 1 -> 2 -> 3 +n124-112-200:54100:56102 [5] NCCL INFO Ring 16 : 4 -> 5 -> 6 +n124-112-200:54097:56103 [2] NCCL INFO Ring 07 : 1 -> 2 -> 3 +n124-112-200:54099:56101 [4] NCCL INFO Ring 04 : 3 -> 4 -> 5 +n124-112-200:54101:56106 [6] NCCL INFO Ring 20 : 5 -> 6 -> 0 +n124-112-200:54096:56105 [1] NCCL INFO Tree 3 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 15 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54101:56106 [6] NCCL INFO Ring 21 : 5 -> 6 -> 0 +n124-112-200:54096:56105 [1] NCCL INFO Tree 15 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 4 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54101:56106 [6] NCCL INFO Ring 22 : 5 -> 6 -> 0 +n124-112-200:54095:56100 [0] NCCL INFO Tree 16 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54098:56104 [3] NCCL INFO Ring 06 : 2 -> 3 -> 4 +n124-112-200:54100:56102 [5] NCCL INFO Ring 17 : 4 -> 5 -> 6 +n124-112-200:54099:56101 [4] NCCL INFO Ring 05 : 3 -> 4 -> 5 +n124-112-200:54097:56103 [2] NCCL INFO Ring 08 : 1 -> 2 -> 3 +n124-112-200:54096:56105 [1] NCCL INFO Tree 4 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54101:56106 [6] NCCL INFO Ring 23 : 5 -> 6 -> 0 +n124-112-200:54095:56100 [0] NCCL INFO Tree 5 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54098:56104 [3] NCCL INFO Ring 07 : 2 -> 3 -> 4 +n124-112-200:54095:56100 [0] NCCL INFO Tree 17 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54100:56102 [5] NCCL INFO Ring 18 : 4 -> 5 -> 6 +n124-112-200:54099:56101 [4] NCCL INFO Ring 06 : 3 -> 4 -> 5 +n124-112-200:54097:56103 [2] NCCL INFO Ring 09 : 1 -> 2 -> 3 +n124-112-200:54096:56105 [1] NCCL INFO Tree 16 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54098:56104 [3] NCCL INFO Ring 08 : 2 -> 3 -> 4 +n124-112-200:54101:56106 [6] NCCL INFO Trees [0] -1/-1/-1->6->5 [1] -1/-1/-1->6->5 [2] -1/-1/-1->6->5 [3] -1/-1/-1->6->5 [4] -1/-1/-1->6->5 [5] -1/-1/-1->6->5 [6] -1/-1/-1->6->5 [7] -1/-1/-1->6->5 [8] -1/-1/-1->6->5 [9] -1/-1/-1->6->5 [10] -1/-1/-1->6->5 [11] -1/-1/-1->6->5 [12] -1/-1/-1->6->5 [13] -1/-1/-1->6->5 [14] -1/-1/-1->6->5 [15] -1/-1/-1->6->5 [16] -1/-1/-1->6->5 [17] -1/-1/-1->6->5 [18] -1/-1/-1->6->5 [19] -1/-1/-1->6->5 [20] -1/-1/-1->6->5 [21] -1/-1/-1->6->5 [22] -1/-1/-1->6->5 [23] -1/-1/-1->6->5 +n124-112-200:54101:56106 [6] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54095:56100 [0] NCCL INFO Tree 6 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 18 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 7 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 19 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 8 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 20 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 9 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 21 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 10 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 22 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 11 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Tree 23 : -1 -> 0 -> 1/-1/-1 +n124-112-200:54095:56100 [0] NCCL INFO Channel 00/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 01/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 02/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 03/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 04/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 05/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 06/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 07/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 08/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 09/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 10/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 11/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 12/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 13/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 14/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 15/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 16/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 17/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 18/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 19/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 20/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 21/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 22/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Channel 23/24 : 0 1 2 3 4 5 6 +n124-112-200:54095:56100 [0] NCCL INFO Ring 00 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 01 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 02 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 03 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 04 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 05 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 06 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 07 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 08 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 09 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 10 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 11 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 12 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 13 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 14 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 15 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 16 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 17 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 18 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 19 : 6 -> 0 -> 1 +n124-112-200:54100:56102 [5] NCCL INFO Ring 19 : 4 -> 5 -> 6 +n124-112-200:54097:56103 [2] NCCL INFO Ring 10 : 1 -> 2 -> 3 +n124-112-200:54100:56102 [5] NCCL INFO Ring 20 : 4 -> 5 -> 6 +n124-112-200:54100:56102 [5] NCCL INFO Ring 21 : 4 -> 5 -> 6 +n124-112-200:54100:56102 [5] NCCL INFO Ring 22 : 4 -> 5 -> 6 +n124-112-200:54100:56102 [5] NCCL INFO Ring 23 : 4 -> 5 -> 6 +n124-112-200:54100:56102 [5] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/-1/-1->5->4 [2] 6/-1/-1->5->4 [3] 6/-1/-1->5->4 [4] 6/-1/-1->5->4 [5] 6/-1/-1->5->4 [6] 6/-1/-1->5->4 [7] 6/-1/-1->5->4 [8] 6/-1/-1->5->4 [9] 6/-1/-1->5->4 [10] 6/-1/-1->5->4 [11] 6/-1/-1->5->4 [12] 6/-1/-1->5->4 [13] 6/-1/-1->5->4 [14] 6/-1/-1->5->4 [15] 6/-1/-1->5->4 [16] 6/-1/-1->5->4 [17] 6/-1/-1->5->4 [18] 6/-1/-1->5->4 [19] 6/-1/-1->5->4 [20] 6/-1/-1->5->4 [21] 6/-1/-1->5->4 [22] 6/-1/-1->5->4 [23] 6/-1/-1->5->4 +n124-112-200:54097:56103 [2] NCCL INFO Ring 11 : 1 -> 2 -> 3 +n124-112-200:54100:56102 [5] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54097:56103 [2] NCCL INFO Ring 12 : 1 -> 2 -> 3 +n124-112-200:54097:56103 [2] NCCL INFO Ring 13 : 1 -> 2 -> 3 +n124-112-200:54097:56103 [2] NCCL INFO Ring 14 : 1 -> 2 -> 3 +n124-112-200:54097:56103 [2] NCCL INFO Ring 15 : 1 -> 2 -> 3 +n124-112-200:54097:56103 [2] NCCL INFO Ring 16 : 1 -> 2 -> 3 +n124-112-200:54097:56103 [2] NCCL INFO Ring 17 : 1 -> 2 -> 3 +n124-112-200:54097:56103 [2] NCCL INFO Ring 18 : 1 -> 2 -> 3 +n124-112-200:54097:56103 [2] NCCL INFO Ring 19 : 1 -> 2 -> 3 +n124-112-200:54097:56103 [2] NCCL INFO Ring 20 : 1 -> 2 -> 3 +n124-112-200:54097:56103 [2] NCCL INFO Ring 21 : 1 -> 2 -> 3 +n124-112-200:54097:56103 [2] NCCL INFO Ring 22 : 1 -> 2 -> 3 +n124-112-200:54097:56103 [2] NCCL INFO Ring 23 : 1 -> 2 -> 3 +n124-112-200:54097:56103 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 [2] 3/-1/-1->2->1 [3] 3/-1/-1->2->1 [4] 3/-1/-1->2->1 [5] 3/-1/-1->2->1 [6] 3/-1/-1->2->1 [7] 3/-1/-1->2->1 [8] 3/-1/-1->2->1 [9] 3/-1/-1->2->1 [10] 3/-1/-1->2->1 [11] 3/-1/-1->2->1 [12] 3/-1/-1->2->1 [13] 3/-1/-1->2->1 [14] 3/-1/-1->2->1 [15] 3/-1/-1->2->1 [16] 3/-1/-1->2->1 [17] 3/-1/-1->2->1 [18] 3/-1/-1->2->1 [19] 3/-1/-1->2->1 [20] 3/-1/-1->2->1 [21] 3/-1/-1->2->1 [22] 3/-1/-1->2->1 [23] 3/-1/-1->2->1 +n124-112-200:54097:56103 [2] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54099:56101 [4] NCCL INFO Ring 07 : 3 -> 4 -> 5 +n124-112-200:54096:56105 [1] NCCL INFO Tree 5 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54099:56101 [4] NCCL INFO Ring 08 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 09 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 10 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 11 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 12 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 13 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 14 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 15 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 16 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 17 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 18 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 19 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 20 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 21 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 22 : 3 -> 4 -> 5 +n124-112-200:54099:56101 [4] NCCL INFO Ring 23 : 3 -> 4 -> 5 +n124-112-200:54096:56105 [1] NCCL INFO Tree 17 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54099:56101 [4] NCCL INFO Trees [0] 5/-1/-1->4->3 [1] 5/-1/-1->4->3 [2] 5/-1/-1->4->3 [3] 5/-1/-1->4->3 [4] 5/-1/-1->4->3 [5] 5/-1/-1->4->3 [6] 5/-1/-1->4->3 [7] 5/-1/-1->4->3 [8] 5/-1/-1->4->3 [9] 5/-1/-1->4->3 [10] 5/-1/-1->4->3 [11] 5/-1/-1->4->3 [12] 5/-1/-1->4->3 [13] 5/-1/-1->4->3 [14] 5/-1/-1->4->3 [15] 5/-1/-1->4->3 [16] 5/-1/-1->4->3 [17] 5/-1/-1->4->3 [18] 5/-1/-1->4->3 [19] 5/-1/-1->4->3 [20] 5/-1/-1->4->3 [21] 5/-1/-1->4->3 [22] 5/-1/-1->4->3 [23] 5/-1/-1->4->3 +n124-112-200:54096:56105 [1] NCCL INFO Tree 6 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54096:56105 [1] NCCL INFO Tree 18 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54099:56101 [4] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54096:56105 [1] NCCL INFO Tree 7 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54098:56104 [3] NCCL INFO Ring 09 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Tree 19 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54096:56105 [1] NCCL INFO Tree 8 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54096:56105 [1] NCCL INFO Tree 20 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54096:56105 [1] NCCL INFO Tree 9 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54096:56105 [1] NCCL INFO Tree 21 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54096:56105 [1] NCCL INFO Tree 10 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54096:56105 [1] NCCL INFO Tree 22 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54096:56105 [1] NCCL INFO Tree 11 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54096:56105 [1] NCCL INFO Tree 23 : 0 -> 1 -> 2/-1/-1 +n124-112-200:54098:56104 [3] NCCL INFO Ring 10 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 00 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Ring 11 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 01 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Ring 12 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 02 : 0 -> 1 -> 2 +n124-112-200:54096:56105 [1] NCCL INFO Ring 03 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Ring 13 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 04 : 0 -> 1 -> 2 +n124-112-200:54096:56105 [1] NCCL INFO Ring 05 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Ring 14 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 06 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Ring 15 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 07 : 0 -> 1 -> 2 +n124-112-200:54096:56105 [1] NCCL INFO Ring 08 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Ring 16 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 09 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Ring 17 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 10 : 0 -> 1 -> 2 +n124-112-200:54096:56105 [1] NCCL INFO Ring 11 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Ring 18 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 12 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Ring 19 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 13 : 0 -> 1 -> 2 +n124-112-200:54096:56105 [1] NCCL INFO Ring 14 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Ring 20 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 15 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Ring 21 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 16 : 0 -> 1 -> 2 +n124-112-200:54096:56105 [1] NCCL INFO Ring 17 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Ring 22 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 18 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Ring 23 : 2 -> 3 -> 4 +n124-112-200:54096:56105 [1] NCCL INFO Ring 19 : 0 -> 1 -> 2 +n124-112-200:54096:56105 [1] NCCL INFO Ring 20 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO Trees [0] 4/-1/-1->3->2 [1] 4/-1/-1->3->2 [2] 4/-1/-1->3->2 [3] 4/-1/-1->3->2 [4] 4/-1/-1->3->2 [5] 4/-1/-1->3->2 [6] 4/-1/-1->3->2 [7] 4/-1/-1->3->2 [8] 4/-1/-1->3->2 [9] 4/-1/-1->3->2 [10] 4/-1/-1->3->2 [11] 4/-1/-1->3->2 [12] 4/-1/-1->3->2 [13] 4/-1/-1->3->2 [14] 4/-1/-1->3->2 [15] 4/-1/-1->3->2 [16] 4/-1/-1->3->2 [17] 4/-1/-1->3->2 [18] 4/-1/-1->3->2 [19] 4/-1/-1->3->2 [20] 4/-1/-1->3->2 [21] 4/-1/-1->3->2 [22] 4/-1/-1->3->2 [23] 4/-1/-1->3->2 +n124-112-200:54096:56105 [1] NCCL INFO Ring 21 : 0 -> 1 -> 2 +n124-112-200:54096:56105 [1] NCCL INFO Ring 22 : 0 -> 1 -> 2 +n124-112-200:54098:56104 [3] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54096:56105 [1] NCCL INFO Ring 23 : 0 -> 1 -> 2 +n124-112-200:54096:56105 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 [2] 2/-1/-1->1->0 [3] 2/-1/-1->1->0 [4] 2/-1/-1->1->0 [5] 2/-1/-1->1->0 [6] 2/-1/-1->1->0 [7] 2/-1/-1->1->0 [8] 2/-1/-1->1->0 [9] 2/-1/-1->1->0 [10] 2/-1/-1->1->0 [11] 2/-1/-1->1->0 [12] 2/-1/-1->1->0 [13] 2/-1/-1->1->0 [14] 2/-1/-1->1->0 [15] 2/-1/-1->1->0 [16] 2/-1/-1->1->0 [17] 2/-1/-1->1->0 [18] 2/-1/-1->1->0 [19] 2/-1/-1->1->0 [20] 2/-1/-1->1->0 [21] 2/-1/-1->1->0 [22] 2/-1/-1->1->0 [23] 2/-1/-1->1->0 +n124-112-200:54096:56105 [1] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54095:56100 [0] NCCL INFO Ring 20 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 21 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 22 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Ring 23 : 6 -> 0 -> 1 +n124-112-200:54095:56100 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] 1/-1/-1->0->-1 [2] 1/-1/-1->0->-1 [3] 1/-1/-1->0->-1 [4] 1/-1/-1->0->-1 [5] 1/-1/-1->0->-1 [6] 1/-1/-1->0->-1 [7] 1/-1/-1->0->-1 [8] 1/-1/-1->0->-1 [9] 1/-1/-1->0->-1 [10] 1/-1/-1->0->-1 [11] 1/-1/-1->0->-1 [12] 1/-1/-1->0->-1 [13] 1/-1/-1->0->-1 [14] 1/-1/-1->0->-1 [15] 1/-1/-1->0->-1 [16] 1/-1/-1->0->-1 [17] 1/-1/-1->0->-1 [18] 1/-1/-1->0->-1 [19] 1/-1/-1->0->-1 [20] 1/-1/-1->0->-1 [21] 1/-1/-1->0->-1 [22] 1/-1/-1->0->-1 [23] 1/-1/-1->0->-1 +n124-112-200:54095:56100 [0] NCCL INFO P2P Chunksize set to 524288 +n124-112-200:54099:56101 [4] NCCL INFO Channel 00/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 01/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 02/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 03/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 04/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 05/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 06/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 07/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 08/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 09/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 10/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 11/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 12/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 13/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 14/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 15/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 16/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 17/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 18/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 19/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 20/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 21/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 22/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 23/0 : 4[5] -> 5[6] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 00/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 01/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 02/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 03/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 04/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 05/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 06/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 07/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 08/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 09/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 10/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 11/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 12/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 13/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 14/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 15/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 16/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 17/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 18/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 19/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 20/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 21/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 22/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Channel 23/0 : 0[1] -> 1[2] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 00/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 00/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 01/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 00/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 01/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 02/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 03/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 04/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 05/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 06/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 07/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 08/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 09/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 10/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 01/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 11/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 02/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 00/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 12/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 13/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 14/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 15/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 16/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 17/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 18/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 19/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 20/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 21/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 02/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 00/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 03/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 22/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 01/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 23/0 : 5[6] -> 6[7] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 03/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 01/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 04/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 02/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 04/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 02/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 05/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 03/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 05/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 03/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 06/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 04/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 06/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 04/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 07/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 05/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 07/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 05/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 08/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 06/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 08/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 06/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 09/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 07/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 09/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 07/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 10/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 08/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 10/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 08/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 11/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 09/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 11/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 09/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 12/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 10/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 12/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 10/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 13/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 11/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 13/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 11/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 14/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 12/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 14/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 12/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 15/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 13/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 15/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 13/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 16/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 14/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 16/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 14/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 17/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 15/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 17/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 15/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 18/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 16/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 18/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 16/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 19/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 17/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 19/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 17/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 20/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 18/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 20/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 18/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 21/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 19/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 21/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 19/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 22/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 20/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 22/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 20/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 23/0 : 6[7] -> 0[1] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 21/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 22/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 23/0 : 2[3] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 21/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 23/0 : 1[2] -> 2[3] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 22/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 23/0 : 3[4] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Connected all rings +n124-112-200:54099:56101 [4] NCCL INFO Connected all rings +n124-112-200:54095:56100 [0] NCCL INFO Connected all rings +n124-112-200:54101:56106 [6] NCCL INFO Connected all rings +n124-112-200:54101:56106 [6] NCCL INFO Channel 00/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Connected all rings +n124-112-200:54097:56103 [2] NCCL INFO Connected all rings +n124-112-200:54098:56104 [3] NCCL INFO Connected all rings +n124-112-200:54101:56106 [6] NCCL INFO Channel 01/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 02/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 00/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 01/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 02/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 03/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 03/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 04/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 05/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 06/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 07/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 08/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 09/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 10/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 11/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 12/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 13/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 04/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 14/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 15/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 16/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 17/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 18/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 19/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 20/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 21/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 22/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54100:56102 [5] NCCL INFO Channel 23/0 : 5[6] -> 4[5] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 05/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 06/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 07/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 08/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 09/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 10/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 11/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 12/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 13/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 14/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 15/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 16/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 17/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 18/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 19/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 20/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 21/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 22/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54101:56106 [6] NCCL INFO Channel 23/0 : 6[7] -> 5[6] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 00/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 01/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 00/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 00/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 02/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 01/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 03/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 01/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 00/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 02/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 04/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 02/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 01/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 03/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 05/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 03/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 02/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 04/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 06/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 04/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 03/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 05/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 07/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 05/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 04/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 06/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 08/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 05/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 06/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 07/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 09/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 07/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 06/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 08/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 10/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 08/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 07/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 09/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 11/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 09/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 08/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 10/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 12/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 10/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 09/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 11/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 13/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 11/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 10/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 12/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 14/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 12/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 11/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 13/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 15/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 13/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 12/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 14/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 16/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 14/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 13/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 15/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 17/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 14/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 15/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 16/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 18/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 17/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 15/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 16/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 19/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 18/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 16/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 17/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 19/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 20/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 17/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 18/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 20/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 18/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 19/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 21/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 21/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 20/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 22/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 22/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 19/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54096:56105 [1] NCCL INFO Channel 23/0 : 1[2] -> 0[1] via P2P/IPC +n124-112-200:54099:56101 [4] NCCL INFO Channel 23/0 : 4[5] -> 3[4] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 21/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 22/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 20/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54097:56103 [2] NCCL INFO Channel 23/0 : 2[3] -> 1[2] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 21/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 22/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54098:56104 [3] NCCL INFO Channel 23/0 : 3[4] -> 2[3] via P2P/IPC +n124-112-200:54095:56100 [0] NCCL INFO Connected all trees +n124-112-200:54095:56100 [0] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54095:56100 [0] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54095:56100 [0] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54095:56100 [0] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54101:56106 [6] NCCL INFO Connected all trees +n124-112-200:54101:56106 [6] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54101:56106 [6] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54101:56106 [6] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54101:56106 [6] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54099:56101 [4] NCCL INFO Connected all trees +n124-112-200:54099:56101 [4] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54100:56102 [5] NCCL INFO Connected all trees +n124-112-200:54099:56101 [4] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54099:56101 [4] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54099:56101 [4] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54100:56102 [5] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54100:56102 [5] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54100:56102 [5] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54100:56102 [5] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54097:56103 [2] NCCL INFO Connected all trees +n124-112-200:54098:56104 [3] NCCL INFO Connected all trees +n124-112-200:54098:56104 [3] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54097:56103 [2] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54098:56104 [3] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54097:56103 [2] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54098:56104 [3] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54098:56104 [3] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54097:56103 [2] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54097:56103 [2] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54096:56105 [1] NCCL INFO Connected all trees +n124-112-200:54096:56105 [1] NCCL INFO NCCL_PROTO set by environment to Simple +n124-112-200:54096:56105 [1] NCCL INFO NCCL_ALGO set by environment to Ring,Tree +n124-112-200:54096:56105 [1] NCCL INFO threadThresholds 8/8/64 | 56/8/64 | 512 | 512 +n124-112-200:54096:56105 [1] NCCL INFO 24 coll channels, 24 collnet channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +n124-112-200:54098:56104 [3] NCCL INFO ncclCommSplit comm 0x3cb19f80 rank 3 nranks 7 cudaDev 3 nvmlDev 4 busId 84000 parent 0x4f6041b0 color -1326228412 key 3 commId 0xfd728d2be815c138 - Init COMPLETE +n124-112-200:54099:56101 [4] NCCL INFO ncclCommSplit comm 0xeff4ee0 rank 4 nranks 7 cudaDev 4 nvmlDev 5 busId 85000 parent 0x37e31620 color -1326228412 key 4 commId 0xfd728d2be815c138 - Init COMPLETE +n124-112-200:54095:56100 [0] NCCL INFO ncclCommSplit comm 0x3e453450 rank 0 nranks 7 cudaDev 0 nvmlDev 1 busId 5000 parent 0xcad968b0 color -1326228412 key 0 commId 0xfd728d2be815c138 - Init COMPLETE +n124-112-200:54100:56102 [5] NCCL INFO ncclCommSplit comm 0x102f54c0 rank 5 nranks 7 cudaDev 5 nvmlDev 6 busId 8b000 parent 0xc6db1170 color -1326228412 key 5 commId 0xfd728d2be815c138 - Init COMPLETE +n124-112-200:54096:56105 [1] NCCL INFO ncclCommSplit comm 0xc408b450 rank 1 nranks 7 cudaDev 1 nvmlDev 2 busId b000 parent 0xc406a3a0 color -1326228412 key 1 commId 0xfd728d2be815c138 - Init COMPLETE +n124-112-200:54101:56106 [6] NCCL INFO ncclCommSplit comm 0xc1698160 rank 6 nranks 7 cudaDev 6 nvmlDev 7 busId 8c000 parent 0xc409e9d0 color -1326228412 key 6 commId 0xfd728d2be815c138 - Init COMPLETE +n124-112-200:54097:56103 [2] NCCL INFO ncclCommSplit comm 0x384d8de0 rank 2 nranks 7 cudaDev 2 nvmlDev 3 busId c000 parent 0x23947350 color -1326228412 key 2 commId 0xfd728d2be815c138 - Init COMPLETE +Using /mnt/bn/magellan-product-audit/xushilin/.cache/torch_extensions/py311_cu124 as PyTorch extensions root...Using /mnt/bn/magellan-product-audit/xushilin/.cache/torch_extensions/py311_cu124 as PyTorch extensions root...Using /mnt/bn/magellan-product-audit/xushilin/.cache/torch_extensions/py311_cu124 as PyTorch extensions root...Using /mnt/bn/magellan-product-audit/xushilin/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... + + + +Using /mnt/bn/magellan-product-audit/xushilin/.cache/torch_extensions/py311_cu124 as PyTorch extensions root...Using /mnt/bn/magellan-product-audit/xushilin/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... + +Using /mnt/bn/magellan-product-audit/xushilin/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... +Detected CUDA files, patching ldflags +Emitting ninja build file /mnt/bn/magellan-product-audit/xushilin/.cache/torch_extensions/py311_cu124/fused_adam/build.ninja... +/mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/torch/utils/cpp_extension.py:1964: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. +If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST']. + warnings.warn( +Building extension module fused_adam... +Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +[1/3] c++ -MMD -MF fused_adam_frontend.o.d -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/deepspeed/ops/csrc/includes -I/mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/deepspeed/ops/csrc/adam -isystem /mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/torch/include -isystem /mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/torch/include/TH -isystem /mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -std=c++17 -g -Wno-reorder -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DBF16_AVAILABLE -c /mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/deepspeed/ops/csrc/adam/fused_adam_frontend.cpp -o fused_adam_frontend.o +[2/3] /usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output multi_tensor_adam.cuda.o.d -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/deepspeed/ops/csrc/includes -I/mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/deepspeed/ops/csrc/adam -isystem /mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/torch/include -isystem /mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/torch/include/TH -isystem /mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_90,code=sm_90 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_90,code=sm_90 -gencode=arch=compute_90,code=compute_90 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -U__CUDA_NO_BFLOAT16_CONVERSIONS__ -std=c++17 -c /mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o +[3/3] c++ fused_adam_frontend.o multi_tensor_adam.cuda.o -shared -L/mnt/bn/magellan-product-llm-data/xushilin/miniconda3/envs/dev/lib/python3.11/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda -ltorch -ltorch_python -L/usr/local/cuda/lib64 -lcudart -o fused_adam.so +Loading extension module fused_adam... +Time to load fused_adam op: 142.38229990005493 seconds +Loading extension module fused_adam... +Loading extension module fused_adam... +Loading extension module fused_adam... +Time to load fused_adam op: 142.45441889762878 seconds +Time to load fused_adam op: 142.4613823890686 seconds +Loading extension module fused_adam... +Time to load fused_adam op: 142.46453142166138 seconds +Loading extension module fused_adam... +Loading extension module fused_adam... +Time to load fused_adam op: 142.46678566932678 seconds +Time to load fused_adam op: 142.46910548210144 seconds +Time to load fused_adam op: 142.47157788276672 seconds + 0%| | 0/2918 [00:00