{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.457024097442627, "min": 1.382386326789856, "max": 3.295682191848755, "count": 5000 }, "SoccerTwos.Policy.Entropy.sum": { "value": 29047.232421875, "min": 9045.61328125, "max": 129095.1171875, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 94.5, "min": 41.59322033898305, "max": 999.0, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19656.0, "min": 10480.0, "max": 30552.0, "count": 5000 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1615.298322029235, "min": 1184.1147553530536, "max": 1788.072882833674, "count": 4982 }, "SoccerTwos.Self-play.ELO.sum": { "value": 167991.02549104043, "min": 2372.0045709806127, "max": 386399.5660834336, "count": 4982 }, "SoccerTwos.Step.mean": { "value": 49999927.0, "min": 9292.0, "max": 49999927.0, "count": 5000 }, "SoccerTwos.Step.sum": { "value": 49999927.0, "min": 9292.0, "max": 49999927.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.0052182432264089584, "min": -0.13654792308807373, "max": 0.182419553399086, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -0.547915518283844, "min": -23.895885467529297, "max": 24.13683319091797, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.005107656586915255, "min": -0.13689196109771729, "max": 0.18595825135707855, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -0.5363039374351501, "min": -23.956092834472656, "max": 23.616697311401367, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.013596189589727492, "min": -0.5333333333333333, "max": 0.5414170178961246, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 1.4275999069213867, "min": -74.41499990224838, "max": 57.16080003976822, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.013596189589727492, "min": -0.5333333333333333, "max": 0.5414170178961246, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 1.4275999069213867, "min": -74.41499990224838, "max": 57.16080003976822, "count": 5000 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.01741077769790233, "min": 0.00999593781477112, "max": 0.025121571430160354, "count": 2424 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.01741077769790233, "min": 0.00999593781477112, "max": 0.025121571430160354, "count": 2424 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.06743189729750157, "min": 0.00010065852614692025, "max": 0.12310614089171092, "count": 2424 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.06743189729750157, "min": 0.00010065852614692025, "max": 0.12310614089171092, "count": 2424 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.06791092591981093, "min": 0.0001043718390368061, "max": 0.125173020362854, "count": 2424 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.06791092591981093, "min": 0.0001043718390368061, "max": 0.125173020362854, "count": 2424 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2424 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2424 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 2424 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 2424 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 2424 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 2424 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1741783027", "python_version": "3.9.13 (tags/v3.9.13:6de2ca5, May 17 2022, 16:36:42) [MSC v.1929 64 bit (AMD64)]", "command_line_arguments": "C:\\Users\\avoronk\\Desktop\\MLAgent\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos2 --no-graphics", "mlagents_version": "0.30.0", "mlagents_envs_version": "0.30.0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.6.0+cpu", "numpy_version": "1.21.2", "end_time_seconds": "1741950132" }, "total": 167105.3948365, "count": 1, "self": 0.41425709999748506, "children": { "run_training.setup": { "total": 0.12295080000000036, "count": 1, "self": 0.12295080000000036 }, "TrainerController.start_learning": { "total": 167104.8576286, "count": 1, "self": 92.95741247112164, "children": { "TrainerController._reset_env": { "total": 9.987525800080114, "count": 250, "self": 9.987525800080114 }, "TrainerController.advance": { "total": 167001.74491522883, "count": 3430610, "self": 91.7349966520851, "children": { "env_step": { "total": 64277.21248649163, "count": 3430610, "self": 50972.3180262094, "children": { "SubprocessEnvManager._take_step": { "total": 13249.325617400105, "count": 3430610, "self": 462.81668907397034, "children": { "TorchPolicy.evaluate": { "total": 12786.508928326135, "count": 6281864, "self": 12786.508928326135 } } }, "workers": { "total": 55.568842882126084, "count": 3430610, "self": 0.0, "children": { "worker_root": { "total": 166966.18707331613, "count": 3430610, "is_parallel": true, "self": 125895.98357992913, "children": { "steps_from_proto": { "total": 0.4630026999929715, "count": 500, "is_parallel": true, "self": 0.10038229986840275, "children": { "_process_rank_one_or_two_observation": { "total": 0.36262040012456875, "count": 2000, "is_parallel": true, "self": 0.36262040012456875 } } }, "UnityEnvironment.step": { "total": 41069.74049068701, "count": 3430610, "is_parallel": true, "self": 2015.8002678867779, "children": { "UnityEnvironment._generate_step_input": { "total": 1560.0781008194954, "count": 3430610, "is_parallel": true, "self": 1560.0781008194954 }, "communicator.exchange": { "total": 30734.03462130728, "count": 3430610, "is_parallel": true, "self": 30734.03462130728 }, "steps_from_proto": { "total": 6759.827500673452, "count": 6861220, "is_parallel": true, "self": 1459.1545256672607, "children": { "_process_rank_one_or_two_observation": { "total": 5300.672975006191, "count": 27444880, "is_parallel": true, "self": 5300.672975006191 } } } } } } } } } } }, "trainer_advance": { "total": 102632.79743208512, "count": 3430610, "self": 608.8977108858235, "children": { "process_trajectory": { "total": 13656.725619899378, "count": 3430610, "self": 13641.189395999445, "children": { "RLTrainer._checkpoint": { "total": 15.536223899932793, "count": 100, "self": 15.536223899932793 } } }, "_update_policy": { "total": 88367.17410129991, "count": 2424, "self": 8230.456397199145, "children": { "TorchPOCAOptimizer.update": { "total": 80136.71770410077, "count": 72720, "self": 80136.71770410077 } } } } } } }, "trainer_threads": { "total": 2.2999884095042944e-06, "count": 1, "self": 2.2999884095042944e-06 }, "TrainerController._save_models": { "total": 0.16777279999223538, "count": 1, "self": 0.006049499992514029, "children": { "RLTrainer._checkpoint": { "total": 0.16172329999972135, "count": 1, "self": 0.16172329999972135 } } } } } } }