{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.3102952241897583,
"min": 0.30454158782958984,
"max": 1.4764149188995361,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 9418.0810546875,
"min": 9043.6669921875,
"max": 44788.5234375,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989993.0,
"min": 29952.0,
"max": 989993.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989993.0,
"min": 29952.0,
"max": 989993.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.5284751057624817,
"min": -0.09651791304349899,
"max": 0.646522045135498,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 144.80218505859375,
"min": -23.16429901123047,
"max": 184.90530395507812,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.033348482102155685,
"min": -0.01714378409087658,
"max": 0.31495872139930725,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 9.137483596801758,
"min": -4.748828411102295,
"max": 74.64521789550781,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.0675561677198857,
"min": 0.06520644967959184,
"max": 0.07367452353648678,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9457863480783999,
"min": 0.5157216647554075,
"max": 1.0650019302916276,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.014521736876174276,
"min": 0.0006480201820093869,
"max": 0.015912959387129548,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.20330431626643986,
"min": 0.008424262366122029,
"max": 0.22278143141981369,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.418197527299995e-06,
"min": 7.418197527299995e-06,
"max": 0.00029523291587474284,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00010385476538219993,
"min": 0.00010385476538219993,
"max": 0.0036328318890560996,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.1024727,
"min": 0.1024727,
"max": 0.19841097142857142,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4346178,
"min": 1.3888768,
"max": 2.6109439,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0002570227299999999,
"min": 0.0002570227299999999,
"max": 0.009841256045714286,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0035983182199999983,
"min": 0.0035983182199999983,
"max": 0.12111329560999999,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.013241833075881004,
"min": 0.013241833075881004,
"max": 0.3928230106830597,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.18538565933704376,
"min": 0.18538565933704376,
"max": 2.7497611045837402,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 351.4457831325301,
"min": 294.65346534653463,
"max": 996.71875,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29170.0,
"min": 16751.0,
"max": 32455.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.5521421452243644,
"min": -0.9351438009180129,
"max": 1.6955999807307596,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 128.82779805362225,
"min": -29.92460162937641,
"max": 168.23899817466736,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.5521421452243644,
"min": -0.9351438009180129,
"max": 1.6955999807307596,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 128.82779805362225,
"min": -29.92460162937641,
"max": 168.23899817466736,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.048140521151966036,
"min": 0.042419247724009804,
"max": 7.475783476058175,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 3.995663255613181,
"min": 3.995663255613181,
"max": 127.08831909298897,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1689089307",
"python_version": "3.10.12 (main, Jun 7 2023, 12:45:35) [GCC 9.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0+cu102",
"numpy_version": "1.21.2",
"end_time_seconds": "1689091629"
},
"total": 2321.261840333,
"count": 1,
"self": 0.9587011140001778,
"children": {
"run_training.setup": {
"total": 0.04554998600019644,
"count": 1,
"self": 0.04554998600019644
},
"TrainerController.start_learning": {
"total": 2320.2575892329996,
"count": 1,
"self": 1.5935823269505818,
"children": {
"TrainerController._reset_env": {
"total": 4.049705815000152,
"count": 1,
"self": 4.049705815000152
},
"TrainerController.advance": {
"total": 2314.448032905049,
"count": 64080,
"self": 1.7072245560025294,
"children": {
"env_step": {
"total": 1646.1051665650375,
"count": 64080,
"self": 1521.5808535869744,
"children": {
"SubprocessEnvManager._take_step": {
"total": 123.57080820505121,
"count": 64080,
"self": 5.231265325085587,
"children": {
"TorchPolicy.evaluate": {
"total": 118.33954287996562,
"count": 62554,
"self": 118.33954287996562
}
}
},
"workers": {
"total": 0.9535047730119004,
"count": 64080,
"self": 0.0,
"children": {
"worker_root": {
"total": 2314.7725745350094,
"count": 64080,
"is_parallel": true,
"self": 918.4829426190431,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.001966669000012189,
"count": 1,
"is_parallel": true,
"self": 0.0006319660001281591,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0013347029998840299,
"count": 8,
"is_parallel": true,
"self": 0.0013347029998840299
}
}
},
"UnityEnvironment.step": {
"total": 0.05454993800003649,
"count": 1,
"is_parallel": true,
"self": 0.000560711999924024,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0005360590000691445,
"count": 1,
"is_parallel": true,
"self": 0.0005360590000691445
},
"communicator.exchange": {
"total": 0.05130555599998843,
"count": 1,
"is_parallel": true,
"self": 0.05130555599998843
},
"steps_from_proto": {
"total": 0.0021476110000548942,
"count": 1,
"is_parallel": true,
"self": 0.00038934300005166733,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001758268000003227,
"count": 8,
"is_parallel": true,
"self": 0.001758268000003227
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1396.2896319159663,
"count": 64079,
"is_parallel": true,
"self": 34.894041348936526,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 25.273906911049153,
"count": 64079,
"is_parallel": true,
"self": 25.273906911049153
},
"communicator.exchange": {
"total": 1222.3144232960442,
"count": 64079,
"is_parallel": true,
"self": 1222.3144232960442
},
"steps_from_proto": {
"total": 113.80726035993644,
"count": 64079,
"is_parallel": true,
"self": 22.697565777895306,
"children": {
"_process_rank_one_or_two_observation": {
"total": 91.10969458204113,
"count": 512632,
"is_parallel": true,
"self": 91.10969458204113
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 666.6356417840086,
"count": 64080,
"self": 2.985699104979858,
"children": {
"process_trajectory": {
"total": 118.93023136402394,
"count": 64080,
"self": 118.66455056502377,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2656807990001653,
"count": 2,
"self": 0.2656807990001653
}
}
},
"_update_policy": {
"total": 544.7197113150048,
"count": 457,
"self": 350.591992014007,
"children": {
"TorchPPOOptimizer.update": {
"total": 194.1277193009978,
"count": 22836,
"self": 194.1277193009978
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.3789999684377108e-06,
"count": 1,
"self": 1.3789999684377108e-06
},
"TrainerController._save_models": {
"total": 0.16626680700028373,
"count": 1,
"self": 0.0019845280003210064,
"children": {
"RLTrainer._checkpoint": {
"total": 0.16428227899996273,
"count": 1,
"self": 0.16428227899996273
}
}
}
}
}
}
}