{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.29709750413894653,
"min": 0.29709750413894653,
"max": 0.29709750413894653,
"count": 1
},
"Pyramids.Policy.Entropy.sum": {
"value": 6184.38134765625,
"min": 6184.38134765625,
"max": 6184.38134765625,
"count": 1
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 313.0877192982456,
"min": 313.0877192982456,
"max": 313.0877192982456,
"count": 1
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 17846.0,
"min": 17846.0,
"max": 17846.0,
"count": 1
},
"Pyramids.Step.mean": {
"value": 1019984.0,
"min": 1019984.0,
"max": 1019984.0,
"count": 1
},
"Pyramids.Step.sum": {
"value": 1019984.0,
"min": 1019984.0,
"max": 1019984.0,
"count": 1
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.5835974812507629,
"min": 0.5835974812507629,
"max": 0.5835974812507629,
"count": 1
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 107.96553039550781,
"min": 107.96553039550781,
"max": 107.96553039550781,
"count": 1
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.08388964086771011,
"min": 0.08388964086771011,
"max": 0.08388964086771011,
"count": 1
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 15.519583702087402,
"min": 15.519583702087402,
"max": 15.519583702087402,
"count": 1
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.686912264740258,
"min": 1.686912264740258,
"max": 1.686912264740258,
"count": 1
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 96.1539990901947,
"min": 96.1539990901947,
"max": 96.1539990901947,
"count": 1
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.686912264740258,
"min": 1.686912264740258,
"max": 1.686912264740258,
"count": 1
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 96.1539990901947,
"min": 96.1539990901947,
"max": 96.1539990901947,
"count": 1
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.036538689442462975,
"min": 0.036538689442462975,
"max": 0.036538689442462975,
"count": 1
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.0827052982203895,
"min": 2.0827052982203895,
"max": 2.0827052982203895,
"count": 1
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06466975791759237,
"min": 0.06466975791759237,
"max": 0.06466975791759237,
"count": 1
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.5820278212583313,
"min": 0.5820278212583313,
"max": 0.5820278212583313,
"count": 1
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.015688749308066217,
"min": 0.015688749308066217,
"max": 0.015688749308066217,
"count": 1
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.14119874377259595,
"min": 0.14119874377259595,
"max": 0.14119874377259595,
"count": 1
},
"Pyramids.Policy.LearningRate.mean": {
"value": 5.667056039816612e-06,
"min": 5.667056039816612e-06,
"max": 5.667056039816612e-06,
"count": 1
},
"Pyramids.Policy.LearningRate.sum": {
"value": 5.100350435834951e-05,
"min": 5.100350435834951e-05,
"max": 5.100350435834951e-05,
"count": 1
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10188898597626755,
"min": 0.10188898597626755,
"max": 0.10188898597626755,
"count": 1
},
"Pyramids.Policy.Epsilon.sum": {
"value": 0.9170008737864079,
"min": 0.9170008737864079,
"max": 0.9170008737864079,
"count": 1
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0001987096990291262,
"min": 0.0001987096990291262,
"max": 0.0001987096990291262,
"count": 1
},
"Pyramids.Policy.Beta.sum": {
"value": 0.001788387291262136,
"min": 0.001788387291262136,
"max": 0.001788387291262136,
"count": 1
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.011391442269086838,
"min": 0.011391442269086838,
"max": 0.011391442269086838,
"count": 1
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.10252297669649124,
"min": 0.10252297669649124,
"max": 0.10252297669649124,
"count": 1
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1765056306",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=PyramidsTraining --no-graphics --resume",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.8.0+cu128",
"numpy_version": "1.23.5",
"end_time_seconds": "1765056394"
},
"total": 88.36538406599993,
"count": 1,
"self": 0.48088269799973204,
"children": {
"run_training.setup": {
"total": 0.025826080000115326,
"count": 1,
"self": 0.025826080000115326
},
"TrainerController.start_learning": {
"total": 87.85867528800009,
"count": 1,
"self": 0.05468905799534696,
"children": {
"TrainerController._reset_env": {
"total": 2.3069376850007757,
"count": 1,
"self": 2.3069376850007757
},
"TrainerController.advance": {
"total": 85.37670938000338,
"count": 1995,
"self": 0.05675208900629514,
"children": {
"env_step": {
"total": 64.4985841550124,
"count": 1995,
"self": 58.78523483995923,
"children": {
"SubprocessEnvManager._take_step": {
"total": 5.679183673039006,
"count": 1995,
"self": 0.16634645904650824,
"children": {
"TorchPolicy.evaluate": {
"total": 5.512837213992498,
"count": 1926,
"self": 5.512837213992498
}
}
},
"workers": {
"total": 0.03416564201415895,
"count": 1995,
"self": 0.0,
"children": {
"worker_root": {
"total": 87.49142945899075,
"count": 1995,
"is_parallel": true,
"self": 33.053141846014114,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.002092548000291572,
"count": 1,
"is_parallel": true,
"self": 0.0006396970011337544,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0014528509991578176,
"count": 8,
"is_parallel": true,
"self": 0.0014528509991578176
}
}
},
"UnityEnvironment.step": {
"total": 0.052563428999746975,
"count": 1,
"is_parallel": true,
"self": 0.0005593469986706623,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0004858820002482389,
"count": 1,
"is_parallel": true,
"self": 0.0004858820002482389
},
"communicator.exchange": {
"total": 0.049650858000859444,
"count": 1,
"is_parallel": true,
"self": 0.049650858000859444
},
"steps_from_proto": {
"total": 0.0018673419999686303,
"count": 1,
"is_parallel": true,
"self": 0.00036817700038227485,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0014991649995863554,
"count": 8,
"is_parallel": true,
"self": 0.0014991649995863554
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 54.43828761297664,
"count": 1994,
"is_parallel": true,
"self": 1.193144935006785,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.7941273809656195,
"count": 1994,
"is_parallel": true,
"self": 0.7941273809656195
},
"communicator.exchange": {
"total": 48.54618899999514,
"count": 1994,
"is_parallel": true,
"self": 48.54618899999514
},
"steps_from_proto": {
"total": 3.9048262970090946,
"count": 1994,
"is_parallel": true,
"self": 0.8382953780092066,
"children": {
"_process_rank_one_or_two_observation": {
"total": 3.066530918999888,
"count": 15952,
"is_parallel": true,
"self": 3.066530918999888
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 20.821373135984686,
"count": 1995,
"self": 0.10325494000335311,
"children": {
"process_trajectory": {
"total": 3.943889454982127,
"count": 1995,
"self": 3.943889454982127
},
"_update_policy": {
"total": 16.774228740999206,
"count": 14,
"self": 9.262684272992374,
"children": {
"TorchPPOOptimizer.update": {
"total": 7.511544468006832,
"count": 672,
"self": 7.511544468006832
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.2220007192809135e-06,
"count": 1,
"self": 1.2220007192809135e-06
},
"TrainerController._save_models": {
"total": 0.12033794299986766,
"count": 1,
"self": 0.0014341070000227774,
"children": {
"RLTrainer._checkpoint": {
"total": 0.11890383599984489,
"count": 1,
"self": 0.11890383599984489
}
}
}
}
}
}
}