diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -4,13612 +4,10895 @@ "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, - "global_step": 1235, + "global_step": 4940, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.004050632911392405, - "grad_norm": 6.420329598097033, - "learning_rate": 0.0, - "loss": 0.7925, + "epoch": 0.005063291139240506, + "grad_norm": 4.609051043616223, + "learning_rate": 3.238866396761134e-07, + "loss": 0.5035, "loss_nan_ranks": 0, - "loss_rank_avg": 0.08521983027458191, - "step": 1, - "valid_targets_mean": 4176.2, - "valid_targets_min": 1259 + "loss_rank_avg": 0.25989317893981934, + "step": 5, + "valid_targets_mean": 3288.2, + "valid_targets_min": 815 }, { - "epoch": 0.00810126582278481, - "grad_norm": 6.227117765992412, - "learning_rate": 3.2258064516129035e-07, - "loss": 0.7813, + "epoch": 0.010126582278481013, + "grad_norm": 3.6056753367980687, + "learning_rate": 7.287449392712551e-07, + "loss": 0.4979, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0910545140504837, - "step": 2, - "valid_targets_mean": 4499.6, - "valid_targets_min": 1332 + "loss_rank_avg": 0.23132257163524628, + "step": 10, + "valid_targets_mean": 4994.0, + "valid_targets_min": 1103 }, { - "epoch": 0.012151898734177215, - "grad_norm": 6.456985603229851, - "learning_rate": 6.451612903225807e-07, - "loss": 0.8104, + "epoch": 0.015189873417721518, + "grad_norm": 3.75046312941662, + "learning_rate": 1.133603238866397e-06, + "loss": 0.5107, "loss_nan_ranks": 0, - "loss_rank_avg": 0.08171398937702179, - "step": 3, - "valid_targets_mean": 3918.3, - "valid_targets_min": 1195 + "loss_rank_avg": 0.23433995246887207, + "step": 15, + "valid_targets_mean": 4084.1, + "valid_targets_min": 1248 }, { - "epoch": 0.01620253164556962, - "grad_norm": 6.249791581499031, - "learning_rate": 9.67741935483871e-07, - "loss": 0.7958, + "epoch": 0.020253164556962026, + "grad_norm": 4.009323566522028, + "learning_rate": 1.5384615384615387e-06, + "loss": 0.4757, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0754520520567894, - "step": 4, - "valid_targets_mean": 3975.1, - "valid_targets_min": 1365 + "loss_rank_avg": 0.19289864599704742, + "step": 20, + "valid_targets_mean": 4351.0, + "valid_targets_min": 1208 }, { - "epoch": 0.020253164556962026, - "grad_norm": 5.810678301715652, - "learning_rate": 1.2903225806451614e-06, - "loss": 0.7392, + "epoch": 0.02531645569620253, + "grad_norm": 3.857687726883038, + "learning_rate": 1.9433198380566803e-06, + "loss": 0.4795, "loss_nan_ranks": 0, - "loss_rank_avg": 0.09185856580734253, - "step": 5, - "valid_targets_mean": 4353.0, - "valid_targets_min": 1210 + "loss_rank_avg": 0.25582650303840637, + "step": 25, + "valid_targets_mean": 4894.1, + "valid_targets_min": 1784 }, { - "epoch": 0.02430379746835443, - "grad_norm": 5.41391649813407, - "learning_rate": 1.6129032258064516e-06, - "loss": 0.7529, + "epoch": 0.030379746835443037, + "grad_norm": 2.431783748480512, + "learning_rate": 2.348178137651822e-06, + "loss": 0.4538, "loss_nan_ranks": 0, - "loss_rank_avg": 0.08282750844955444, - "step": 6, - "valid_targets_mean": 3698.6, - "valid_targets_min": 1283 + "loss_rank_avg": 0.21279120445251465, + "step": 30, + "valid_targets_mean": 4872.4, + "valid_targets_min": 1107 }, { - "epoch": 0.028354430379746835, - "grad_norm": 4.424776890998065, - "learning_rate": 1.935483870967742e-06, - "loss": 0.7235, + "epoch": 0.035443037974683546, + "grad_norm": 2.0913610531108153, + "learning_rate": 2.7530364372469636e-06, + "loss": 0.4576, "loss_nan_ranks": 0, - "loss_rank_avg": 0.11384673416614532, - "step": 7, - "valid_targets_mean": 5868.5, - "valid_targets_min": 939 + "loss_rank_avg": 0.21099823713302612, + "step": 35, + "valid_targets_mean": 4177.9, + "valid_targets_min": 1173 }, { - "epoch": 0.03240506329113924, - "grad_norm": 4.077353043076277, - "learning_rate": 2.2580645161290324e-06, - "loss": 0.7017, + "epoch": 0.04050632911392405, + "grad_norm": 2.1267526423567826, + "learning_rate": 3.157894736842105e-06, + "loss": 0.4362, "loss_nan_ranks": 0, - "loss_rank_avg": 0.10583869367837906, - "step": 8, - "valid_targets_mean": 5800.6, - "valid_targets_min": 1707 + "loss_rank_avg": 0.21998053789138794, + "step": 40, + "valid_targets_mean": 5318.6, + "valid_targets_min": 1431 }, { - "epoch": 0.03645569620253165, - "grad_norm": 2.2756792698357886, - "learning_rate": 2.580645161290323e-06, - "loss": 0.6685, + "epoch": 0.04556962025316456, + "grad_norm": 1.8229308674567508, + "learning_rate": 3.562753036437247e-06, + "loss": 0.4107, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06618285179138184, - "step": 9, - "valid_targets_mean": 3563.2, - "valid_targets_min": 875 + "loss_rank_avg": 0.21922659873962402, + "step": 45, + "valid_targets_mean": 3793.9, + "valid_targets_min": 1470 }, { - "epoch": 0.04050632911392405, - "grad_norm": 2.1200029277675614, - "learning_rate": 2.903225806451613e-06, - "loss": 0.6746, + "epoch": 0.05063291139240506, + "grad_norm": 1.1764023018227465, + "learning_rate": 3.967611336032389e-06, + "loss": 0.3992, "loss_nan_ranks": 0, - "loss_rank_avg": 0.08664921671152115, - "step": 10, - "valid_targets_mean": 5192.1, - "valid_targets_min": 1433 + "loss_rank_avg": 0.1912367194890976, + "step": 50, + "valid_targets_mean": 4403.0, + "valid_targets_min": 1320 }, { - "epoch": 0.044556962025316456, - "grad_norm": 1.9809407766421865, - "learning_rate": 3.225806451612903e-06, - "loss": 0.6465, + "epoch": 0.05569620253164557, + "grad_norm": 1.0017414517719199, + "learning_rate": 4.372469635627531e-06, + "loss": 0.3867, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07697989046573639, - "step": 11, - "valid_targets_mean": 4453.6, - "valid_targets_min": 1029 + "loss_rank_avg": 0.19108061492443085, + "step": 55, + "valid_targets_mean": 4060.0, + "valid_targets_min": 1137 }, { - "epoch": 0.04860759493670886, - "grad_norm": 3.3136354683998746, - "learning_rate": 3.548387096774194e-06, - "loss": 0.6338, + "epoch": 0.060759493670886074, + "grad_norm": 0.7507852394561627, + "learning_rate": 4.7773279352226725e-06, + "loss": 0.3841, "loss_nan_ranks": 0, - "loss_rank_avg": 0.08591389656066895, - "step": 12, - "valid_targets_mean": 4982.2, - "valid_targets_min": 1056 + "loss_rank_avg": 0.16387608647346497, + "step": 60, + "valid_targets_mean": 4424.5, + "valid_targets_min": 1409 }, { - "epoch": 0.052658227848101265, - "grad_norm": 3.6619988249855, - "learning_rate": 3.870967741935484e-06, - "loss": 0.6646, + "epoch": 0.06582278481012659, + "grad_norm": 0.557594133353884, + "learning_rate": 5.1821862348178145e-06, + "loss": 0.3711, "loss_nan_ranks": 0, - "loss_rank_avg": 0.09026002883911133, - "step": 13, - "valid_targets_mean": 4544.6, - "valid_targets_min": 722 + "loss_rank_avg": 0.16386422514915466, + "step": 65, + "valid_targets_mean": 3916.2, + "valid_targets_min": 1045 }, { - "epoch": 0.05670886075949367, - "grad_norm": 3.2077224891053713, - "learning_rate": 4.193548387096774e-06, - "loss": 0.6125, + "epoch": 0.07088607594936709, + "grad_norm": 0.421043632829371, + "learning_rate": 5.5870445344129565e-06, + "loss": 0.3875, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07291078567504883, - "step": 14, - "valid_targets_mean": 4802.9, - "valid_targets_min": 1215 + "loss_rank_avg": 0.19452455639839172, + "step": 70, + "valid_targets_mean": 5656.6, + "valid_targets_min": 1746 }, { - "epoch": 0.060759493670886074, - "grad_norm": 3.4045427950280165, - "learning_rate": 4.516129032258065e-06, - "loss": 0.646, + "epoch": 0.0759493670886076, + "grad_norm": 0.41166529544187236, + "learning_rate": 5.991902834008098e-06, + "loss": 0.3747, "loss_nan_ranks": 0, - "loss_rank_avg": 0.08019945025444031, - "step": 15, - "valid_targets_mean": 4426.5, - "valid_targets_min": 1411 + "loss_rank_avg": 0.2017640769481659, + "step": 75, + "valid_targets_mean": 5565.6, + "valid_targets_min": 1301 }, { - "epoch": 0.06481012658227848, - "grad_norm": 2.636319161292139, - "learning_rate": 4.838709677419355e-06, - "loss": 0.5962, + "epoch": 0.0810126582278481, + "grad_norm": 0.37589937170078513, + "learning_rate": 6.39676113360324e-06, + "loss": 0.3707, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07337251305580139, - "step": 16, - "valid_targets_mean": 4542.8, - "valid_targets_min": 1060 + "loss_rank_avg": 0.13995295763015747, + "step": 80, + "valid_targets_mean": 3588.8, + "valid_targets_min": 994 }, { - "epoch": 0.06886075949367089, - "grad_norm": 2.542957173923556, - "learning_rate": 5.161290322580646e-06, - "loss": 0.601, + "epoch": 0.08607594936708861, + "grad_norm": 0.3714038081964425, + "learning_rate": 6.801619433198381e-06, + "loss": 0.3691, "loss_nan_ranks": 0, - "loss_rank_avg": 0.10810995101928711, - "step": 17, - "valid_targets_mean": 6028.9, - "valid_targets_min": 1560 + "loss_rank_avg": 0.21744906902313232, + "step": 85, + "valid_targets_mean": 5115.6, + "valid_targets_min": 898 }, { - "epoch": 0.0729113924050633, - "grad_norm": 2.0489652247347863, - "learning_rate": 5.483870967741935e-06, - "loss": 0.5887, + "epoch": 0.09113924050632911, + "grad_norm": 0.34597741271059124, + "learning_rate": 7.206477732793523e-06, + "loss": 0.3564, "loss_nan_ranks": 0, - "loss_rank_avg": 0.08178861439228058, - "step": 18, - "valid_targets_mean": 5927.2, - "valid_targets_min": 1886 + "loss_rank_avg": 0.17021265625953674, + "step": 90, + "valid_targets_mean": 5033.3, + "valid_targets_min": 886 }, { - "epoch": 0.0769620253164557, - "grad_norm": 1.5141315525278707, - "learning_rate": 5.806451612903226e-06, - "loss": 0.5803, + "epoch": 0.09620253164556962, + "grad_norm": 0.3172791695402959, + "learning_rate": 7.611336032388664e-06, + "loss": 0.3739, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07792671024799347, - "step": 19, - "valid_targets_mean": 5318.4, - "valid_targets_min": 1014 + "loss_rank_avg": 0.16697894036769867, + "step": 95, + "valid_targets_mean": 5135.7, + "valid_targets_min": 776 }, { - "epoch": 0.0810126582278481, - "grad_norm": 1.4189201796367261, - "learning_rate": 6.129032258064517e-06, - "loss": 0.5588, + "epoch": 0.10126582278481013, + "grad_norm": 0.3605571673212477, + "learning_rate": 8.016194331983806e-06, + "loss": 0.3609, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05552545189857483, - "step": 20, - "valid_targets_mean": 3590.8, - "valid_targets_min": 996 + "loss_rank_avg": 0.17649498581886292, + "step": 100, + "valid_targets_mean": 4052.9, + "valid_targets_min": 1407 }, { - "epoch": 0.08506329113924051, - "grad_norm": 1.4030946456260018, - "learning_rate": 6.451612903225806e-06, - "loss": 0.5513, + "epoch": 0.10632911392405063, + "grad_norm": 0.3126393081213533, + "learning_rate": 8.421052631578948e-06, + "loss": 0.3512, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06783956289291382, - "step": 21, - "valid_targets_mean": 4338.8, - "valid_targets_min": 1382 + "loss_rank_avg": 0.14957275986671448, + "step": 105, + "valid_targets_mean": 4704.9, + "valid_targets_min": 1914 }, { - "epoch": 0.08911392405063291, - "grad_norm": 1.3837911133448608, - "learning_rate": 6.774193548387097e-06, - "loss": 0.5383, + "epoch": 0.11139240506329114, + "grad_norm": 0.3531313178759686, + "learning_rate": 8.82591093117409e-06, + "loss": 0.3726, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0490473248064518, - "step": 22, - "valid_targets_mean": 3343.5, - "valid_targets_min": 1526 + "loss_rank_avg": 0.16743913292884827, + "step": 110, + "valid_targets_mean": 3947.0, + "valid_targets_min": 1052 }, { - "epoch": 0.09316455696202532, - "grad_norm": 1.1409375146335552, - "learning_rate": 7.096774193548388e-06, - "loss": 0.5404, + "epoch": 0.11645569620253164, + "grad_norm": 0.361312795641348, + "learning_rate": 9.230769230769232e-06, + "loss": 0.3633, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07465570420026779, - "step": 23, - "valid_targets_mean": 5280.9, - "valid_targets_min": 891 + "loss_rank_avg": 0.1453179121017456, + "step": 115, + "valid_targets_mean": 2979.1, + "valid_targets_min": 1042 }, { - "epoch": 0.09721518987341772, - "grad_norm": 0.921464895025812, - "learning_rate": 7.4193548387096784e-06, - "loss": 0.5473, + "epoch": 0.12151898734177215, + "grad_norm": 0.3118010440427671, + "learning_rate": 9.635627530364373e-06, + "loss": 0.3533, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07144070416688919, - "step": 24, - "valid_targets_mean": 4877.6, - "valid_targets_min": 1659 + "loss_rank_avg": 0.1923716962337494, + "step": 120, + "valid_targets_mean": 5371.9, + "valid_targets_min": 1279 }, { - "epoch": 0.10126582278481013, - "grad_norm": 1.1027739754347268, - "learning_rate": 7.741935483870968e-06, - "loss": 0.5181, + "epoch": 0.12658227848101267, + "grad_norm": 0.35948459926380516, + "learning_rate": 1.0040485829959515e-05, + "loss": 0.348, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05358847230672836, - "step": 25, - "valid_targets_mean": 4054.9, - "valid_targets_min": 1409 + "loss_rank_avg": 0.2048473060131073, + "step": 125, + "valid_targets_mean": 4750.8, + "valid_targets_min": 990 }, { - "epoch": 0.10531645569620253, - "grad_norm": 0.978422105693308, - "learning_rate": 8.064516129032258e-06, - "loss": 0.5043, + "epoch": 0.13164556962025317, + "grad_norm": 0.31537572490184773, + "learning_rate": 1.0445344129554658e-05, + "loss": 0.3713, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05265155807137489, - "step": 26, - "valid_targets_mean": 4199.1, - "valid_targets_min": 1048 + "loss_rank_avg": 0.20135825872421265, + "step": 130, + "valid_targets_mean": 4831.9, + "valid_targets_min": 1663 }, { - "epoch": 0.10936708860759493, - "grad_norm": 0.8264186306920859, - "learning_rate": 8.387096774193549e-06, - "loss": 0.5111, + "epoch": 0.13670886075949368, + "grad_norm": 0.36436085249069955, + "learning_rate": 1.0850202429149799e-05, + "loss": 0.3511, "loss_nan_ranks": 0, - "loss_rank_avg": 0.047893133014440536, - "step": 27, - "valid_targets_mean": 4003.8, - "valid_targets_min": 1485 + "loss_rank_avg": 0.21216928958892822, + "step": 135, + "valid_targets_mean": 4523.4, + "valid_targets_min": 1238 }, { - "epoch": 0.11341772151898734, - "grad_norm": 0.8702558388946918, - "learning_rate": 8.70967741935484e-06, - "loss": 0.5237, + "epoch": 0.14177215189873418, + "grad_norm": 0.3366824015027373, + "learning_rate": 1.1255060728744939e-05, + "loss": 0.3567, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07174102216959, - "step": 28, - "valid_targets_mean": 4451.9, - "valid_targets_min": 1003 + "loss_rank_avg": 0.17111346125602722, + "step": 140, + "valid_targets_mean": 3792.1, + "valid_targets_min": 1100 }, { - "epoch": 0.11746835443037974, - "grad_norm": 0.7688421729857465, - "learning_rate": 9.03225806451613e-06, - "loss": 0.5, + "epoch": 0.1468354430379747, + "grad_norm": 0.35533807007148477, + "learning_rate": 1.1659919028340081e-05, + "loss": 0.3589, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07423195987939835, - "step": 29, - "valid_targets_mean": 5388.8, - "valid_targets_min": 1313 + "loss_rank_avg": 0.1642487645149231, + "step": 145, + "valid_targets_mean": 4016.7, + "valid_targets_min": 1556 }, { - "epoch": 0.12151898734177215, - "grad_norm": 0.8462036679650163, - "learning_rate": 9.35483870967742e-06, - "loss": 0.4917, + "epoch": 0.1518987341772152, + "grad_norm": 0.31790675871917146, + "learning_rate": 1.2064777327935225e-05, + "loss": 0.3429, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06195066124200821, - "step": 30, - "valid_targets_mean": 4936.8, - "valid_targets_min": 1281 + "loss_rank_avg": 0.197759211063385, + "step": 150, + "valid_targets_mean": 5829.0, + "valid_targets_min": 1509 }, { - "epoch": 0.12556962025316457, - "grad_norm": 0.6471729036969938, - "learning_rate": 9.67741935483871e-06, - "loss": 0.4856, + "epoch": 0.1569620253164557, + "grad_norm": 0.32375174058174566, + "learning_rate": 1.2469635627530365e-05, + "loss": 0.3571, "loss_nan_ranks": 0, - "loss_rank_avg": 0.052095141261816025, - "step": 31, - "valid_targets_mean": 3925.6, - "valid_targets_min": 982 + "loss_rank_avg": 0.1860826313495636, + "step": 155, + "valid_targets_mean": 4697.8, + "valid_targets_min": 1042 }, { - "epoch": 0.12962025316455697, - "grad_norm": 0.6236907607706728, - "learning_rate": 1e-05, - "loss": 0.4888, + "epoch": 0.1620253164556962, + "grad_norm": 0.971603737593077, + "learning_rate": 1.2874493927125507e-05, + "loss": 0.3655, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07329850643873215, - "step": 32, - "valid_targets_mean": 4460.2, - "valid_targets_min": 834 + "loss_rank_avg": 0.2078309953212738, + "step": 160, + "valid_targets_mean": 5384.2, + "valid_targets_min": 1144 }, { - "epoch": 0.13367088607594937, - "grad_norm": 0.7166373529372694, - "learning_rate": 1.0322580645161291e-05, - "loss": 0.4859, + "epoch": 0.1670886075949367, + "grad_norm": 0.47966039195161064, + "learning_rate": 1.327935222672065e-05, + "loss": 0.3588, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05221894755959511, - "step": 33, - "valid_targets_mean": 5000.0, - "valid_targets_min": 1107 + "loss_rank_avg": 0.18700364232063293, + "step": 165, + "valid_targets_mean": 4719.9, + "valid_targets_min": 1071 }, { - "epoch": 0.13772151898734178, - "grad_norm": 0.6552483827885665, - "learning_rate": 1.0645161290322582e-05, - "loss": 0.4849, + "epoch": 0.17215189873417722, + "grad_norm": 0.2955436571363374, + "learning_rate": 1.3684210526315791e-05, + "loss": 0.3413, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04771258682012558, - "step": 34, - "valid_targets_mean": 3255.8, - "valid_targets_min": 1309 + "loss_rank_avg": 0.16040512919425964, + "step": 170, + "valid_targets_mean": 5175.4, + "valid_targets_min": 1035 }, { - "epoch": 0.14177215189873418, - "grad_norm": 0.5471801158923242, - "learning_rate": 1.096774193548387e-05, - "loss": 0.4738, + "epoch": 0.17721518987341772, + "grad_norm": 0.2909056024011537, + "learning_rate": 1.4089068825910932e-05, + "loss": 0.3611, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05435044690966606, - "step": 35, - "valid_targets_mean": 3794.1, - "valid_targets_min": 1102 + "loss_rank_avg": 0.14239881932735443, + "step": 175, + "valid_targets_mean": 4601.9, + "valid_targets_min": 1236 }, { - "epoch": 0.1458227848101266, - "grad_norm": 0.5734883495938603, - "learning_rate": 1.1290322580645164e-05, - "loss": 0.4693, + "epoch": 0.18227848101265823, + "grad_norm": 0.3684074269197893, + "learning_rate": 1.4493927125506074e-05, + "loss": 0.3607, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0718671903014183, - "step": 36, - "valid_targets_mean": 4801.4, - "valid_targets_min": 970 + "loss_rank_avg": 0.17205636203289032, + "step": 180, + "valid_targets_mean": 3363.9, + "valid_targets_min": 899 }, { - "epoch": 0.149873417721519, - "grad_norm": 0.6395737529832362, - "learning_rate": 1.1612903225806453e-05, - "loss": 0.4678, + "epoch": 0.18734177215189873, + "grad_norm": 0.2978543096994537, + "learning_rate": 1.4898785425101216e-05, + "loss": 0.3606, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04163165017962456, - "step": 37, - "valid_targets_mean": 3100.6, - "valid_targets_min": 1101 + "loss_rank_avg": 0.1950611174106598, + "step": 185, + "valid_targets_mean": 5937.9, + "valid_targets_min": 1368 }, { - "epoch": 0.1539240506329114, - "grad_norm": 0.5491272351602188, - "learning_rate": 1.1935483870967743e-05, - "loss": 0.4638, + "epoch": 0.19240506329113924, + "grad_norm": 0.30414263404359004, + "learning_rate": 1.5303643724696356e-05, + "loss": 0.3478, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0640963688492775, - "step": 38, - "valid_targets_mean": 5124.9, - "valid_targets_min": 1311 + "loss_rank_avg": 0.18548183143138885, + "step": 190, + "valid_targets_mean": 5815.7, + "valid_targets_min": 1087 }, { - "epoch": 0.1579746835443038, - "grad_norm": 0.5858951225783975, - "learning_rate": 1.2258064516129034e-05, - "loss": 0.4633, + "epoch": 0.19746835443037974, + "grad_norm": 0.3751151244554016, + "learning_rate": 1.5708502024291498e-05, + "loss": 0.3638, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0465809591114521, - "step": 39, - "valid_targets_mean": 3319.8, - "valid_targets_min": 1454 + "loss_rank_avg": 0.2703745365142822, + "step": 195, + "valid_targets_mean": 5632.3, + "valid_targets_min": 1444 }, { - "epoch": 0.1620253164556962, - "grad_norm": 0.5078493446863747, - "learning_rate": 1.2580645161290324e-05, - "loss": 0.4794, + "epoch": 0.20253164556962025, + "grad_norm": 0.3183245241372493, + "learning_rate": 1.6113360323886644e-05, + "loss": 0.3597, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07410909980535507, - "step": 40, - "valid_targets_mean": 5386.2, - "valid_targets_min": 1146 + "loss_rank_avg": 0.15712594985961914, + "step": 200, + "valid_targets_mean": 4035.7, + "valid_targets_min": 1064 }, { - "epoch": 0.1660759493670886, - "grad_norm": 0.6556197909112473, - "learning_rate": 1.2903225806451613e-05, - "loss": 0.4669, + "epoch": 0.20759493670886076, + "grad_norm": 0.3632627857891178, + "learning_rate": 1.6518218623481782e-05, + "loss": 0.3525, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04371774196624756, - "step": 41, - "valid_targets_mean": 3323.4, - "valid_targets_min": 1429 + "loss_rank_avg": 0.17240820825099945, + "step": 205, + "valid_targets_mean": 4021.2, + "valid_targets_min": 1114 }, { - "epoch": 0.17012658227848101, - "grad_norm": 0.4954070482937292, - "learning_rate": 1.3225806451612903e-05, - "loss": 0.4518, + "epoch": 0.21265822784810126, + "grad_norm": 0.34057034041346584, + "learning_rate": 1.6923076923076924e-05, + "loss": 0.3621, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06891210377216339, - "step": 42, - "valid_targets_mean": 4534.8, - "valid_targets_min": 1056 + "loss_rank_avg": 0.22665713727474213, + "step": 210, + "valid_targets_mean": 5157.8, + "valid_targets_min": 1706 }, { - "epoch": 0.17417721518987342, - "grad_norm": 0.6069525492861939, - "learning_rate": 1.3548387096774194e-05, - "loss": 0.4593, + "epoch": 0.21772151898734177, + "grad_norm": 0.32892662546524415, + "learning_rate": 1.7327935222672066e-05, + "loss": 0.3516, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06779725104570389, - "step": 43, - "valid_targets_mean": 4790.6, - "valid_targets_min": 1597 + "loss_rank_avg": 0.1358383297920227, + "step": 215, + "valid_targets_mean": 3612.9, + "valid_targets_min": 1015 }, { - "epoch": 0.17822784810126582, - "grad_norm": 0.5243882432164961, - "learning_rate": 1.3870967741935486e-05, - "loss": 0.4504, + "epoch": 0.22278481012658227, + "grad_norm": 0.3323805458845017, + "learning_rate": 1.7732793522267208e-05, + "loss": 0.3405, "loss_nan_ranks": 0, - "loss_rank_avg": 0.044036492705345154, - "step": 44, - "valid_targets_mean": 3547.5, - "valid_targets_min": 1343 + "loss_rank_avg": 0.14480899274349213, + "step": 220, + "valid_targets_mean": 6220.1, + "valid_targets_min": 1090 }, { - "epoch": 0.18227848101265823, - "grad_norm": 0.5006000360790126, - "learning_rate": 1.4193548387096776e-05, - "loss": 0.4666, + "epoch": 0.22784810126582278, + "grad_norm": 0.2969456771010524, + "learning_rate": 1.813765182186235e-05, + "loss": 0.345, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04936465620994568, - "step": 45, - "valid_targets_mean": 3365.9, - "valid_targets_min": 901 + "loss_rank_avg": 0.16806533932685852, + "step": 225, + "valid_targets_mean": 5594.2, + "valid_targets_min": 1290 }, { - "epoch": 0.18632911392405063, - "grad_norm": 0.6455809632367707, - "learning_rate": 1.4516129032258066e-05, - "loss": 0.4608, + "epoch": 0.23291139240506328, + "grad_norm": 0.35442807757680916, + "learning_rate": 1.8542510121457492e-05, + "loss": 0.3466, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05207091197371483, - "step": 46, - "valid_targets_mean": 4055.6, - "valid_targets_min": 925 + "loss_rank_avg": 0.20088203251361847, + "step": 230, + "valid_targets_mean": 4364.5, + "valid_targets_min": 1537 }, { - "epoch": 0.19037974683544304, - "grad_norm": 0.5346587224021531, - "learning_rate": 1.4838709677419357e-05, - "loss": 0.4522, + "epoch": 0.2379746835443038, + "grad_norm": 0.32766837251688363, + "learning_rate": 1.894736842105263e-05, + "loss": 0.3597, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05411246418952942, - "step": 47, - "valid_targets_mean": 5068.6, - "valid_targets_min": 1370 + "loss_rank_avg": 0.12136702239513397, + "step": 235, + "valid_targets_mean": 2704.7, + "valid_targets_min": 1252 }, { - "epoch": 0.19443037974683544, - "grad_norm": 0.4627343813521031, - "learning_rate": 1.5161290322580646e-05, - "loss": 0.44, + "epoch": 0.2430379746835443, + "grad_norm": 0.3298038015043137, + "learning_rate": 1.9352226720647776e-05, + "loss": 0.3312, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07487696409225464, - "step": 48, - "valid_targets_mean": 4657.8, - "valid_targets_min": 775 + "loss_rank_avg": 0.17166192829608917, + "step": 240, + "valid_targets_mean": 4605.7, + "valid_targets_min": 1445 }, { - "epoch": 0.19848101265822785, - "grad_norm": 0.5934753153922505, - "learning_rate": 1.5483870967741936e-05, - "loss": 0.4625, + "epoch": 0.2481012658227848, + "grad_norm": 0.2811058024334916, + "learning_rate": 1.9757085020242915e-05, + "loss": 0.3647, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05100386589765549, - "step": 49, - "valid_targets_mean": 4084.1, - "valid_targets_min": 854 + "loss_rank_avg": 0.17767693102359772, + "step": 245, + "valid_targets_mean": 5949.4, + "valid_targets_min": 992 }, { - "epoch": 0.20253164556962025, - "grad_norm": 0.5498959721358364, - "learning_rate": 1.5806451612903226e-05, - "loss": 0.4518, + "epoch": 0.25316455696202533, + "grad_norm": 0.3238895469625763, + "learning_rate": 2.016194331983806e-05, + "loss": 0.3542, "loss_nan_ranks": 0, - "loss_rank_avg": 0.048061393201351166, - "step": 50, - "valid_targets_mean": 4037.7, - "valid_targets_min": 1066 + "loss_rank_avg": 0.1756962537765503, + "step": 250, + "valid_targets_mean": 4702.4, + "valid_targets_min": 1101 }, { - "epoch": 0.20658227848101265, - "grad_norm": 0.5153204230281868, - "learning_rate": 1.6129032258064517e-05, - "loss": 0.4322, + "epoch": 0.2582278481012658, + "grad_norm": 0.30654096002239845, + "learning_rate": 2.05668016194332e-05, + "loss": 0.3556, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0640299916267395, - "step": 51, - "valid_targets_mean": 5563.1, - "valid_targets_min": 1095 + "loss_rank_avg": 0.18253913521766663, + "step": 255, + "valid_targets_mean": 5601.0, + "valid_targets_min": 977 }, { - "epoch": 0.21063291139240506, - "grad_norm": 0.4838663333830291, - "learning_rate": 1.6451612903225807e-05, - "loss": 0.4507, + "epoch": 0.26329113924050634, + "grad_norm": 0.33818696625352207, + "learning_rate": 2.097165991902834e-05, + "loss": 0.3604, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04705619812011719, - "step": 52, - "valid_targets_mean": 4156.1, - "valid_targets_min": 1509 + "loss_rank_avg": 0.20197951793670654, + "step": 260, + "valid_targets_mean": 4999.4, + "valid_targets_min": 859 }, { - "epoch": 0.21468354430379746, - "grad_norm": 0.5662923684942928, - "learning_rate": 1.6774193548387098e-05, - "loss": 0.454, + "epoch": 0.2683544303797468, + "grad_norm": 0.3221593835641881, + "learning_rate": 2.1376518218623487e-05, + "loss": 0.3356, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06045686453580856, - "step": 53, - "valid_targets_mean": 4749.8, - "valid_targets_min": 1347 + "loss_rank_avg": 0.1421291083097458, + "step": 265, + "valid_targets_mean": 3732.8, + "valid_targets_min": 1159 }, { - "epoch": 0.21873417721518987, - "grad_norm": 0.49698527661841424, - "learning_rate": 1.7096774193548388e-05, - "loss": 0.4434, + "epoch": 0.27341772151898736, + "grad_norm": 0.3467001085595146, + "learning_rate": 2.1781376518218625e-05, + "loss": 0.3541, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04809245467185974, - "step": 54, - "valid_targets_mean": 4022.0, - "valid_targets_min": 1166 + "loss_rank_avg": 0.19689059257507324, + "step": 270, + "valid_targets_mean": 4544.8, + "valid_targets_min": 1627 }, { - "epoch": 0.22278481012658227, - "grad_norm": 0.5745339911953643, - "learning_rate": 1.741935483870968e-05, - "loss": 0.4205, + "epoch": 0.27848101265822783, + "grad_norm": 0.3893966405259359, + "learning_rate": 2.2186234817813767e-05, + "loss": 0.3502, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05455455929040909, - "step": 55, - "valid_targets_mean": 5457.8, - "valid_targets_min": 1092 + "loss_rank_avg": 0.17629346251487732, + "step": 275, + "valid_targets_mean": 3116.6, + "valid_targets_min": 1066 }, { - "epoch": 0.22683544303797468, - "grad_norm": 0.45417778196816644, - "learning_rate": 1.774193548387097e-05, - "loss": 0.4309, + "epoch": 0.28354430379746837, + "grad_norm": 0.34248011251805455, + "learning_rate": 2.2591093117408906e-05, + "loss": 0.3517, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0891185849905014, - "step": 56, - "valid_targets_mean": 6185.5, - "valid_targets_min": 846 + "loss_rank_avg": 0.1394365131855011, + "step": 280, + "valid_targets_mean": 3400.1, + "valid_targets_min": 1015 }, { - "epoch": 0.23088607594936708, - "grad_norm": 0.5571432526973803, - "learning_rate": 1.806451612903226e-05, - "loss": 0.4281, + "epoch": 0.28860759493670884, + "grad_norm": 0.34292015860444375, + "learning_rate": 2.299595141700405e-05, + "loss": 0.3441, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04635221138596535, - "step": 57, - "valid_targets_mean": 4196.2, - "valid_targets_min": 1384 + "loss_rank_avg": 0.18773144483566284, + "step": 285, + "valid_targets_mean": 4286.1, + "valid_targets_min": 1425 }, { - "epoch": 0.23493670886075949, - "grad_norm": 0.5064870878860718, - "learning_rate": 1.838709677419355e-05, - "loss": 0.4379, + "epoch": 0.2936708860759494, + "grad_norm": 0.3527832328000518, + "learning_rate": 2.3400809716599193e-05, + "loss": 0.3441, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04302515089511871, - "step": 58, - "valid_targets_mean": 3178.4, - "valid_targets_min": 995 + "loss_rank_avg": 0.18093225359916687, + "step": 290, + "valid_targets_mean": 4568.9, + "valid_targets_min": 1281 }, { - "epoch": 0.2389873417721519, - "grad_norm": 0.5103891846350429, - "learning_rate": 1.870967741935484e-05, - "loss": 0.4347, + "epoch": 0.29873417721518986, + "grad_norm": 0.3222755455970598, + "learning_rate": 2.3805668016194332e-05, + "loss": 0.3363, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06216754764318466, - "step": 59, - "valid_targets_mean": 4990.6, - "valid_targets_min": 1463 + "loss_rank_avg": 0.13850252330303192, + "step": 295, + "valid_targets_mean": 3603.9, + "valid_targets_min": 1101 }, { - "epoch": 0.2430379746835443, - "grad_norm": 0.4708904538255792, - "learning_rate": 1.903225806451613e-05, - "loss": 0.4123, + "epoch": 0.3037974683544304, + "grad_norm": 0.3081645393207173, + "learning_rate": 2.4210526315789474e-05, + "loss": 0.3523, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0597774013876915, - "step": 60, - "valid_targets_mean": 4607.7, - "valid_targets_min": 1447 + "loss_rank_avg": 0.1911936104297638, + "step": 300, + "valid_targets_mean": 6009.3, + "valid_targets_min": 840 }, { - "epoch": 0.2470886075949367, - "grad_norm": 0.42765870481541046, - "learning_rate": 1.935483870967742e-05, - "loss": 0.4434, + "epoch": 0.30886075949367087, + "grad_norm": 0.29187368241351513, + "learning_rate": 2.461538461538462e-05, + "loss": 0.3484, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0484953448176384, - "step": 61, - "valid_targets_mean": 3862.9, - "valid_targets_min": 894 + "loss_rank_avg": 0.17911839485168457, + "step": 305, + "valid_targets_mean": 7036.5, + "valid_targets_min": 1310 }, { - "epoch": 0.25113924050632913, - "grad_norm": 0.48927429380395465, - "learning_rate": 1.967741935483871e-05, - "loss": 0.4343, + "epoch": 0.3139240506329114, + "grad_norm": 0.49985981352299386, + "learning_rate": 2.5020242914979758e-05, + "loss": 0.3363, "loss_nan_ranks": 0, - "loss_rank_avg": 0.035868145525455475, - "step": 62, - "valid_targets_mean": 3523.4, - "valid_targets_min": 1009 + "loss_rank_avg": 0.16296392679214478, + "step": 310, + "valid_targets_mean": 3887.8, + "valid_targets_min": 1192 }, { - "epoch": 0.25518987341772154, - "grad_norm": 0.48899816472824276, - "learning_rate": 2e-05, - "loss": 0.4381, + "epoch": 0.3189873417721519, + "grad_norm": 0.38261294484090613, + "learning_rate": 2.54251012145749e-05, + "loss": 0.3454, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05651945248246193, - "step": 63, - "valid_targets_mean": 4210.9, - "valid_targets_min": 1536 + "loss_rank_avg": 0.15782615542411804, + "step": 315, + "valid_targets_mean": 3376.2, + "valid_targets_min": 1185 }, { - "epoch": 0.25924050632911394, - "grad_norm": 0.49834976591205526, - "learning_rate": 2.0322580645161292e-05, - "loss": 0.4396, + "epoch": 0.3240506329113924, + "grad_norm": 0.45061664130339874, + "learning_rate": 2.5829959514170046e-05, + "loss": 0.3502, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05972347408533096, - "step": 64, - "valid_targets_mean": 5685.6, - "valid_targets_min": 1421 + "loss_rank_avg": 0.1975603699684143, + "step": 320, + "valid_targets_mean": 5570.6, + "valid_targets_min": 1866 }, { - "epoch": 0.26329113924050634, - "grad_norm": 0.5160400190152858, - "learning_rate": 2.0645161290322582e-05, - "loss": 0.4395, + "epoch": 0.3291139240506329, + "grad_norm": 0.3182602149268292, + "learning_rate": 2.6234817813765184e-05, + "loss": 0.3351, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06172401085495949, - "step": 65, - "valid_targets_mean": 5001.4, - "valid_targets_min": 861 + "loss_rank_avg": 0.14342699944972992, + "step": 325, + "valid_targets_mean": 3784.2, + "valid_targets_min": 849 }, { - "epoch": 0.26734177215189875, - "grad_norm": 0.5490855404405546, - "learning_rate": 2.0967741935483873e-05, - "loss": 0.4079, + "epoch": 0.3341772151898734, + "grad_norm": 0.3254644437199896, + "learning_rate": 2.6639676113360326e-05, + "loss": 0.3501, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05667848885059357, - "step": 66, - "valid_targets_mean": 3961.4, - "valid_targets_min": 1510 + "loss_rank_avg": 0.16729095578193665, + "step": 330, + "valid_targets_mean": 3966.9, + "valid_targets_min": 1392 }, { - "epoch": 0.27139240506329115, - "grad_norm": 0.5187677100204087, - "learning_rate": 2.1290322580645163e-05, - "loss": 0.4285, + "epoch": 0.3392405063291139, + "grad_norm": 0.32206841245268697, + "learning_rate": 2.704453441295547e-05, + "loss": 0.3476, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04428575187921524, - "step": 67, - "valid_targets_mean": 3353.8, - "valid_targets_min": 992 + "loss_rank_avg": 0.1826034039258957, + "step": 335, + "valid_targets_mean": 4157.9, + "valid_targets_min": 1092 }, { - "epoch": 0.27544303797468356, - "grad_norm": 0.5524242109730821, - "learning_rate": 2.161290322580645e-05, - "loss": 0.423, + "epoch": 0.34430379746835443, + "grad_norm": 0.3602273548100487, + "learning_rate": 2.744939271255061e-05, + "loss": 0.3625, "loss_nan_ranks": 0, - "loss_rank_avg": 0.056597426533699036, - "step": 68, - "valid_targets_mean": 4378.8, - "valid_targets_min": 852 + "loss_rank_avg": 0.2222571074962616, + "step": 340, + "valid_targets_mean": 4895.1, + "valid_targets_min": 1074 }, { - "epoch": 0.27949367088607596, - "grad_norm": 0.6044648675848316, - "learning_rate": 2.193548387096774e-05, - "loss": 0.4135, + "epoch": 0.3493670886075949, + "grad_norm": 0.3392767743330912, + "learning_rate": 2.7854251012145752e-05, + "loss": 0.3455, "loss_nan_ranks": 0, - "loss_rank_avg": 0.037096381187438965, - "step": 69, - "valid_targets_mean": 3259.7, - "valid_targets_min": 1333 + "loss_rank_avg": 0.13851270079612732, + "step": 345, + "valid_targets_mean": 3250.4, + "valid_targets_min": 1001 }, { - "epoch": 0.28354430379746837, - "grad_norm": 0.6375788523437971, - "learning_rate": 2.225806451612903e-05, - "loss": 0.4445, + "epoch": 0.35443037974683544, + "grad_norm": 0.32803869222957094, + "learning_rate": 2.825910931174089e-05, + "loss": 0.362, "loss_nan_ranks": 0, - "loss_rank_avg": 0.044327497482299805, - "step": 70, - "valid_targets_mean": 3402.1, - "valid_targets_min": 1017 + "loss_rank_avg": 0.18585026264190674, + "step": 350, + "valid_targets_mean": 4607.0, + "valid_targets_min": 1164 }, { - "epoch": 0.28759493670886077, - "grad_norm": 0.7420187981275729, - "learning_rate": 2.2580645161290328e-05, - "loss": 0.4288, + "epoch": 0.3594936708860759, + "grad_norm": 0.2960837838285842, + "learning_rate": 2.8663967611336033e-05, + "loss": 0.3539, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0775013118982315, - "step": 71, - "valid_targets_mean": 5517.9, - "valid_targets_min": 1512 + "loss_rank_avg": 0.23394142091274261, + "step": 355, + "valid_targets_mean": 8210.9, + "valid_targets_min": 1637 }, { - "epoch": 0.2916455696202532, - "grad_norm": 0.594231468226171, - "learning_rate": 2.2903225806451615e-05, - "loss": 0.3961, + "epoch": 0.36455696202531646, + "grad_norm": 0.3694811833098562, + "learning_rate": 2.906882591093118e-05, + "loss": 0.3468, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05749458447098732, - "step": 72, - "valid_targets_mean": 4559.2, - "valid_targets_min": 1204 + "loss_rank_avg": 0.16552570462226868, + "step": 360, + "valid_targets_mean": 2960.9, + "valid_targets_min": 1184 }, { - "epoch": 0.2956962025316456, - "grad_norm": 0.6876765721414142, - "learning_rate": 2.3225806451612906e-05, - "loss": 0.4297, + "epoch": 0.369620253164557, + "grad_norm": 0.31999701209701786, + "learning_rate": 2.9473684210526317e-05, + "loss": 0.3466, "loss_nan_ranks": 0, - "loss_rank_avg": 0.059393130242824554, - "step": 73, - "valid_targets_mean": 4895.0, - "valid_targets_min": 1425 + "loss_rank_avg": 0.16295602917671204, + "step": 365, + "valid_targets_mean": 3841.0, + "valid_targets_min": 1068 }, { - "epoch": 0.299746835443038, - "grad_norm": 0.7313997502730675, - "learning_rate": 2.3548387096774196e-05, - "loss": 0.4106, + "epoch": 0.37468354430379747, + "grad_norm": 0.29571436256727346, + "learning_rate": 2.987854251012146e-05, + "loss": 0.3567, "loss_nan_ranks": 0, - "loss_rank_avg": 0.055824242532253265, - "step": 74, - "valid_targets_mean": 5374.9, - "valid_targets_min": 958 + "loss_rank_avg": 0.19385173916816711, + "step": 370, + "valid_targets_mean": 5434.1, + "valid_targets_min": 904 }, { - "epoch": 0.3037974683544304, - "grad_norm": 0.5852457029426907, - "learning_rate": 2.3870967741935486e-05, - "loss": 0.4299, + "epoch": 0.379746835443038, + "grad_norm": 0.2794237498150064, + "learning_rate": 3.0283400809716605e-05, + "loss": 0.3384, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06372793763875961, - "step": 75, - "valid_targets_mean": 5972.1, - "valid_targets_min": 842 + "loss_rank_avg": 0.17324280738830566, + "step": 375, + "valid_targets_mean": 6533.0, + "valid_targets_min": 914 }, { - "epoch": 0.3078481012658228, - "grad_norm": 0.6251085254030976, - "learning_rate": 2.4193548387096777e-05, - "loss": 0.4299, + "epoch": 0.3848101265822785, + "grad_norm": 0.46097531086569304, + "learning_rate": 3.068825910931174e-05, + "loss": 0.3441, "loss_nan_ranks": 0, - "loss_rank_avg": 0.044161923229694366, - "step": 76, - "valid_targets_mean": 4310.9, - "valid_targets_min": 770 + "loss_rank_avg": 0.1612202525138855, + "step": 380, + "valid_targets_mean": 4019.0, + "valid_targets_min": 1578 }, { - "epoch": 0.3118987341772152, - "grad_norm": 0.5948103719382858, - "learning_rate": 2.4516129032258067e-05, - "loss": 0.4053, + "epoch": 0.389873417721519, + "grad_norm": 0.33386509179906093, + "learning_rate": 3.1093117408906885e-05, + "loss": 0.3495, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05264626815915108, - "step": 77, - "valid_targets_mean": 4628.0, - "valid_targets_min": 1385 + "loss_rank_avg": 0.14520494639873505, + "step": 385, + "valid_targets_mean": 3415.9, + "valid_targets_min": 1118 }, { - "epoch": 0.3159493670886076, - "grad_norm": 0.604033977230391, - "learning_rate": 2.4838709677419358e-05, - "loss": 0.4179, + "epoch": 0.3949367088607595, + "grad_norm": 0.3319526543378725, + "learning_rate": 3.149797570850203e-05, + "loss": 0.3487, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05454590916633606, - "step": 78, - "valid_targets_mean": 5104.9, - "valid_targets_min": 1642 + "loss_rank_avg": 0.1795864999294281, + "step": 390, + "valid_targets_mean": 4239.8, + "valid_targets_min": 991 }, { - "epoch": 0.32, - "grad_norm": 0.6370412410362083, - "learning_rate": 2.5161290322580648e-05, - "loss": 0.4143, + "epoch": 0.4, + "grad_norm": 0.3406949335779596, + "learning_rate": 3.190283400809717e-05, + "loss": 0.3384, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05205424875020981, - "step": 79, - "valid_targets_mean": 3922.1, - "valid_targets_min": 896 + "loss_rank_avg": 0.16871468722820282, + "step": 395, + "valid_targets_mean": 3474.5, + "valid_targets_min": 1133 }, { - "epoch": 0.3240506329113924, - "grad_norm": 0.47243769878404057, - "learning_rate": 2.548387096774194e-05, - "loss": 0.4225, + "epoch": 0.4050632911392405, + "grad_norm": 0.36838681719013816, + "learning_rate": 3.230769230769231e-05, + "loss": 0.3522, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05829702690243721, - "step": 80, - "valid_targets_mean": 5457.8, - "valid_targets_min": 1868 + "loss_rank_avg": 0.1932060271501541, + "step": 400, + "valid_targets_mean": 3488.6, + "valid_targets_min": 1164 }, { - "epoch": 0.3281012658227848, - "grad_norm": 0.5815081489368484, - "learning_rate": 2.5806451612903226e-05, - "loss": 0.4065, + "epoch": 0.41012658227848103, + "grad_norm": 0.35042164458126773, + "learning_rate": 3.271255060728745e-05, + "loss": 0.3588, "loss_nan_ranks": 0, - "loss_rank_avg": 0.042076289653778076, - "step": 81, - "valid_targets_mean": 4102.0, - "valid_targets_min": 1411 + "loss_rank_avg": 0.17853647470474243, + "step": 405, + "valid_targets_mean": 3956.2, + "valid_targets_min": 1293 }, { - "epoch": 0.3321518987341772, - "grad_norm": 0.5225876956461049, - "learning_rate": 2.6129032258064516e-05, - "loss": 0.4081, + "epoch": 0.4151898734177215, + "grad_norm": 0.3489327841744588, + "learning_rate": 3.311740890688259e-05, + "loss": 0.3585, "loss_nan_ranks": 0, - "loss_rank_avg": 0.046641163527965546, - "step": 82, - "valid_targets_mean": 4011.5, - "valid_targets_min": 1200 + "loss_rank_avg": 0.16795890033245087, + "step": 410, + "valid_targets_mean": 3783.9, + "valid_targets_min": 1206 }, { - "epoch": 0.3362025316455696, - "grad_norm": 0.4677338333165761, - "learning_rate": 2.6451612903225806e-05, - "loss": 0.4089, + "epoch": 0.42025316455696204, + "grad_norm": 0.3454446525690692, + "learning_rate": 3.352226720647774e-05, + "loss": 0.3516, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06551804393529892, - "step": 83, - "valid_targets_mean": 5499.8, - "valid_targets_min": 1338 + "loss_rank_avg": 0.2117060422897339, + "step": 415, + "valid_targets_mean": 4945.9, + "valid_targets_min": 1096 }, { - "epoch": 0.34025316455696203, - "grad_norm": 0.614764413868001, - "learning_rate": 2.6774193548387097e-05, - "loss": 0.441, + "epoch": 0.4253164556962025, + "grad_norm": 0.3098825126338765, + "learning_rate": 3.3927125506072876e-05, + "loss": 0.3574, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05876109376549721, - "step": 84, - "valid_targets_mean": 4796.1, - "valid_targets_min": 1326 + "loss_rank_avg": 0.20046228170394897, + "step": 420, + "valid_targets_mean": 6020.4, + "valid_targets_min": 1220 }, { - "epoch": 0.34430379746835443, - "grad_norm": 0.6457657603329191, - "learning_rate": 2.7096774193548387e-05, - "loss": 0.4321, + "epoch": 0.43037974683544306, + "grad_norm": 0.37532962252838115, + "learning_rate": 3.433198380566802e-05, + "loss": 0.3467, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06244055926799774, - "step": 85, - "valid_targets_mean": 4897.1, - "valid_targets_min": 1076 + "loss_rank_avg": 0.1575276255607605, + "step": 425, + "valid_targets_mean": 3148.8, + "valid_targets_min": 1434 }, { - "epoch": 0.34835443037974684, - "grad_norm": 0.8450721977557598, - "learning_rate": 2.7419354838709678e-05, - "loss": 0.4159, + "epoch": 0.43544303797468353, + "grad_norm": 0.41731233709453813, + "learning_rate": 3.473684210526316e-05, + "loss": 0.3536, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02325458824634552, - "step": 86, - "valid_targets_mean": 2360.2, - "valid_targets_min": 1064 + "loss_rank_avg": 0.18151873350143433, + "step": 430, + "valid_targets_mean": 4121.1, + "valid_targets_min": 882 }, { - "epoch": 0.35240506329113924, - "grad_norm": 0.9376224307797884, - "learning_rate": 2.774193548387097e-05, - "loss": 0.4275, + "epoch": 0.44050632911392407, + "grad_norm": 0.2834549547224196, + "learning_rate": 3.51417004048583e-05, + "loss": 0.3347, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07406799495220184, - "step": 87, - "valid_targets_mean": 4577.8, - "valid_targets_min": 1642 + "loss_rank_avg": 0.21482950448989868, + "step": 435, + "valid_targets_mean": 7190.4, + "valid_targets_min": 980 }, { - "epoch": 0.35645569620253165, - "grad_norm": 0.8814792501538088, - "learning_rate": 2.8064516129032262e-05, - "loss": 0.428, + "epoch": 0.44556962025316454, + "grad_norm": 0.30073688146062255, + "learning_rate": 3.5546558704453444e-05, + "loss": 0.3511, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07024728506803513, - "step": 88, - "valid_targets_mean": 6062.3, - "valid_targets_min": 1235 + "loss_rank_avg": 0.1362500786781311, + "step": 440, + "valid_targets_mean": 4360.6, + "valid_targets_min": 1118 }, { - "epoch": 0.36050632911392405, - "grad_norm": 0.9349922865807039, - "learning_rate": 2.8387096774193552e-05, - "loss": 0.4334, + "epoch": 0.4506329113924051, + "grad_norm": 0.3750594432228394, + "learning_rate": 3.595141700404859e-05, + "loss": 0.3531, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0638594776391983, - "step": 89, - "valid_targets_mean": 5731.2, - "valid_targets_min": 1500 + "loss_rank_avg": 0.20675930380821228, + "step": 445, + "valid_targets_mean": 4531.2, + "valid_targets_min": 1680 }, { - "epoch": 0.36455696202531646, - "grad_norm": 0.7683719797658665, - "learning_rate": 2.8709677419354843e-05, - "loss": 0.4095, + "epoch": 0.45569620253164556, + "grad_norm": 0.3568782095433956, + "learning_rate": 3.635627530364373e-05, + "loss": 0.3454, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0370856411755085, - "step": 90, - "valid_targets_mean": 2962.9, - "valid_targets_min": 1186 + "loss_rank_avg": 0.17187099158763885, + "step": 450, + "valid_targets_mean": 4279.3, + "valid_targets_min": 1210 }, { - "epoch": 0.36860759493670886, - "grad_norm": 0.829622404761209, - "learning_rate": 2.9032258064516133e-05, - "loss": 0.4239, + "epoch": 0.4607594936708861, + "grad_norm": 0.31010973542926373, + "learning_rate": 3.676113360323887e-05, + "loss": 0.349, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06027737632393837, - "step": 91, - "valid_targets_mean": 5650.5, - "valid_targets_min": 1201 + "loss_rank_avg": 0.1888793706893921, + "step": 455, + "valid_targets_mean": 5464.9, + "valid_targets_min": 540 }, { - "epoch": 0.37265822784810126, - "grad_norm": 0.8014182001734745, - "learning_rate": 2.9354838709677423e-05, - "loss": 0.4352, + "epoch": 0.46582278481012657, + "grad_norm": 0.30672819186326156, + "learning_rate": 3.716599190283401e-05, + "loss": 0.3369, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05477523431181908, - "step": 92, - "valid_targets_mean": 4282.9, - "valid_targets_min": 1167 + "loss_rank_avg": 0.19805072247982025, + "step": 460, + "valid_targets_mean": 5570.1, + "valid_targets_min": 1257 }, { - "epoch": 0.37670886075949367, - "grad_norm": 0.6733027367606301, - "learning_rate": 2.9677419354838714e-05, - "loss": 0.4088, + "epoch": 0.4708860759493671, + "grad_norm": 0.33237701694635524, + "learning_rate": 3.757085020242915e-05, + "loss": 0.3443, "loss_nan_ranks": 0, - "loss_rank_avg": 0.057788364589214325, - "step": 93, - "valid_targets_mean": 4786.1, - "valid_targets_min": 1266 + "loss_rank_avg": 0.17327673733234406, + "step": 465, + "valid_targets_mean": 4441.7, + "valid_targets_min": 1062 }, { - "epoch": 0.3807594936708861, - "grad_norm": 0.727168788839206, - "learning_rate": 3.0000000000000004e-05, - "loss": 0.4044, + "epoch": 0.4759493670886076, + "grad_norm": 0.3157269440092307, + "learning_rate": 3.7975708502024296e-05, + "loss": 0.3523, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04125818610191345, - "step": 94, - "valid_targets_mean": 3322.1, - "valid_targets_min": 1565 + "loss_rank_avg": 0.2036791741847992, + "step": 470, + "valid_targets_mean": 5480.2, + "valid_targets_min": 1147 }, { - "epoch": 0.3848101265822785, - "grad_norm": 0.6935011193456103, - "learning_rate": 3.032258064516129e-05, - "loss": 0.4165, + "epoch": 0.4810126582278481, + "grad_norm": 0.3385448591385259, + "learning_rate": 3.8380566801619435e-05, + "loss": 0.3254, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04848355054855347, - "step": 95, - "valid_targets_mean": 4021.0, - "valid_targets_min": 1580 + "loss_rank_avg": 0.18548253178596497, + "step": 475, + "valid_targets_mean": 5158.2, + "valid_targets_min": 1322 }, { - "epoch": 0.3888607594936709, - "grad_norm": 0.6682798377596265, - "learning_rate": 3.0645161290322585e-05, - "loss": 0.415, + "epoch": 0.4860759493670886, + "grad_norm": 0.3229813339259753, + "learning_rate": 3.878542510121458e-05, + "loss": 0.3539, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04788028076291084, - "step": 96, - "valid_targets_mean": 4199.7, - "valid_targets_min": 1270 + "loss_rank_avg": 0.19486048817634583, + "step": 480, + "valid_targets_mean": 5207.5, + "valid_targets_min": 1234 }, { - "epoch": 0.3929113924050633, - "grad_norm": 0.6246593721293577, - "learning_rate": 3.096774193548387e-05, - "loss": 0.417, + "epoch": 0.4911392405063291, + "grad_norm": 0.39888632177896866, + "learning_rate": 3.919028340080972e-05, + "loss": 0.3367, "loss_nan_ranks": 0, - "loss_rank_avg": 0.038797035813331604, - "step": 97, - "valid_targets_mean": 3773.2, - "valid_targets_min": 925 + "loss_rank_avg": 0.15041330456733704, + "step": 485, + "valid_targets_mean": 2910.4, + "valid_targets_min": 826 }, { - "epoch": 0.3969620253164557, - "grad_norm": 0.5861274809105809, - "learning_rate": 3.1290322580645166e-05, - "loss": 0.4017, + "epoch": 0.4962025316455696, + "grad_norm": 0.3216838457166851, + "learning_rate": 3.959514170040486e-05, + "loss": 0.3535, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04115966707468033, - "step": 98, - "valid_targets_mean": 3950.2, - "valid_targets_min": 1396 + "loss_rank_avg": 0.18031927943229675, + "step": 490, + "valid_targets_mean": 5146.9, + "valid_targets_min": 1318 }, { - "epoch": 0.4010126582278481, - "grad_norm": 0.7103528215779562, - "learning_rate": 3.161290322580645e-05, - "loss": 0.431, + "epoch": 0.5012658227848101, + "grad_norm": 0.3463812343689675, + "learning_rate": 4e-05, + "loss": 0.3538, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05843929946422577, - "step": 99, - "valid_targets_mean": 4570.2, - "valid_targets_min": 1129 + "loss_rank_avg": 0.13655783236026764, + "step": 495, + "valid_targets_mean": 3592.6, + "valid_targets_min": 977 }, { - "epoch": 0.4050632911392405, - "grad_norm": 0.5907411603216484, - "learning_rate": 3.193548387096774e-05, - "loss": 0.4079, + "epoch": 0.5063291139240507, + "grad_norm": 0.36468197040198047, + "learning_rate": 3.999987517534179e-05, + "loss": 0.3298, "loss_nan_ranks": 0, - "loss_rank_avg": 0.041239671409130096, - "step": 100, - "valid_targets_mean": 3490.6, - "valid_targets_min": 1166 + "loss_rank_avg": 0.17994073033332825, + "step": 500, + "valid_targets_mean": 3419.4, + "valid_targets_min": 997 }, { - "epoch": 0.4091139240506329, - "grad_norm": 0.6217864930174553, - "learning_rate": 3.2258064516129034e-05, - "loss": 0.4313, + "epoch": 0.5113924050632911, + "grad_norm": 0.3123011360513197, + "learning_rate": 3.999950070292526e-05, + "loss": 0.3514, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0475708544254303, - "step": 101, - "valid_targets_mean": 4086.8, - "valid_targets_min": 1129 + "loss_rank_avg": 0.13850685954093933, + "step": 505, + "valid_targets_mean": 4406.6, + "valid_targets_min": 1006 }, { - "epoch": 0.4131645569620253, - "grad_norm": 0.5399807835544768, - "learning_rate": 3.258064516129033e-05, - "loss": 0.4161, + "epoch": 0.5164556962025316, + "grad_norm": 0.3110207165348701, + "learning_rate": 3.9998876587424764e-05, + "loss": 0.3437, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06466293334960938, - "step": 102, - "valid_targets_mean": 5693.0, - "valid_targets_min": 808 + "loss_rank_avg": 0.13691280782222748, + "step": 510, + "valid_targets_mean": 4396.4, + "valid_targets_min": 1417 }, { - "epoch": 0.4172151898734177, - "grad_norm": 0.610285856962528, - "learning_rate": 3.2903225806451614e-05, - "loss": 0.4095, + "epoch": 0.5215189873417722, + "grad_norm": 0.32363025722531935, + "learning_rate": 3.99980028366308e-05, + "loss": 0.3569, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04695362597703934, - "step": 103, - "valid_targets_mean": 3831.8, - "valid_targets_min": 1211 + "loss_rank_avg": 0.24637667834758759, + "step": 515, + "valid_targets_mean": 5892.6, + "valid_targets_min": 1208 }, { - "epoch": 0.4212658227848101, - "grad_norm": 0.7251098492949469, - "learning_rate": 3.322580645161291e-05, - "loss": 0.4314, + "epoch": 0.5265822784810127, + "grad_norm": 0.3650846632854969, + "learning_rate": 3.999687946144992e-05, + "loss": 0.3581, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05233541131019592, - "step": 104, - "valid_targets_mean": 4645.6, - "valid_targets_min": 1205 + "loss_rank_avg": 0.18719391524791718, + "step": 520, + "valid_targets_mean": 4166.3, + "valid_targets_min": 1384 }, { - "epoch": 0.4253164556962025, - "grad_norm": 0.5634018976126205, - "learning_rate": 3.3548387096774195e-05, - "loss": 0.42, + "epoch": 0.5316455696202531, + "grad_norm": 0.380513757297641, + "learning_rate": 3.999550647590464e-05, + "loss": 0.3508, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06966076046228409, - "step": 105, - "valid_targets_mean": 6022.4, - "valid_targets_min": 1222 + "loss_rank_avg": 0.21175450086593628, + "step": 525, + "valid_targets_mean": 3908.1, + "valid_targets_min": 1057 }, { - "epoch": 0.4293670886075949, - "grad_norm": 0.5798410623158287, - "learning_rate": 3.387096774193549e-05, - "loss": 0.4009, + "epoch": 0.5367088607594936, + "grad_norm": 0.3303089638050748, + "learning_rate": 3.9993883897133174e-05, + "loss": 0.3344, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05769410356879234, - "step": 106, - "valid_targets_mean": 6083.4, - "valid_targets_min": 1292 + "loss_rank_avg": 0.1324310302734375, + "step": 530, + "valid_targets_mean": 3035.0, + "valid_targets_min": 1176 }, { - "epoch": 0.43341772151898733, - "grad_norm": 0.5843707202381763, - "learning_rate": 3.4193548387096776e-05, - "loss": 0.4214, + "epoch": 0.5417721518987342, + "grad_norm": 0.3065345335498946, + "learning_rate": 3.9992011745389335e-05, + "loss": 0.3564, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04835837706923485, - "step": 107, - "valid_targets_mean": 3786.8, - "valid_targets_min": 1461 + "loss_rank_avg": 0.18349391222000122, + "step": 535, + "valid_targets_mean": 5569.8, + "valid_targets_min": 1419 }, { - "epoch": 0.43746835443037974, - "grad_norm": 0.5780833413976226, - "learning_rate": 3.451612903225807e-05, - "loss": 0.4085, + "epoch": 0.5468354430379747, + "grad_norm": 0.3593697878507127, + "learning_rate": 3.998989004404217e-05, + "loss": 0.3477, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05581241473555565, - "step": 108, - "valid_targets_mean": 5162.2, - "valid_targets_min": 1565 + "loss_rank_avg": 0.1619454324245453, + "step": 540, + "valid_targets_mean": 3932.5, + "valid_targets_min": 1196 }, { - "epoch": 0.44151898734177214, - "grad_norm": 0.5940606633232612, - "learning_rate": 3.483870967741936e-05, - "loss": 0.4078, + "epoch": 0.5518987341772152, + "grad_norm": 0.25234558053757633, + "learning_rate": 3.998751881957576e-05, + "loss": 0.3363, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06986302137374878, - "step": 109, - "valid_targets_mean": 5780.5, - "valid_targets_min": 1673 + "loss_rank_avg": 0.16338235139846802, + "step": 545, + "valid_targets_mean": 5379.0, + "valid_targets_min": 977 }, { - "epoch": 0.44556962025316454, - "grad_norm": 0.5425947044504619, - "learning_rate": 3.516129032258065e-05, - "loss": 0.4127, + "epoch": 0.5569620253164557, + "grad_norm": 0.3445664068863559, + "learning_rate": 3.998489810158883e-05, + "loss": 0.3475, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04262430965900421, - "step": 110, - "valid_targets_mean": 4362.6, - "valid_targets_min": 1120 + "loss_rank_avg": 0.1785704791545868, + "step": 550, + "valid_targets_mean": 3962.2, + "valid_targets_min": 1209 }, { - "epoch": 0.44962025316455695, - "grad_norm": 0.5681614069237215, - "learning_rate": 3.548387096774194e-05, - "loss": 0.4118, + "epoch": 0.5620253164556962, + "grad_norm": 0.3556936347323425, + "learning_rate": 3.99820279227944e-05, + "loss": 0.3345, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07122272253036499, - "step": 111, - "valid_targets_mean": 5264.9, - "valid_targets_min": 1212 + "loss_rank_avg": 0.18062683939933777, + "step": 555, + "valid_targets_mean": 4593.2, + "valid_targets_min": 1615 }, { - "epoch": 0.45367088607594935, - "grad_norm": 0.5261871695912, - "learning_rate": 3.5806451612903225e-05, - "loss": 0.415, + "epoch": 0.5670886075949367, + "grad_norm": 0.3792686598578621, + "learning_rate": 3.997890831901938e-05, + "loss": 0.3413, "loss_nan_ranks": 0, - "loss_rank_avg": 0.049640655517578125, - "step": 112, - "valid_targets_mean": 4602.9, - "valid_targets_min": 1322 + "loss_rank_avg": 0.19577497243881226, + "step": 560, + "valid_targets_mean": 4658.2, + "valid_targets_min": 1474 }, { - "epoch": 0.45772151898734176, - "grad_norm": 0.5753944494929996, - "learning_rate": 3.612903225806452e-05, - "loss": 0.4088, + "epoch": 0.5721518987341773, + "grad_norm": 0.32913492636939423, + "learning_rate": 3.9975539329204116e-05, + "loss": 0.3387, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05468757823109627, - "step": 113, - "valid_targets_mean": 3892.3, - "valid_targets_min": 1006 + "loss_rank_avg": 0.17351575195789337, + "step": 565, + "valid_targets_mean": 4210.6, + "valid_targets_min": 743 }, { - "epoch": 0.46177215189873416, - "grad_norm": 0.510769138277729, - "learning_rate": 3.6451612903225805e-05, - "loss": 0.4168, + "epoch": 0.5772151898734177, + "grad_norm": 0.296950123523885, + "learning_rate": 3.9971920995401905e-05, + "loss": 0.3443, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05141526088118553, - "step": 114, - "valid_targets_mean": 4239.3, - "valid_targets_min": 1187 + "loss_rank_avg": 0.20880410075187683, + "step": 570, + "valid_targets_mean": 6590.1, + "valid_targets_min": 1899 }, { - "epoch": 0.46582278481012657, - "grad_norm": 0.6220564717227475, - "learning_rate": 3.67741935483871e-05, - "loss": 0.3929, + "epoch": 0.5822784810126582, + "grad_norm": 0.34173998557875557, + "learning_rate": 3.996805336277848e-05, + "loss": 0.3508, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05481129512190819, - "step": 115, - "valid_targets_mean": 5462.4, - "valid_targets_min": 1259 + "loss_rank_avg": 0.1291336864233017, + "step": 575, + "valid_targets_mean": 4007.8, + "valid_targets_min": 1380 }, { - "epoch": 0.46987341772151897, - "grad_norm": 0.4626762378034261, - "learning_rate": 3.7096774193548386e-05, - "loss": 0.4059, + "epoch": 0.5873417721518988, + "grad_norm": 0.30049566187413324, + "learning_rate": 3.996393647961143e-05, + "loss": 0.34, "loss_nan_ranks": 0, - "loss_rank_avg": 0.07054520398378372, - "step": 116, - "valid_targets_mean": 5666.0, - "valid_targets_min": 994 + "loss_rank_avg": 0.13710632920265198, + "step": 580, + "valid_targets_mean": 4320.4, + "valid_targets_min": 1110 }, { - "epoch": 0.4739240506329114, - "grad_norm": 0.7539378367732781, - "learning_rate": 3.741935483870968e-05, - "loss": 0.4216, + "epoch": 0.5924050632911393, + "grad_norm": 0.3451273067980245, + "learning_rate": 3.995957039728962e-05, + "loss": 0.3456, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06530013680458069, - "step": 117, - "valid_targets_mean": 5578.9, - "valid_targets_min": 1256 + "loss_rank_avg": 0.18673017621040344, + "step": 585, + "valid_targets_mean": 4318.7, + "valid_targets_min": 1318 }, { - "epoch": 0.4779746835443038, - "grad_norm": 0.592412643415671, - "learning_rate": 3.7741935483870974e-05, - "loss": 0.3899, + "epoch": 0.5974683544303797, + "grad_norm": 0.3248655593711836, + "learning_rate": 3.9954955170312504e-05, + "loss": 0.335, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02674018032848835, - "step": 118, - "valid_targets_mean": 3557.8, - "valid_targets_min": 970 + "loss_rank_avg": 0.15301413834095, + "step": 590, + "valid_targets_mean": 4103.9, + "valid_targets_min": 1260 }, { - "epoch": 0.4820253164556962, - "grad_norm": 0.7086278514415122, - "learning_rate": 3.806451612903226e-05, - "loss": 0.4068, + "epoch": 0.6025316455696202, + "grad_norm": 0.3874469407061254, + "learning_rate": 3.995009085628951e-05, + "loss": 0.3417, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03874162957072258, - "step": 119, - "valid_targets_mean": 3451.5, - "valid_targets_min": 1390 + "loss_rank_avg": 0.17825846374034882, + "step": 595, + "valid_targets_mean": 3280.8, + "valid_targets_min": 1303 }, { - "epoch": 0.4860759493670886, - "grad_norm": 0.8676643374155362, - "learning_rate": 3.8387096774193555e-05, - "loss": 0.4195, + "epoch": 0.6075949367088608, + "grad_norm": 0.32514023580791557, + "learning_rate": 3.994497751593927e-05, + "loss": 0.3417, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05797410011291504, - "step": 120, - "valid_targets_mean": 5209.5, - "valid_targets_min": 1236 + "loss_rank_avg": 0.17784975469112396, + "step": 600, + "valid_targets_mean": 4897.2, + "valid_targets_min": 1343 }, { - "epoch": 0.490126582278481, - "grad_norm": 0.9409109428169281, - "learning_rate": 3.870967741935484e-05, - "loss": 0.3931, + "epoch": 0.6126582278481013, + "grad_norm": 0.32015309031323297, + "learning_rate": 3.9939615213088865e-05, + "loss": 0.3461, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04078049957752228, - "step": 121, - "valid_targets_mean": 3740.5, - "valid_targets_min": 1056 + "loss_rank_avg": 0.1702687293291092, + "step": 605, + "valid_targets_mean": 5160.1, + "valid_targets_min": 787 }, { - "epoch": 0.4941772151898734, - "grad_norm": 0.7804190583394841, - "learning_rate": 3.9032258064516135e-05, - "loss": 0.4203, + "epoch": 0.6177215189873417, + "grad_norm": 0.29531423820767827, + "learning_rate": 3.993400401467308e-05, + "loss": 0.362, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04113798215985298, - "step": 122, - "valid_targets_mean": 3469.4, - "valid_targets_min": 1310 + "loss_rank_avg": 0.1590127795934677, + "step": 610, + "valid_targets_mean": 5178.2, + "valid_targets_min": 1187 }, { - "epoch": 0.4982278481012658, - "grad_norm": 0.6976532711479004, - "learning_rate": 3.935483870967742e-05, - "loss": 0.413, + "epoch": 0.6227848101265823, + "grad_norm": 0.30956228742996816, + "learning_rate": 3.992814399073349e-05, + "loss": 0.3358, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04702591896057129, - "step": 123, - "valid_targets_mean": 4705.9, - "valid_targets_min": 1178 + "loss_rank_avg": 0.1777225136756897, + "step": 615, + "valid_targets_mean": 5224.1, + "valid_targets_min": 1044 }, { - "epoch": 0.5022784810126583, - "grad_norm": 0.6970124689802406, - "learning_rate": 3.9677419354838716e-05, - "loss": 0.4229, + "epoch": 0.6278481012658228, + "grad_norm": 0.2946569197224124, + "learning_rate": 3.992203521441765e-05, + "loss": 0.3566, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0377708300948143, - "step": 124, - "valid_targets_mean": 3650.1, - "valid_targets_min": 1443 + "loss_rank_avg": 0.16454584896564484, + "step": 620, + "valid_targets_mean": 5873.6, + "valid_targets_min": 1522 }, { - "epoch": 0.5063291139240507, - "grad_norm": 0.6067040651055834, - "learning_rate": 4e-05, - "loss": 0.3816, + "epoch": 0.6329113924050633, + "grad_norm": 0.3566548256792116, + "learning_rate": 3.991567776197815e-05, + "loss": 0.3423, "loss_nan_ranks": 0, - "loss_rank_avg": 0.044166043400764465, - "step": 125, - "valid_targets_mean": 3421.4, - "valid_targets_min": 999 + "loss_rank_avg": 0.1757839322090149, + "step": 625, + "valid_targets_mean": 5280.0, + "valid_targets_min": 1179 }, { - "epoch": 0.5103797468354431, - "grad_norm": 0.7312519375483681, - "learning_rate": 3.9999920040266474e-05, - "loss": 0.4079, + "epoch": 0.6379746835443038, + "grad_norm": 0.34702302453822725, + "learning_rate": 3.990907171277168e-05, + "loss": 0.3406, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05348426103591919, - "step": 126, - "valid_targets_mean": 3932.2, - "valid_targets_min": 944 + "loss_rank_avg": 0.16491375863552094, + "step": 630, + "valid_targets_mean": 3738.1, + "valid_targets_min": 1437 }, { - "epoch": 0.5144303797468355, - "grad_norm": 0.6629876905385923, - "learning_rate": 3.999968016170525e-05, - "loss": 0.408, + "epoch": 0.6430379746835443, + "grad_norm": 0.3122025130697409, + "learning_rate": 3.990221714925802e-05, + "loss": 0.3425, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04648736119270325, - "step": 127, - "valid_targets_mean": 3955.1, - "valid_targets_min": 972 + "loss_rank_avg": 0.19272474944591522, + "step": 635, + "valid_targets_mean": 5090.6, + "valid_targets_min": 858 }, { - "epoch": 0.5184810126582279, - "grad_norm": 0.8204905739876648, - "learning_rate": 3.9999280366234394e-05, - "loss": 0.4183, + "epoch": 0.6481012658227848, + "grad_norm": 0.3289259973223097, + "learning_rate": 3.989511415699901e-05, + "loss": 0.3492, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06762972474098206, - "step": 128, - "valid_targets_mean": 6666.4, - "valid_targets_min": 1389 + "loss_rank_avg": 0.21416299045085907, + "step": 640, + "valid_targets_mean": 5687.4, + "valid_targets_min": 1558 }, { - "epoch": 0.5225316455696203, - "grad_norm": 1.7319807050242546, - "learning_rate": 3.999872065705065e-05, - "loss": 0.4227, + "epoch": 0.6531645569620254, + "grad_norm": 0.31628182785649794, + "learning_rate": 3.988776282465752e-05, + "loss": 0.3472, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04184423387050629, - "step": 129, - "valid_targets_mean": 3844.4, - "valid_targets_min": 1319 + "loss_rank_avg": 0.16229850053787231, + "step": 645, + "valid_targets_mean": 5641.7, + "valid_targets_min": 1228 }, { - "epoch": 0.5265822784810127, - "grad_norm": 0.9634703858631996, - "learning_rate": 3.999800103862945e-05, - "loss": 0.4335, + "epoch": 0.6582278481012658, + "grad_norm": 0.32370069272186214, + "learning_rate": 3.9880163243996314e-05, + "loss": 0.3439, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04888691380620003, - "step": 130, - "valid_targets_mean": 4168.3, - "valid_targets_min": 1386 + "loss_rank_avg": 0.1790936440229416, + "step": 650, + "valid_targets_mean": 4757.5, + "valid_targets_min": 828 }, { - "epoch": 0.5306329113924051, - "grad_norm": 0.784517576483665, - "learning_rate": 3.9997121516724834e-05, - "loss": 0.4119, + "epoch": 0.6632911392405063, + "grad_norm": 0.331462646919884, + "learning_rate": 3.9872315509876885e-05, + "loss": 0.3465, "loss_nan_ranks": 0, - "loss_rank_avg": 0.044563088566064835, - "step": 131, - "valid_targets_mean": 4179.5, - "valid_targets_min": 1230 + "loss_rank_avg": 0.2034873068332672, + "step": 655, + "valid_targets_mean": 6825.2, + "valid_targets_min": 1430 }, { - "epoch": 0.5346835443037975, - "grad_norm": 0.7533377576610076, - "learning_rate": 3.999608209836944e-05, - "loss": 0.4039, + "epoch": 0.6683544303797468, + "grad_norm": 0.2773031962593845, + "learning_rate": 3.986421972025831e-05, + "loss": 0.3362, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04797458276152611, - "step": 132, - "valid_targets_mean": 4516.6, - "valid_targets_min": 1349 + "loss_rank_avg": 0.13872382044792175, + "step": 660, + "valid_targets_mean": 5178.6, + "valid_targets_min": 1303 }, { - "epoch": 0.5387341772151899, - "grad_norm": 0.678560635739266, - "learning_rate": 3.999488279187443e-05, - "loss": 0.4042, + "epoch": 0.6734177215189874, + "grad_norm": 0.3282605048208077, + "learning_rate": 3.9855875976196e-05, + "loss": 0.3336, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05512206628918648, - "step": 133, - "valid_targets_mean": 4947.1, - "valid_targets_min": 964 + "loss_rank_avg": 0.1442852020263672, + "step": 665, + "valid_targets_mean": 3767.0, + "valid_targets_min": 1309 }, { - "epoch": 0.5427848101265823, - "grad_norm": 0.8073678337167025, - "learning_rate": 3.9993523606829425e-05, - "loss": 0.4247, + "epoch": 0.6784810126582278, + "grad_norm": 0.5817158484637879, + "learning_rate": 3.984728438184047e-05, + "loss": 0.348, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026869136840105057, - "step": 134, - "valid_targets_mean": 2812.9, - "valid_targets_min": 1261 + "loss_rank_avg": 0.1867026388645172, + "step": 670, + "valid_targets_mean": 4197.3, + "valid_targets_min": 1119 }, { - "epoch": 0.5468354430379747, - "grad_norm": 0.7196854610319208, - "learning_rate": 3.9992004554102435e-05, - "loss": 0.4098, + "epoch": 0.6835443037974683, + "grad_norm": 0.3539656970185502, + "learning_rate": 3.9838445044435997e-05, + "loss": 0.3543, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04661894962191582, - "step": 135, - "valid_targets_mean": 3934.5, - "valid_targets_min": 1198 + "loss_rank_avg": 0.16187211871147156, + "step": 675, + "valid_targets_mean": 4067.3, + "valid_targets_min": 1024 }, { - "epoch": 0.5508860759493671, - "grad_norm": 0.7897514744361768, - "learning_rate": 3.999032564583976e-05, - "loss": 0.4093, + "epoch": 0.6886075949367089, + "grad_norm": 0.33229865798563013, + "learning_rate": 3.9829358074319295e-05, + "loss": 0.343, "loss_nan_ranks": 0, - "loss_rank_avg": 0.052173689007759094, - "step": 136, - "valid_targets_mean": 4249.8, - "valid_targets_min": 1415 + "loss_rank_avg": 0.17157503962516785, + "step": 680, + "valid_targets_mean": 4686.7, + "valid_targets_min": 1496 }, { - "epoch": 0.5549367088607595, - "grad_norm": 0.71909049194181, - "learning_rate": 3.998848689546591e-05, - "loss": 0.4006, + "epoch": 0.6936708860759494, + "grad_norm": 0.2791093667927135, + "learning_rate": 3.982002358491817e-05, + "loss": 0.3385, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06791016459465027, - "step": 137, - "valid_targets_mean": 5992.1, - "valid_targets_min": 899 + "loss_rank_avg": 0.1936018466949463, + "step": 685, + "valid_targets_mean": 6395.1, + "valid_targets_min": 1038 }, { - "epoch": 0.5589873417721519, - "grad_norm": 0.5433203728561666, - "learning_rate": 3.9986488317683496e-05, - "loss": 0.405, + "epoch": 0.6987341772151898, + "grad_norm": 0.3350209099910488, + "learning_rate": 3.981044169275006e-05, + "loss": 0.3363, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04295790195465088, - "step": 138, - "valid_targets_mean": 3433.3, - "valid_targets_min": 1075 + "loss_rank_avg": 0.1778436303138733, + "step": 690, + "valid_targets_mean": 4567.5, + "valid_targets_min": 1080 }, { - "epoch": 0.5630379746835443, - "grad_norm": 0.5480032491903007, - "learning_rate": 3.9984329928473067e-05, - "loss": 0.3995, + "epoch": 0.7037974683544304, + "grad_norm": 0.3531099254353881, + "learning_rate": 3.9800612517420626e-05, + "loss": 0.3469, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0497143492102623, - "step": 139, - "valid_targets_mean": 4588.2, - "valid_targets_min": 1152 + "loss_rank_avg": 0.16938315331935883, + "step": 695, + "valid_targets_mean": 3896.1, + "valid_targets_min": 1641 }, { - "epoch": 0.5670886075949367, - "grad_norm": 0.6103982990072461, - "learning_rate": 3.998201174509306e-05, - "loss": 0.4037, + "epoch": 0.7088607594936709, + "grad_norm": 0.318049795345604, + "learning_rate": 3.979053618162219e-05, + "loss": 0.3564, "loss_nan_ranks": 0, - "loss_rank_avg": 0.049686968326568604, - "step": 140, - "valid_targets_mean": 4468.4, - "valid_targets_min": 1476 + "loss_rank_avg": 0.1946740746498108, + "step": 700, + "valid_targets_mean": 5408.9, + "valid_targets_min": 1410 }, { - "epoch": 0.5711392405063291, - "grad_norm": 0.5629988818331454, - "learning_rate": 3.997953378607961e-05, - "loss": 0.397, + "epoch": 0.7139240506329114, + "grad_norm": 0.3320763821369659, + "learning_rate": 3.978021281113228e-05, + "loss": 0.3393, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04076606035232544, - "step": 141, - "valid_targets_mean": 3553.6, - "valid_targets_min": 1049 + "loss_rank_avg": 0.17016340792179108, + "step": 705, + "valid_targets_mean": 4967.0, + "valid_targets_min": 882 }, { - "epoch": 0.5751898734177215, - "grad_norm": 0.5359259818177005, - "learning_rate": 3.9976896071246403e-05, - "loss": 0.3994, + "epoch": 0.7189873417721518, + "grad_norm": 0.2847828838514781, + "learning_rate": 3.9769642534812e-05, + "loss": 0.3414, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04303029179573059, - "step": 142, - "valid_targets_mean": 3950.3, - "valid_targets_min": 1532 + "loss_rank_avg": 0.1707335114479065, + "step": 710, + "valid_targets_mean": 6450.3, + "valid_targets_min": 1014 }, { - "epoch": 0.579240506329114, - "grad_norm": 0.6529004994577806, - "learning_rate": 3.997409862168454e-05, - "loss": 0.4124, + "epoch": 0.7240506329113924, + "grad_norm": 0.3321384385821899, + "learning_rate": 3.9758825484604494e-05, + "loss": 0.3447, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06024843454360962, - "step": 143, - "valid_targets_mean": 5404.2, - "valid_targets_min": 1553 + "loss_rank_avg": 0.1961357295513153, + "step": 715, + "valid_targets_mean": 4652.9, + "valid_targets_min": 1387 }, { - "epoch": 0.5832911392405064, - "grad_norm": 0.45493544439121325, - "learning_rate": 3.997114145976237e-05, - "loss": 0.4152, + "epoch": 0.7291139240506329, + "grad_norm": 0.3032184720855355, + "learning_rate": 3.9747761795533194e-05, + "loss": 0.3451, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05036636069417, - "step": 144, - "valid_targets_mean": 4548.0, - "valid_targets_min": 1389 + "loss_rank_avg": 0.1426798403263092, + "step": 720, + "valid_targets_mean": 6012.8, + "valid_targets_min": 1966 }, { - "epoch": 0.5873417721518988, - "grad_norm": 0.549691947022771, - "learning_rate": 3.9968024609125245e-05, - "loss": 0.3962, + "epoch": 0.7341772151898734, + "grad_norm": 0.3448777606898466, + "learning_rate": 3.973645160570023e-05, + "loss": 0.3546, "loss_nan_ranks": 0, - "loss_rank_avg": 0.043455820530653, - "step": 145, - "valid_targets_mean": 4322.4, - "valid_targets_min": 1112 + "loss_rank_avg": 0.1586744487285614, + "step": 725, + "valid_targets_mean": 4090.4, + "valid_targets_min": 1678 }, { - "epoch": 0.5913924050632912, - "grad_norm": 0.5403024847204195, - "learning_rate": 3.996474809469545e-05, - "loss": 0.4095, + "epoch": 0.739240506329114, + "grad_norm": 0.3156506831386379, + "learning_rate": 3.9724895056284665e-05, + "loss": 0.3376, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03951553627848625, - "step": 146, - "valid_targets_mean": 3759.8, - "valid_targets_min": 1422 + "loss_rank_avg": 0.1619202196598053, + "step": 730, + "valid_targets_mean": 4218.2, + "valid_targets_min": 1064 }, { - "epoch": 0.5954430379746836, - "grad_norm": 0.51919871990684, - "learning_rate": 3.996131194267189e-05, - "loss": 0.392, + "epoch": 0.7443037974683544, + "grad_norm": 0.35504669537877936, + "learning_rate": 3.971309229154072e-05, + "loss": 0.3336, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03196530044078827, - "step": 147, - "valid_targets_mean": 2806.6, - "valid_targets_min": 1104 + "loss_rank_avg": 0.21181583404541016, + "step": 735, + "valid_targets_mean": 4926.1, + "valid_targets_min": 1207 }, { - "epoch": 0.599493670886076, - "grad_norm": 0.5576879996511324, - "learning_rate": 3.995771618052995e-05, - "loss": 0.387, + "epoch": 0.7493670886075949, + "grad_norm": 0.3201734939222349, + "learning_rate": 3.9701043458796015e-05, + "loss": 0.3356, "loss_nan_ranks": 0, - "loss_rank_avg": 0.043483369052410126, - "step": 148, - "valid_targets_mean": 3857.6, - "valid_targets_min": 1281 + "loss_rank_avg": 0.22177475690841675, + "step": 740, + "valid_targets_mean": 6330.8, + "valid_targets_min": 1448 }, { - "epoch": 0.6035443037974684, - "grad_norm": 0.6205996474132962, - "learning_rate": 3.9953960837021256e-05, - "loss": 0.4022, + "epoch": 0.7544303797468355, + "grad_norm": 0.3323013925433624, + "learning_rate": 3.9688748708449686e-05, + "loss": 0.3383, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04536616429686546, - "step": 149, - "valid_targets_mean": 3949.1, - "valid_targets_min": 1340 + "loss_rank_avg": 0.15360356867313385, + "step": 745, + "valid_targets_mean": 4397.9, + "valid_targets_min": 1528 }, { - "epoch": 0.6075949367088608, - "grad_norm": 0.5287455565975784, - "learning_rate": 3.995004594217343e-05, - "loss": 0.4003, + "epoch": 0.759493670886076, + "grad_norm": 0.2700426394697059, + "learning_rate": 3.967620819397053e-05, + "loss": 0.3308, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0511779822409153, - "step": 150, - "valid_targets_mean": 4899.2, - "valid_targets_min": 1345 + "loss_rank_avg": 0.15975482761859894, + "step": 750, + "valid_targets_mean": 5138.0, + "valid_targets_min": 1494 }, { - "epoch": 0.6116455696202532, - "grad_norm": 0.5286353613893214, - "learning_rate": 3.994597152728987e-05, - "loss": 0.3984, + "epoch": 0.7645569620253164, + "grad_norm": 0.32491869195537754, + "learning_rate": 3.9663422071895103e-05, + "loss": 0.3534, "loss_nan_ranks": 0, - "loss_rank_avg": 0.048138149082660675, - "step": 151, - "valid_targets_mean": 4724.3, - "valid_targets_min": 1146 + "loss_rank_avg": 0.1329491138458252, + "step": 755, + "valid_targets_mean": 3496.6, + "valid_targets_min": 1333 }, { - "epoch": 0.6156962025316456, - "grad_norm": 0.45837696459121535, - "learning_rate": 3.994173762494947e-05, - "loss": 0.4264, + "epoch": 0.769620253164557, + "grad_norm": 0.30201583766650153, + "learning_rate": 3.965039050182573e-05, + "loss": 0.3444, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05765845626592636, - "step": 152, - "valid_targets_mean": 4681.2, - "valid_targets_min": 1914 + "loss_rank_avg": 0.17797686159610748, + "step": 760, + "valid_targets_mean": 5996.1, + "valid_targets_min": 1405 }, { - "epoch": 0.619746835443038, - "grad_norm": 0.49883723421511145, - "learning_rate": 3.9937344269006424e-05, - "loss": 0.4049, + "epoch": 0.7746835443037975, + "grad_norm": 0.28815946203036474, + "learning_rate": 3.963711364642854e-05, + "loss": 0.3397, "loss_nan_ranks": 0, - "loss_rank_avg": 0.031516727060079575, - "step": 153, - "valid_targets_mean": 3224.6, - "valid_targets_min": 1286 + "loss_rank_avg": 0.19754289090633392, + "step": 765, + "valid_targets_mean": 6895.8, + "valid_targets_min": 1767 }, { - "epoch": 0.6237974683544304, - "grad_norm": 0.39660923865273007, - "learning_rate": 3.993279149458988e-05, - "loss": 0.3913, + "epoch": 0.779746835443038, + "grad_norm": 0.3154202174762482, + "learning_rate": 3.9623591671431416e-05, + "loss": 0.355, "loss_nan_ranks": 0, - "loss_rank_avg": 0.036629438400268555, - "step": 154, - "valid_targets_mean": 3398.7, - "valid_targets_min": 1294 + "loss_rank_avg": 0.18379993736743927, + "step": 770, + "valid_targets_mean": 5009.2, + "valid_targets_min": 1303 }, { - "epoch": 0.6278481012658228, - "grad_norm": 0.5360988652258376, - "learning_rate": 3.9928079338103695e-05, - "loss": 0.4152, + "epoch": 0.7848101265822784, + "grad_norm": 0.3575700176888927, + "learning_rate": 3.960982474562196e-05, + "loss": 0.3389, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06448749452829361, - "step": 155, - "valid_targets_mean": 5875.6, - "valid_targets_min": 1524 - }, - { - "epoch": 0.6318987341772152, - "grad_norm": 0.5226602694069594, - "learning_rate": 3.992320783722616e-05, - "loss": 0.4017, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05161647871136665, - "step": 156, - "valid_targets_mean": 4747.9, - "valid_targets_min": 1278 - }, - { - "epoch": 0.6359493670886076, - "grad_norm": 0.40436815356541295, - "learning_rate": 3.991817703090965e-05, - "loss": 0.3868, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04100307077169418, - "step": 157, - "valid_targets_mean": 3910.7, - "valid_targets_min": 1557 - }, - { - "epoch": 0.64, - "grad_norm": 0.3926704120594189, - "learning_rate": 3.991298695938038e-05, - "loss": 0.4043, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.042444128543138504, - "step": 158, - "valid_targets_mean": 3522.6, - "valid_targets_min": 1096 - }, - { - "epoch": 0.6440506329113924, - "grad_norm": 0.5076623774445436, - "learning_rate": 3.9907637664138e-05, - "loss": 0.3993, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04470229893922806, - "step": 159, - "valid_targets_mean": 3696.1, - "valid_targets_min": 1064 - }, - { - "epoch": 0.6481012658227848, - "grad_norm": 0.5337854961710667, - "learning_rate": 3.990212918795534e-05, - "loss": 0.4051, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05759483948349953, - "step": 160, - "valid_targets_mean": 5352.3, - "valid_targets_min": 1560 - }, - { - "epoch": 0.6521518987341772, - "grad_norm": 0.49232979360802614, - "learning_rate": 3.9896461574878036e-05, - "loss": 0.4012, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04649016261100769, - "step": 161, - "valid_targets_mean": 4706.9, - "valid_targets_min": 1022 - }, - { - "epoch": 0.6562025316455696, - "grad_norm": 0.4492209651055195, - "learning_rate": 3.989063487022416e-05, - "loss": 0.4099, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.045562755316495895, - "step": 162, - "valid_targets_mean": 4150.2, - "valid_targets_min": 1119 - }, - { - "epoch": 0.660253164556962, - "grad_norm": 0.5807853378786956, - "learning_rate": 3.9884649120583903e-05, - "loss": 0.4069, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04843149334192276, - "step": 163, - "valid_targets_mean": 3811.6, - "valid_targets_min": 807 - }, - { - "epoch": 0.6643037974683544, - "grad_norm": 0.4421586230983281, - "learning_rate": 3.987850437381915e-05, - "loss": 0.392, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03646192327141762, - "step": 164, - "valid_targets_mean": 3694.9, - "valid_targets_min": 884 - }, - { - "epoch": 0.6683544303797468, - "grad_norm": 0.4801528491517805, - "learning_rate": 3.987220067906313e-05, - "loss": 0.3917, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.051381587982177734, - "step": 165, - "valid_targets_mean": 5180.6, - "valid_targets_min": 1305 - }, - { - "epoch": 0.6724050632911392, - "grad_norm": 0.5294172160595377, - "learning_rate": 3.986573808672002e-05, - "loss": 0.3953, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03182698041200638, - "step": 166, - "valid_targets_mean": 3104.5, - "valid_targets_min": 1123 - }, - { - "epoch": 0.6764556962025317, - "grad_norm": 0.5134011377182386, - "learning_rate": 3.9859116648464534e-05, - "loss": 0.3955, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06264036148786545, - "step": 167, - "valid_targets_mean": 5248.3, - "valid_targets_min": 1209 - }, - { - "epoch": 0.6805063291139241, - "grad_norm": 0.4858291424933584, - "learning_rate": 3.985233641724152e-05, - "loss": 0.3959, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04595412313938141, - "step": 168, - "valid_targets_mean": 3716.1, - "valid_targets_min": 1065 - }, - { - "epoch": 0.6845569620253165, - "grad_norm": 0.46922241556163324, - "learning_rate": 3.984539744726554e-05, - "loss": 0.412, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04231109470129013, - "step": 169, - "valid_targets_mean": 4030.9, - "valid_targets_min": 1320 - }, - { - "epoch": 0.6886075949367089, - "grad_norm": 0.4787874437935905, - "learning_rate": 3.983829979402038e-05, - "loss": 0.3949, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.053731370717287064, - "step": 170, - "valid_targets_mean": 4688.7, - "valid_targets_min": 1498 - }, - { - "epoch": 0.6926582278481013, - "grad_norm": 0.46699510261304283, - "learning_rate": 3.983104351425871e-05, - "loss": 0.4031, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03717079013586044, - "step": 171, - "valid_targets_mean": 2916.2, - "valid_targets_min": 1284 - }, - { - "epoch": 0.6967088607594937, - "grad_norm": 0.5286391464842389, - "learning_rate": 3.982362866600154e-05, - "loss": 0.3805, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05524525046348572, - "step": 172, - "valid_targets_mean": 4783.1, - "valid_targets_min": 1617 - }, - { - "epoch": 0.7007594936708861, - "grad_norm": 0.44158987757459944, - "learning_rate": 3.9816055308537806e-05, - "loss": 0.3981, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.059007737785577774, - "step": 173, - "valid_targets_mean": 4662.8, - "valid_targets_min": 902 - }, - { - "epoch": 0.7048101265822785, - "grad_norm": 0.465458920311661, - "learning_rate": 3.980832350242388e-05, - "loss": 0.4016, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.053797923028469086, - "step": 174, - "valid_targets_mean": 5012.7, - "valid_targets_min": 1519 - }, - { - "epoch": 0.7088607594936709, - "grad_norm": 0.4868379216742597, - "learning_rate": 3.980043330948306e-05, - "loss": 0.4135, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06050734966993332, - "step": 175, - "valid_targets_mean": 5410.9, - "valid_targets_min": 1412 - }, - { - "epoch": 0.7129113924050633, - "grad_norm": 0.4412776613977876, - "learning_rate": 3.9792384792805124e-05, - "loss": 0.3905, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.049749113619327545, - "step": 176, - "valid_targets_mean": 4090.3, - "valid_targets_min": 1200 - }, - { - "epoch": 0.7169620253164557, - "grad_norm": 0.42469125011897935, - "learning_rate": 3.978417801674579e-05, - "loss": 0.4029, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04124998301267624, - "step": 177, - "valid_targets_mean": 3514.7, - "valid_targets_min": 1021 - }, - { - "epoch": 0.7210126582278481, - "grad_norm": 0.4989418958189537, - "learning_rate": 3.9775813046926244e-05, - "loss": 0.4007, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.045302219688892365, - "step": 178, - "valid_targets_mean": 5219.8, - "valid_targets_min": 1081 - }, - { - "epoch": 0.7250632911392405, - "grad_norm": 0.38298541539289754, - "learning_rate": 3.976728995023254e-05, - "loss": 0.3936, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03838229179382324, - "step": 179, - "valid_targets_mean": 3586.2, - "valid_targets_min": 1157 - }, - { - "epoch": 0.7291139240506329, - "grad_norm": 0.4784940401420178, - "learning_rate": 3.975860879481514e-05, - "loss": 0.4053, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04661254584789276, - "step": 180, - "valid_targets_mean": 6014.8, - "valid_targets_min": 1968 - }, - { - "epoch": 0.7331645569620253, - "grad_norm": 0.4762470844905524, - "learning_rate": 3.974976965008833e-05, - "loss": 0.3996, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06391244381666183, - "step": 181, - "valid_targets_mean": 6059.4, - "valid_targets_min": 1415 - }, - { - "epoch": 0.7372151898734177, - "grad_norm": 0.4155266684565921, - "learning_rate": 3.9740772586729674e-05, - "loss": 0.3994, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04258221387863159, - "step": 182, - "valid_targets_mean": 3626.9, - "valid_targets_min": 1583 - }, - { - "epoch": 0.7412658227848101, - "grad_norm": 0.4197532740492893, - "learning_rate": 3.973161767667946e-05, - "loss": 0.3947, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.0651676282286644, - "step": 183, - "valid_targets_mean": 5145.3, - "valid_targets_min": 1610 - }, - { - "epoch": 0.7453164556962025, - "grad_norm": 0.44567348029714937, - "learning_rate": 3.972230499314009e-05, - "loss": 0.3831, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03477749228477478, - "step": 184, - "valid_targets_mean": 3506.1, - "valid_targets_min": 1272 - }, - { - "epoch": 0.7493670886075949, - "grad_norm": 0.4660019920816794, - "learning_rate": 3.9712834610575545e-05, - "loss": 0.3914, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06879770755767822, - "step": 185, - "valid_targets_mean": 6329.2, - "valid_targets_min": 1450 - }, - { - "epoch": 0.7534177215189873, - "grad_norm": 0.35466972491540216, - "learning_rate": 3.9703206604710746e-05, - "loss": 0.3884, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04594149440526962, - "step": 186, - "valid_targets_mean": 4308.9, - "valid_targets_min": 1318 - }, - { - "epoch": 0.7574683544303797, - "grad_norm": 0.4043292780387038, - "learning_rate": 3.969342105253098e-05, - "loss": 0.378, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04979319870471954, - "step": 187, - "valid_targets_mean": 4318.1, - "valid_targets_min": 1072 - }, - { - "epoch": 0.7615189873417721, - "grad_norm": 0.43762304614629416, - "learning_rate": 3.9683478032281246e-05, - "loss": 0.4087, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05901230499148369, - "step": 188, - "valid_targets_mean": 5515.2, - "valid_targets_min": 1401 - }, - { - "epoch": 0.7655696202531646, - "grad_norm": 0.35257897927594767, - "learning_rate": 3.967337762346568e-05, - "loss": 0.3971, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.036184344440698624, - "step": 189, - "valid_targets_mean": 3648.5, - "valid_targets_min": 1316 - }, - { - "epoch": 0.769620253164557, - "grad_norm": 0.4586516555619478, - "learning_rate": 3.9663119906846885e-05, - "loss": 0.3994, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.0621817409992218, - "step": 190, - "valid_targets_mean": 5998.1, - "valid_targets_min": 1407 - }, - { - "epoch": 0.7736708860759494, - "grad_norm": 0.4133487608785662, - "learning_rate": 3.965270496444528e-05, - "loss": 0.3918, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04161564260721207, - "step": 191, - "valid_targets_mean": 4036.8, - "valid_targets_min": 1020 - }, - { - "epoch": 0.7777215189873418, - "grad_norm": 0.4450796537280172, - "learning_rate": 3.9642132879538465e-05, - "loss": 0.3957, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03854193910956383, - "step": 192, - "valid_targets_mean": 3875.6, - "valid_targets_min": 1243 - }, - { - "epoch": 0.7817721518987342, - "grad_norm": 0.3901632045904612, - "learning_rate": 3.9631403736660566e-05, - "loss": 0.4021, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.037209074944257736, - "step": 193, - "valid_targets_mean": 3762.1, - "valid_targets_min": 1137 - }, - { - "epoch": 0.7858227848101266, - "grad_norm": 0.3849767613588487, - "learning_rate": 3.962051762160151e-05, - "loss": 0.3937, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.044705405831336975, - "step": 194, - "valid_targets_mean": 4061.6, - "valid_targets_min": 1186 - }, - { - "epoch": 0.789873417721519, - "grad_norm": 0.38977830003176955, - "learning_rate": 3.960947462140638e-05, - "loss": 0.39, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.043201059103012085, - "step": 195, - "valid_targets_mean": 4334.1, - "valid_targets_min": 865 - }, - { - "epoch": 0.7939240506329114, - "grad_norm": 0.4206313908143476, - "learning_rate": 3.959827482437472e-05, - "loss": 0.4019, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05456563085317612, - "step": 196, - "valid_targets_mean": 4199.2, - "valid_targets_min": 1019 - }, - { - "epoch": 0.7979746835443038, - "grad_norm": 0.4099403626766528, - "learning_rate": 3.958691832005981e-05, - "loss": 0.3886, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.032608289271593094, - "step": 197, - "valid_targets_mean": 3207.4, - "valid_targets_min": 943 - }, - { - "epoch": 0.8020253164556962, - "grad_norm": 0.41275822414873825, - "learning_rate": 3.9575405199267944e-05, - "loss": 0.3859, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04315009340643883, - "step": 198, - "valid_targets_mean": 4100.4, - "valid_targets_min": 1125 - }, - { - "epoch": 0.8060759493670886, - "grad_norm": 0.42788263964263656, - "learning_rate": 3.956373555405774e-05, - "loss": 0.3734, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.040834665298461914, - "step": 199, - "valid_targets_mean": 3672.3, - "valid_targets_min": 1106 - }, - { - "epoch": 0.810126582278481, - "grad_norm": 0.3843454886819529, - "learning_rate": 3.955190947773937e-05, - "loss": 0.3747, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03427031636238098, - "step": 200, - "valid_targets_mean": 3138.8, - "valid_targets_min": 971 - }, - { - "epoch": 0.8141772151898734, - "grad_norm": 0.45090978500288503, - "learning_rate": 3.9539927064873824e-05, - "loss": 0.3986, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.0527331605553627, - "step": 201, - "valid_targets_mean": 4807.4, - "valid_targets_min": 1351 - }, - { - "epoch": 0.8182278481012658, - "grad_norm": 0.3969147927096043, - "learning_rate": 3.952778841127214e-05, - "loss": 0.3778, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.030247759073972702, - "step": 202, - "valid_targets_mean": 3209.6, - "valid_targets_min": 1731 - }, - { - "epoch": 0.8222784810126582, - "grad_norm": 0.39455231931822077, - "learning_rate": 3.95154936139947e-05, - "loss": 0.3692, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.034564059227705, - "step": 203, - "valid_targets_mean": 3379.9, - "valid_targets_min": 1167 - }, - { - "epoch": 0.8263291139240506, - "grad_norm": 0.4082760818606499, - "learning_rate": 3.950304277135034e-05, - "loss": 0.3811, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.031288061290979385, - "step": 204, - "valid_targets_mean": 3503.6, - "valid_targets_min": 869 - }, - { - "epoch": 0.830379746835443, - "grad_norm": 0.401292855029016, - "learning_rate": 3.94904359828957e-05, - "loss": 0.3875, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03167634457349777, - "step": 205, - "valid_targets_mean": 2991.9, - "valid_targets_min": 1527 - }, - { - "epoch": 0.8344303797468354, - "grad_norm": 0.4712089367640346, - "learning_rate": 3.94776733494343e-05, - "loss": 0.3928, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.030103646218776703, - "step": 206, - "valid_targets_mean": 2564.2, - "valid_targets_min": 1003 - }, - { - "epoch": 0.8384810126582278, - "grad_norm": 0.40947408231322907, - "learning_rate": 3.9464754973015826e-05, - "loss": 0.3837, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04522818326950073, - "step": 207, - "valid_targets_mean": 4203.4, - "valid_targets_min": 1026 - }, - { - "epoch": 0.8425316455696202, - "grad_norm": 0.4151402680031294, - "learning_rate": 3.9451680956935275e-05, - "loss": 0.3866, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03589247912168503, - "step": 208, - "valid_targets_mean": 3352.5, - "valid_targets_min": 857 - }, - { - "epoch": 0.8465822784810126, - "grad_norm": 0.4503235990787147, - "learning_rate": 3.943845140573213e-05, - "loss": 0.3957, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.029857059940695763, - "step": 209, - "valid_targets_mean": 2977.6, - "valid_targets_min": 1329 - }, - { - "epoch": 0.850632911392405, - "grad_norm": 0.36151361416077155, - "learning_rate": 3.942506642518952e-05, - "loss": 0.3801, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.040279075503349304, - "step": 210, - "valid_targets_mean": 3750.4, - "valid_targets_min": 1110 - }, - { - "epoch": 0.8546835443037974, - "grad_norm": 0.46511902089306856, - "learning_rate": 3.9411526122333396e-05, - "loss": 0.3801, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05088287591934204, - "step": 211, - "valid_targets_mean": 5016.0, - "valid_targets_min": 1202 - }, - { - "epoch": 0.8587341772151899, - "grad_norm": 0.44263610411173665, - "learning_rate": 3.939783060543167e-05, - "loss": 0.403, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03430716320872307, - "step": 212, - "valid_targets_mean": 3398.6, - "valid_targets_min": 875 - }, - { - "epoch": 0.8627848101265823, - "grad_norm": 0.4269425843758099, - "learning_rate": 3.938397998399333e-05, - "loss": 0.3875, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04929053410887718, - "step": 213, - "valid_targets_mean": 4928.9, - "valid_targets_min": 992 - }, - { - "epoch": 0.8668354430379747, - "grad_norm": 0.4507167614566644, - "learning_rate": 3.936997436876756e-05, - "loss": 0.3931, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05837477371096611, - "step": 214, - "valid_targets_mean": 5017.2, - "valid_targets_min": 1566 - }, - { - "epoch": 0.8708860759493671, - "grad_norm": 0.4178256937125121, - "learning_rate": 3.935581387174289e-05, - "loss": 0.4008, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.054189737886190414, - "step": 215, - "valid_targets_mean": 4754.5, - "valid_targets_min": 1273 - }, - { - "epoch": 0.8749367088607595, - "grad_norm": 0.4424970209938244, - "learning_rate": 3.9341498606146295e-05, - "loss": 0.4035, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.07565446197986603, - "step": 216, - "valid_targets_mean": 5814.6, - "valid_targets_min": 1326 - }, - { - "epoch": 0.8789873417721519, - "grad_norm": 0.4163177338806734, - "learning_rate": 3.932702868644224e-05, - "loss": 0.3875, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04473324120044708, - "step": 217, - "valid_targets_mean": 3958.6, - "valid_targets_min": 1551 - }, - { - "epoch": 0.8830379746835443, - "grad_norm": 0.46312477737105173, - "learning_rate": 3.931240422833181e-05, - "loss": 0.3834, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04033679515123367, - "step": 218, - "valid_targets_mean": 3933.8, - "valid_targets_min": 1506 - }, - { - "epoch": 0.8870886075949367, - "grad_norm": 0.4231326125132253, - "learning_rate": 3.9297625348751806e-05, - "loss": 0.3806, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04238021373748779, - "step": 219, - "valid_targets_mean": 4357.4, - "valid_targets_min": 1142 - }, - { - "epoch": 0.8911392405063291, - "grad_norm": 0.3722935718035281, - "learning_rate": 3.9282692165873744e-05, - "loss": 0.3786, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06605974584817886, - "step": 220, - "valid_targets_mean": 5542.3, - "valid_targets_min": 1528 - }, - { - "epoch": 0.8951898734177215, - "grad_norm": 0.4198846670074615, - "learning_rate": 3.9267604799102945e-05, - "loss": 0.3972, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04203680902719498, - "step": 221, - "valid_targets_mean": 3785.0, - "valid_targets_min": 1059 - }, - { - "epoch": 0.8992405063291139, - "grad_norm": 0.3935157764748133, - "learning_rate": 3.9252363369077615e-05, - "loss": 0.3751, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03383598476648331, - "step": 222, - "valid_targets_mean": 3525.6, - "valid_targets_min": 971 - }, - { - "epoch": 0.9032911392405063, - "grad_norm": 0.5234426202913071, - "learning_rate": 3.9236967997667796e-05, - "loss": 0.3866, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06510619819164276, - "step": 223, - "valid_targets_mean": 5984.1, - "valid_targets_min": 822 - }, - { - "epoch": 0.9073417721518987, - "grad_norm": 0.4503790413013757, - "learning_rate": 3.9221418807974496e-05, - "loss": 0.3746, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.045053329318761826, - "step": 224, - "valid_targets_mean": 4381.6, - "valid_targets_min": 892 - }, - { - "epoch": 0.9113924050632911, - "grad_norm": 0.4530961552486848, - "learning_rate": 3.92057159243286e-05, - "loss": 0.3766, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05290493741631508, - "step": 225, - "valid_targets_mean": 5423.4, - "valid_targets_min": 1471 - }, - { - "epoch": 0.9154430379746835, - "grad_norm": 0.4629682871370494, - "learning_rate": 3.9189859472289956e-05, - "loss": 0.3785, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04433508589863777, - "step": 226, - "valid_targets_mean": 4834.6, - "valid_targets_min": 1760 - }, - { - "epoch": 0.9194936708860759, - "grad_norm": 0.4791394492679248, - "learning_rate": 3.9173849578646324e-05, - "loss": 0.4097, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05587176978588104, - "step": 227, - "valid_targets_mean": 5096.2, - "valid_targets_min": 1041 - }, - { - "epoch": 0.9235443037974683, - "grad_norm": 0.5245116702704936, - "learning_rate": 3.9157686371412395e-05, - "loss": 0.3854, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.0511966198682785, - "step": 228, - "valid_targets_mean": 4770.9, - "valid_targets_min": 1217 - }, - { - "epoch": 0.9275949367088607, - "grad_norm": 0.4262501396573589, - "learning_rate": 3.914136997982874e-05, - "loss": 0.3961, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04806748032569885, - "step": 229, - "valid_targets_mean": 3857.7, - "valid_targets_min": 1186 - }, - { - "epoch": 0.9316455696202531, - "grad_norm": 0.4264988999563365, - "learning_rate": 3.912490053436079e-05, - "loss": 0.3945, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04742566496133804, - "step": 230, - "valid_targets_mean": 4552.2, - "valid_targets_min": 1180 - }, - { - "epoch": 0.9356962025316455, - "grad_norm": 0.5040072717810649, - "learning_rate": 3.9108278166697805e-05, - "loss": 0.3938, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04683136194944382, - "step": 231, - "valid_targets_mean": 4043.7, - "valid_targets_min": 1310 - }, - { - "epoch": 0.9397468354430379, - "grad_norm": 0.35864337523854756, - "learning_rate": 3.909150300975178e-05, - "loss": 0.3785, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.044740431010723114, - "step": 232, - "valid_targets_mean": 4040.9, - "valid_targets_min": 1382 - }, - { - "epoch": 0.9437974683544303, - "grad_norm": 0.4341125223055753, - "learning_rate": 3.907457519765642e-05, - "loss": 0.3944, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04963330924510956, - "step": 233, - "valid_targets_mean": 4824.9, - "valid_targets_min": 932 - }, - { - "epoch": 0.9478481012658228, - "grad_norm": 0.4214466788514605, - "learning_rate": 3.905749486576607e-05, - "loss": 0.39, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05568487197160721, - "step": 234, - "valid_targets_mean": 4694.1, - "valid_targets_min": 1123 - }, - { - "epoch": 0.9518987341772152, - "grad_norm": 0.4490916948287539, - "learning_rate": 3.90402621506546e-05, - "loss": 0.382, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.045049626380205154, - "step": 235, - "valid_targets_mean": 4411.8, - "valid_targets_min": 698 - }, - { - "epoch": 0.9559493670886076, - "grad_norm": 0.4469433625250188, - "learning_rate": 3.902287719011435e-05, - "loss": 0.3805, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.046605631709098816, - "step": 236, - "valid_targets_mean": 4418.7, - "valid_targets_min": 1229 - }, - { - "epoch": 0.96, - "grad_norm": 0.4116996636586392, - "learning_rate": 3.900534012315499e-05, - "loss": 0.3984, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05655079334974289, - "step": 237, - "valid_targets_mean": 5258.9, - "valid_targets_min": 1299 - }, - { - "epoch": 0.9640506329113924, - "grad_norm": 0.43553038453231674, - "learning_rate": 3.8987651090002454e-05, - "loss": 0.3955, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.041694432497024536, - "step": 238, - "valid_targets_mean": 4233.6, - "valid_targets_min": 1580 - }, - { - "epoch": 0.9681012658227848, - "grad_norm": 0.3897273392949861, - "learning_rate": 3.896981023209777e-05, - "loss": 0.3958, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06257250905036926, - "step": 239, - "valid_targets_mean": 6301.8, - "valid_targets_min": 1614 - }, - { - "epoch": 0.9721518987341772, - "grad_norm": 0.40644566913890406, - "learning_rate": 3.8951817692095966e-05, - "loss": 0.3799, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05280187726020813, - "step": 240, - "valid_targets_mean": 4621.9, - "valid_targets_min": 1158 - }, - { - "epoch": 0.9762025316455696, - "grad_norm": 0.4031570803146659, - "learning_rate": 3.893367361386491e-05, - "loss": 0.3937, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06152264028787613, - "step": 241, - "valid_targets_mean": 5334.7, - "valid_targets_min": 1224 - }, - { - "epoch": 0.980253164556962, - "grad_norm": 0.40558630009805174, - "learning_rate": 3.891537814248417e-05, - "loss": 0.3883, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04186493530869484, - "step": 242, - "valid_targets_mean": 3455.3, - "valid_targets_min": 1365 - }, - { - "epoch": 0.9843037974683544, - "grad_norm": 0.3926160221949178, - "learning_rate": 3.889693142424384e-05, - "loss": 0.3994, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04740064591169357, - "step": 243, - "valid_targets_mean": 4265.6, - "valid_targets_min": 1488 - }, - { - "epoch": 0.9883544303797468, - "grad_norm": 0.40598571265510414, - "learning_rate": 3.8878333606643405e-05, - "loss": 0.3852, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03236037492752075, - "step": 244, - "valid_targets_mean": 3068.9, - "valid_targets_min": 1181 - }, - { - "epoch": 0.9924050632911392, - "grad_norm": 0.36199236830013165, - "learning_rate": 3.885958483839049e-05, - "loss": 0.3896, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06099804490804672, - "step": 245, - "valid_targets_mean": 4501.0, - "valid_targets_min": 1186 - }, - { - "epoch": 0.9964556962025316, - "grad_norm": 0.4415810554714419, - "learning_rate": 3.8840685269399786e-05, - "loss": 0.3885, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05218656361103058, - "step": 246, - "valid_targets_mean": 4923.1, - "valid_targets_min": 1119 - }, - { - "epoch": 1.0, - "grad_norm": 0.4935612926224235, - "learning_rate": 3.882163505079171e-05, - "loss": 0.3867, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.040381744503974915, - "step": 247, - "valid_targets_mean": 3306.5, - "valid_targets_min": 811 - }, - { - "epoch": 1.0040506329113925, - "grad_norm": 0.43881757415111144, - "learning_rate": 3.880243433489132e-05, - "loss": 0.3333, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.043302834033966064, - "step": 248, - "valid_targets_mean": 5179.9, - "valid_targets_min": 1324 - }, - { - "epoch": 1.0081012658227848, - "grad_norm": 0.4768130337286285, - "learning_rate": 3.878308327522702e-05, - "loss": 0.3403, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03876558691263199, - "step": 249, - "valid_targets_mean": 3617.6, - "valid_targets_min": 1289 - }, - { - "epoch": 1.0121518987341773, - "grad_norm": 0.46379437071742236, - "learning_rate": 3.876358202652936e-05, - "loss": 0.3445, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.042863473296165466, - "step": 250, - "valid_targets_mean": 4636.4, - "valid_targets_min": 1324 - }, - { - "epoch": 1.0162025316455696, - "grad_norm": 0.479982525586596, - "learning_rate": 3.8743930744729835e-05, - "loss": 0.3278, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04512117803096771, - "step": 251, - "valid_targets_mean": 5158.1, - "valid_targets_min": 1022 - }, - { - "epoch": 1.0202531645569621, - "grad_norm": 0.4073467211154948, - "learning_rate": 3.8724129586959535e-05, - "loss": 0.3314, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.050884395837783813, - "step": 252, - "valid_targets_mean": 5163.6, - "valid_targets_min": 1070 - }, - { - "epoch": 1.0243037974683544, - "grad_norm": 0.49364056497777786, - "learning_rate": 3.870417871154801e-05, - "loss": 0.3368, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05375582352280617, - "step": 253, - "valid_targets_mean": 5155.7, - "valid_targets_min": 1448 - }, - { - "epoch": 1.028354430379747, - "grad_norm": 0.42136875982681915, - "learning_rate": 3.868407827802192e-05, - "loss": 0.3279, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.043836839497089386, - "step": 254, - "valid_targets_mean": 4467.7, - "valid_targets_min": 1257 - }, - { - "epoch": 1.0324050632911392, - "grad_norm": 0.49067221875631695, - "learning_rate": 3.8663828447103804e-05, - "loss": 0.3467, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.0388471893966198, - "step": 255, - "valid_targets_mean": 4225.1, - "valid_targets_min": 860 - }, - { - "epoch": 1.0364556962025318, - "grad_norm": 0.400665256592623, - "learning_rate": 3.8643429380710765e-05, - "loss": 0.3411, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.036631450057029724, - "step": 256, - "valid_targets_mean": 4231.9, - "valid_targets_min": 1610 - }, - { - "epoch": 1.040506329113924, - "grad_norm": 0.49979735329761005, - "learning_rate": 3.862288124195319e-05, - "loss": 0.3425, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04496927186846733, - "step": 257, - "valid_targets_mean": 5401.9, - "valid_targets_min": 1217 - }, - { - "epoch": 1.0445569620253166, - "grad_norm": 0.40250127249183065, - "learning_rate": 3.8602184195133464e-05, - "loss": 0.3291, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03760378807783127, - "step": 258, - "valid_targets_mean": 3832.8, - "valid_targets_min": 1619 - }, - { - "epoch": 1.0486075949367089, - "grad_norm": 0.4993152725336139, - "learning_rate": 3.8581338405744604e-05, - "loss": 0.3409, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.047988615930080414, - "step": 259, - "valid_targets_mean": 5089.2, - "valid_targets_min": 1207 - }, - { - "epoch": 1.0526582278481014, - "grad_norm": 0.4833916443613953, - "learning_rate": 3.8560344040468996e-05, - "loss": 0.3466, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04081379622220993, - "step": 260, - "valid_targets_mean": 4535.2, - "valid_targets_min": 1403 - }, - { - "epoch": 1.0567088607594937, - "grad_norm": 0.49185705402569097, - "learning_rate": 3.853920126717703e-05, - "loss": 0.3506, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04410317540168762, - "step": 261, - "valid_targets_mean": 4016.5, - "valid_targets_min": 1143 - }, - { - "epoch": 1.0607594936708862, - "grad_norm": 0.4732534934525123, - "learning_rate": 3.851791025492574e-05, - "loss": 0.3408, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.048802439123392105, - "step": 262, - "valid_targets_mean": 5977.5, - "valid_targets_min": 1318 - }, - { - "epoch": 1.0648101265822785, - "grad_norm": 0.4651821986477631, - "learning_rate": 3.8496471173957515e-05, - "loss": 0.3429, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04655998945236206, - "step": 263, - "valid_targets_mean": 5134.2, - "valid_targets_min": 906 - }, - { - "epoch": 1.068860759493671, - "grad_norm": 0.44444401244665843, - "learning_rate": 3.847488419569866e-05, - "loss": 0.3326, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05208176374435425, - "step": 264, - "valid_targets_mean": 5735.6, - "valid_targets_min": 1299 - }, - { - "epoch": 1.0729113924050633, - "grad_norm": 0.43965189163949886, - "learning_rate": 3.8453149492758077e-05, - "loss": 0.3336, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04766218364238739, - "step": 265, - "valid_targets_mean": 5380.4, - "valid_targets_min": 1442 - }, - { - "epoch": 1.0769620253164558, - "grad_norm": 0.44147871823418006, - "learning_rate": 3.8431267238925886e-05, - "loss": 0.3274, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06004750728607178, - "step": 266, - "valid_targets_mean": 5674.8, - "valid_targets_min": 1688 - }, - { - "epoch": 1.081012658227848, - "grad_norm": 0.40035732042631894, - "learning_rate": 3.840923760917199e-05, - "loss": 0.334, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.028999630361795425, - "step": 267, - "valid_targets_mean": 4041.1, - "valid_targets_min": 1463 - }, - { - "epoch": 1.0850632911392406, - "grad_norm": 0.4399057296115138, - "learning_rate": 3.838706077964473e-05, - "loss": 0.3262, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.041777171194553375, - "step": 268, - "valid_targets_mean": 5098.9, - "valid_targets_min": 1078 - }, - { - "epoch": 1.089113924050633, - "grad_norm": 0.4193239485777806, - "learning_rate": 3.836473692766944e-05, - "loss": 0.3288, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04640112817287445, - "step": 269, - "valid_targets_mean": 4746.3, - "valid_targets_min": 1270 - }, - { - "epoch": 1.0931645569620254, - "grad_norm": 0.4178067626741526, - "learning_rate": 3.834226623174705e-05, - "loss": 0.3229, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03233584016561508, - "step": 270, - "valid_targets_mean": 3726.8, - "valid_targets_min": 688 - }, - { - "epoch": 1.0972151898734177, - "grad_norm": 0.4277304389800791, - "learning_rate": 3.831964887155264e-05, - "loss": 0.3194, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03970283269882202, - "step": 271, - "valid_targets_mean": 4546.9, - "valid_targets_min": 1055 - }, - { - "epoch": 1.1012658227848102, - "grad_norm": 0.39751963960825176, - "learning_rate": 3.829688502793403e-05, - "loss": 0.332, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.050614580512046814, - "step": 272, - "valid_targets_mean": 6069.5, - "valid_targets_min": 1198 - }, - { - "epoch": 1.1053164556962025, - "grad_norm": 0.39667785576607584, - "learning_rate": 3.827397488291029e-05, - "loss": 0.3365, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.052200429141521454, - "step": 273, - "valid_targets_mean": 4859.7, - "valid_targets_min": 1591 - }, - { - "epoch": 1.109367088607595, - "grad_norm": 0.4631771987391291, - "learning_rate": 3.8250918619670343e-05, - "loss": 0.3294, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05346325412392616, - "step": 274, - "valid_targets_mean": 5889.3, - "valid_targets_min": 1285 - }, - { - "epoch": 1.1134177215189873, - "grad_norm": 0.4095769895472331, - "learning_rate": 3.822771642257145e-05, - "loss": 0.3167, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.042441610246896744, - "step": 275, - "valid_targets_mean": 4516.6, - "valid_targets_min": 1607 - }, - { - "epoch": 1.1174683544303798, - "grad_norm": 0.42832336914382285, - "learning_rate": 3.820436847713776e-05, - "loss": 0.319, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.033627428114414215, - "step": 276, - "valid_targets_mean": 3252.9, - "valid_targets_min": 1235 - }, - { - "epoch": 1.1215189873417721, - "grad_norm": 0.41935447022143024, - "learning_rate": 3.8180874970058826e-05, - "loss": 0.3395, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.046168241649866104, - "step": 277, - "valid_targets_mean": 4573.6, - "valid_targets_min": 1322 - }, - { - "epoch": 1.1255696202531646, - "grad_norm": 0.42766634759524186, - "learning_rate": 3.8157236089188103e-05, - "loss": 0.3336, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03643731027841568, - "step": 278, - "valid_targets_mean": 4285.5, - "valid_targets_min": 1489 - }, - { - "epoch": 1.129620253164557, - "grad_norm": 0.4137474754288218, - "learning_rate": 3.813345202354145e-05, - "loss": 0.3324, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03984470292925835, - "step": 279, - "valid_targets_mean": 4170.9, - "valid_targets_min": 920 - }, - { - "epoch": 1.1336708860759495, - "grad_norm": 0.5587127024191587, - "learning_rate": 3.810952296329563e-05, - "loss": 0.3199, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03912029415369034, - "step": 280, - "valid_targets_mean": 4313.5, - "valid_targets_min": 1575 - }, - { - "epoch": 1.1377215189873418, - "grad_norm": 0.38188114391239336, - "learning_rate": 3.808544909978676e-05, - "loss": 0.3357, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05175253003835678, - "step": 281, - "valid_targets_mean": 5816.8, - "valid_targets_min": 1139 - }, - { - "epoch": 1.1417721518987343, - "grad_norm": 0.48292556377655704, - "learning_rate": 3.806123062550882e-05, - "loss": 0.3164, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05137088894844055, - "step": 282, - "valid_targets_mean": 5902.8, - "valid_targets_min": 1175 - }, - { - "epoch": 1.1458227848101266, - "grad_norm": 0.4688024194245287, - "learning_rate": 3.803686773411208e-05, - "loss": 0.3259, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.046909987926483154, - "step": 283, - "valid_targets_mean": 5458.0, - "valid_targets_min": 961 - }, - { - "epoch": 1.149873417721519, - "grad_norm": 0.4010452184684908, - "learning_rate": 3.801236062040158e-05, - "loss": 0.3475, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.033218979835510254, - "step": 284, - "valid_targets_mean": 3854.1, - "valid_targets_min": 1119 - }, - { - "epoch": 1.1539240506329114, - "grad_norm": 0.48370238933037013, - "learning_rate": 3.798770948033553e-05, - "loss": 0.3474, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.025871790945529938, - "step": 285, - "valid_targets_mean": 3305.8, - "valid_targets_min": 956 - }, - { - "epoch": 1.1579746835443039, - "grad_norm": 0.4471042978546612, - "learning_rate": 3.796291451102381e-05, - "loss": 0.3272, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04039275646209717, - "step": 286, - "valid_targets_mean": 4557.0, - "valid_targets_min": 1476 - }, - { - "epoch": 1.1620253164556962, - "grad_norm": 0.38851396235800023, - "learning_rate": 3.793797591072631e-05, - "loss": 0.3399, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03040398843586445, - "step": 287, - "valid_targets_mean": 3757.9, - "valid_targets_min": 1198 - }, - { - "epoch": 1.1660759493670887, - "grad_norm": 0.4377443454251537, - "learning_rate": 3.791289387885144e-05, - "loss": 0.3307, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04426418989896774, - "step": 288, - "valid_targets_mean": 4752.1, - "valid_targets_min": 1596 - }, - { - "epoch": 1.170126582278481, - "grad_norm": 0.40605885786934004, - "learning_rate": 3.788766861595443e-05, - "loss": 0.3366, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03723035007715225, - "step": 289, - "valid_targets_mean": 4943.8, - "valid_targets_min": 1541 - }, - { - "epoch": 1.1741772151898735, - "grad_norm": 0.4138525326746093, - "learning_rate": 3.7862300323735835e-05, - "loss": 0.3372, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.039631813764572144, - "step": 290, - "valid_targets_mean": 4613.4, - "valid_targets_min": 1194 - }, - { - "epoch": 1.1782278481012658, - "grad_norm": 0.36085473699571674, - "learning_rate": 3.783678920503983e-05, - "loss": 0.3236, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03367921710014343, - "step": 291, - "valid_targets_mean": 3855.4, - "valid_targets_min": 1662 - }, - { - "epoch": 1.1822784810126583, - "grad_norm": 0.40026413984436504, - "learning_rate": 3.781113546385265e-05, - "loss": 0.3384, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04290143400430679, - "step": 292, - "valid_targets_mean": 4215.2, - "valid_targets_min": 1378 - }, - { - "epoch": 1.1863291139240506, - "grad_norm": 0.38610007418503284, - "learning_rate": 3.7785339305300916e-05, - "loss": 0.3284, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03657708317041397, - "step": 293, - "valid_targets_mean": 4263.8, - "valid_targets_min": 1159 - }, - { - "epoch": 1.1903797468354431, - "grad_norm": 0.40277194203840083, - "learning_rate": 3.7759400935650025e-05, - "loss": 0.3315, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06683148443698883, - "step": 294, - "valid_targets_mean": 6487.7, - "valid_targets_min": 1705 - }, - { - "epoch": 1.1944303797468354, - "grad_norm": 0.380575022478197, - "learning_rate": 3.77333205623025e-05, - "loss": 0.3234, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.035810552537441254, - "step": 295, - "valid_targets_mean": 4024.1, - "valid_targets_min": 951 - }, - { - "epoch": 1.198481012658228, - "grad_norm": 0.4241047155197534, - "learning_rate": 3.7707098393796295e-05, - "loss": 0.3259, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03711569681763649, - "step": 296, - "valid_targets_mean": 3789.1, - "valid_targets_min": 1619 - }, - { - "epoch": 1.2025316455696202, - "grad_norm": 0.42925599564732586, - "learning_rate": 3.7680734639803185e-05, - "loss": 0.3362, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06396347284317017, - "step": 297, - "valid_targets_mean": 6671.0, - "valid_targets_min": 1322 - }, - { - "epoch": 1.2065822784810127, - "grad_norm": 0.42717609021903125, - "learning_rate": 3.765422951112704e-05, - "loss": 0.3147, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05779135972261429, - "step": 298, - "valid_targets_mean": 6074.8, - "valid_targets_min": 1801 - }, - { - "epoch": 1.210632911392405, - "grad_norm": 0.5503385227348256, - "learning_rate": 3.762758321970216e-05, - "loss": 0.3374, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.0427449569106102, - "step": 299, - "valid_targets_mean": 5239.9, - "valid_targets_min": 1335 - }, - { - "epoch": 1.2146835443037975, - "grad_norm": 0.390158538869697, - "learning_rate": 3.7600795978591584e-05, - "loss": 0.3337, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.0317438542842865, - "step": 300, - "valid_targets_mean": 3869.2, - "valid_targets_min": 1430 - }, - { - "epoch": 1.2187341772151898, - "grad_norm": 0.46847184119408986, - "learning_rate": 3.757386800198538e-05, - "loss": 0.3343, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.045495979487895966, - "step": 301, - "valid_targets_mean": 5395.8, - "valid_targets_min": 1545 - }, - { - "epoch": 1.2227848101265824, - "grad_norm": 0.4428823267611126, - "learning_rate": 3.7546799505198925e-05, - "loss": 0.3519, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.055398546159267426, - "step": 302, - "valid_targets_mean": 6161.7, - "valid_targets_min": 1237 - }, - { - "epoch": 1.2268354430379746, - "grad_norm": 0.4064436411344038, - "learning_rate": 3.7519590704671197e-05, - "loss": 0.3441, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04253986105322838, - "step": 303, - "valid_targets_mean": 4458.0, - "valid_targets_min": 1259 - }, - { - "epoch": 1.2308860759493672, - "grad_norm": 0.39611039274123905, - "learning_rate": 3.749224181796305e-05, - "loss": 0.3447, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.036902233958244324, - "step": 304, - "valid_targets_mean": 4446.3, - "valid_targets_min": 1412 - }, - { - "epoch": 1.2349367088607595, - "grad_norm": 0.539713223604662, - "learning_rate": 3.746475306375545e-05, - "loss": 0.3396, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.043336015194654465, - "step": 305, - "valid_targets_mean": 4629.6, - "valid_targets_min": 1188 - }, - { - "epoch": 1.238987341772152, - "grad_norm": 0.31666258051552043, - "learning_rate": 3.743712466184774e-05, - "loss": 0.3117, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04101824015378952, - "step": 306, - "valid_targets_mean": 4542.2, - "valid_targets_min": 1250 - }, - { - "epoch": 1.2430379746835443, - "grad_norm": 0.4603641684309312, - "learning_rate": 3.7409356833155885e-05, - "loss": 0.3392, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03385325148701668, - "step": 307, - "valid_targets_mean": 3908.0, - "valid_targets_min": 1289 - }, - { - "epoch": 1.2470886075949368, - "grad_norm": 0.38131629891769525, - "learning_rate": 3.73814497997107e-05, - "loss": 0.3533, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04682439565658569, - "step": 308, - "valid_targets_mean": 5508.6, - "valid_targets_min": 1640 - }, - { - "epoch": 1.251139240506329, - "grad_norm": 0.3977678912424595, - "learning_rate": 3.7353403784656095e-05, - "loss": 0.3348, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.047584839165210724, - "step": 309, - "valid_targets_mean": 6236.3, - "valid_targets_min": 1146 - }, - { - "epoch": 1.2551898734177216, - "grad_norm": 0.3775688600802151, - "learning_rate": 3.732521901224724e-05, - "loss": 0.3181, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04272007197141647, - "step": 310, - "valid_targets_mean": 4600.5, - "valid_targets_min": 1083 - }, - { - "epoch": 1.2592405063291139, - "grad_norm": 0.35415078868301875, - "learning_rate": 3.7296895707848844e-05, - "loss": 0.3266, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05276188254356384, - "step": 311, - "valid_targets_mean": 4910.4, - "valid_targets_min": 1292 - }, - { - "epoch": 1.2632911392405064, - "grad_norm": 0.37644340464983783, - "learning_rate": 3.7268434097933275e-05, - "loss": 0.3174, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05105176940560341, - "step": 312, - "valid_targets_mean": 5747.3, - "valid_targets_min": 1282 - }, - { - "epoch": 1.2673417721518987, - "grad_norm": 0.4146221684659416, - "learning_rate": 3.723983441007882e-05, - "loss": 0.3451, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.045349009335041046, - "step": 313, - "valid_targets_mean": 4974.2, - "valid_targets_min": 1128 - }, - { - "epoch": 1.2713924050632912, - "grad_norm": 0.3823656488056918, - "learning_rate": 3.721109687296781e-05, - "loss": 0.32, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05362933874130249, - "step": 314, - "valid_targets_mean": 5927.6, - "valid_targets_min": 1286 - }, - { - "epoch": 1.2754430379746835, - "grad_norm": 0.3980190324687026, - "learning_rate": 3.718222171638484e-05, - "loss": 0.3467, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03732815384864807, - "step": 315, - "valid_targets_mean": 4203.2, - "valid_targets_min": 1017 - }, - { - "epoch": 1.279493670886076, - "grad_norm": 0.35730350158693447, - "learning_rate": 3.715320917121488e-05, - "loss": 0.3508, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.0718134194612503, - "step": 316, - "valid_targets_mean": 5351.9, - "valid_targets_min": 932 - }, - { - "epoch": 1.2835443037974683, - "grad_norm": 0.38034364154189443, - "learning_rate": 3.712405946944149e-05, - "loss": 0.3243, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.043744880706071854, - "step": 317, - "valid_targets_mean": 4461.6, - "valid_targets_min": 1355 - }, - { - "epoch": 1.2875949367088608, - "grad_norm": 0.37238286698954254, - "learning_rate": 3.709477284414488e-05, - "loss": 0.3317, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04839417338371277, - "step": 318, - "valid_targets_mean": 5919.4, - "valid_targets_min": 1416 - }, - { - "epoch": 1.2916455696202531, - "grad_norm": 0.3694152773764771, - "learning_rate": 3.7065349529500136e-05, - "loss": 0.3329, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05601927638053894, - "step": 319, - "valid_targets_mean": 6470.4, - "valid_targets_min": 1380 - }, - { - "epoch": 1.2956962025316456, - "grad_norm": 0.40603377803421475, - "learning_rate": 3.7035789760775305e-05, - "loss": 0.3364, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03035106509923935, - "step": 320, - "valid_targets_mean": 3595.9, - "valid_targets_min": 1353 - }, - { - "epoch": 1.299746835443038, - "grad_norm": 0.42842444607862246, - "learning_rate": 3.70060937743295e-05, - "loss": 0.3397, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04682677239179611, - "step": 321, - "valid_targets_mean": 5207.8, - "valid_targets_min": 811 - }, - { - "epoch": 1.3037974683544304, - "grad_norm": 0.3504215938773099, - "learning_rate": 3.6976261807611045e-05, - "loss": 0.3443, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05075697600841522, - "step": 322, - "valid_targets_mean": 5177.2, - "valid_targets_min": 1577 - }, - { - "epoch": 1.3078481012658227, - "grad_norm": 0.4032879194995496, - "learning_rate": 3.694629409915555e-05, - "loss": 0.3293, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.045434221625328064, - "step": 323, - "valid_targets_mean": 4798.3, - "valid_targets_min": 851 - }, - { - "epoch": 1.3118987341772153, - "grad_norm": 0.3539200936827596, - "learning_rate": 3.691619088858401e-05, - "loss": 0.3338, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05697557330131531, - "step": 324, - "valid_targets_mean": 6687.8, - "valid_targets_min": 1313 - }, - { - "epoch": 1.3159493670886075, - "grad_norm": 0.4349578347430558, - "learning_rate": 3.68859524166009e-05, - "loss": 0.3375, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.029414789751172066, - "step": 325, - "valid_targets_mean": 3496.2, - "valid_targets_min": 1144 - }, - { - "epoch": 1.32, - "grad_norm": 0.3891623595744905, - "learning_rate": 3.6855578924992226e-05, - "loss": 0.329, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03618660196661949, - "step": 326, - "valid_targets_mean": 4056.2, - "valid_targets_min": 1196 - }, - { - "epoch": 1.3240506329113924, - "grad_norm": 0.3901311810771893, - "learning_rate": 3.6825070656623626e-05, - "loss": 0.3331, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05077538639307022, - "step": 327, - "valid_targets_mean": 5577.2, - "valid_targets_min": 1710 - }, - { - "epoch": 1.3281012658227849, - "grad_norm": 0.3795594032117602, - "learning_rate": 3.67944278554384e-05, - "loss": 0.3324, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.041123196482658386, - "step": 328, - "valid_targets_mean": 4052.9, - "valid_targets_min": 1029 - }, - { - "epoch": 1.3321518987341772, - "grad_norm": 0.3837261513398238, - "learning_rate": 3.676365076645557e-05, - "loss": 0.3375, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.0361938551068306, - "step": 329, - "valid_targets_mean": 3657.7, - "valid_targets_min": 1281 - }, - { - "epoch": 1.3362025316455697, - "grad_norm": 0.3872825195453024, - "learning_rate": 3.673273963576791e-05, - "loss": 0.3322, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05670418217778206, - "step": 330, - "valid_targets_mean": 6128.2, - "valid_targets_min": 1762 - }, - { - "epoch": 1.340253164556962, - "grad_norm": 0.40953104436741, - "learning_rate": 3.670169471054001e-05, - "loss": 0.3351, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.048498839139938354, - "step": 331, - "valid_targets_mean": 4052.1, - "valid_targets_min": 1276 - }, - { - "epoch": 1.3443037974683545, - "grad_norm": 0.3895846012883171, - "learning_rate": 3.6670516239006254e-05, - "loss": 0.3282, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06334970146417618, - "step": 332, - "valid_targets_mean": 5396.1, - "valid_targets_min": 1551 - }, - { - "epoch": 1.3483544303797468, - "grad_norm": 0.4223997770807982, - "learning_rate": 3.663920447046888e-05, - "loss": 0.3318, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03903364762663841, - "step": 333, - "valid_targets_mean": 4347.9, - "valid_targets_min": 1491 - }, - { - "epoch": 1.3524050632911393, - "grad_norm": 0.3390409761293719, - "learning_rate": 3.6607759655295954e-05, - "loss": 0.3199, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04412960261106491, - "step": 334, - "valid_targets_mean": 4938.9, - "valid_targets_min": 733 - }, - { - "epoch": 1.3564556962025316, - "grad_norm": 0.36219279290904527, - "learning_rate": 3.657618204491937e-05, - "loss": 0.3282, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.043282829225063324, - "step": 335, - "valid_targets_mean": 4905.5, - "valid_targets_min": 1403 - }, - { - "epoch": 1.360506329113924, - "grad_norm": 0.3303574390032699, - "learning_rate": 3.6544471891832875e-05, - "loss": 0.3432, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03686103969812393, - "step": 336, - "valid_targets_mean": 4191.9, - "valid_targets_min": 1037 - }, - { - "epoch": 1.3645569620253164, - "grad_norm": 0.37333045985626745, - "learning_rate": 3.6512629449589983e-05, - "loss": 0.3412, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.044646792113780975, - "step": 337, - "valid_targets_mean": 4578.7, - "valid_targets_min": 974 - }, - { - "epoch": 1.368607594936709, - "grad_norm": 0.34535259322026063, - "learning_rate": 3.6480654972802045e-05, - "loss": 0.3283, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03553847223520279, - "step": 338, - "valid_targets_mean": 3828.4, - "valid_targets_min": 1676 - }, - { - "epoch": 1.3726582278481012, - "grad_norm": 0.35059742097235924, - "learning_rate": 3.644854871713611e-05, - "loss": 0.328, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04590984433889389, - "step": 339, - "valid_targets_mean": 4716.9, - "valid_targets_min": 847 - }, - { - "epoch": 1.3767088607594937, - "grad_norm": 0.3850539252862224, - "learning_rate": 3.6416310939312924e-05, - "loss": 0.3319, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05009511113166809, - "step": 340, - "valid_targets_mean": 4437.9, - "valid_targets_min": 1058 - }, - { - "epoch": 1.380759493670886, - "grad_norm": 0.35551096939232074, - "learning_rate": 3.638394189710493e-05, - "loss": 0.33, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.034703344106674194, - "step": 341, - "valid_targets_mean": 3578.6, - "valid_targets_min": 1017 - }, - { - "epoch": 1.3848101265822785, - "grad_norm": 0.46909030035234556, - "learning_rate": 3.635144184933412e-05, - "loss": 0.3488, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.032210104167461395, - "step": 342, - "valid_targets_mean": 3431.7, - "valid_targets_min": 1125 - }, - { - "epoch": 1.3888607594936708, - "grad_norm": 0.36339571305087914, - "learning_rate": 3.6318811055869994e-05, - "loss": 0.3322, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04059069603681564, - "step": 343, - "valid_targets_mean": 5441.1, - "valid_targets_min": 1225 - }, - { - "epoch": 1.3929113924050633, - "grad_norm": 0.40886661117023054, - "learning_rate": 3.628604977762752e-05, - "loss": 0.3403, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04117385670542717, - "step": 344, - "valid_targets_mean": 4661.5, - "valid_targets_min": 1282 - }, - { - "epoch": 1.3969620253164556, - "grad_norm": 0.3479116261748228, - "learning_rate": 3.6253158276565004e-05, - "loss": 0.3484, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04562729597091675, - "step": 345, - "valid_targets_mean": 4236.6, - "valid_targets_min": 1080 - }, - { - "epoch": 1.4010126582278482, - "grad_norm": 0.3708622197529606, - "learning_rate": 3.6220136815682016e-05, - "loss": 0.3311, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.053136058151721954, - "step": 346, - "valid_targets_mean": 4996.6, - "valid_targets_min": 1328 - }, - { - "epoch": 1.4050632911392404, - "grad_norm": 0.3675849834932444, - "learning_rate": 3.618698565901727e-05, - "loss": 0.3378, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06611038744449615, - "step": 347, - "valid_targets_mean": 7163.4, - "valid_targets_min": 1242 - }, - { - "epoch": 1.409113924050633, - "grad_norm": 0.3218894297367269, - "learning_rate": 3.615370507164654e-05, - "loss": 0.3277, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.02884174883365631, - "step": 348, - "valid_targets_mean": 3643.3, - "valid_targets_min": 1126 - }, - { - "epoch": 1.4131645569620253, - "grad_norm": 0.36402217004287496, - "learning_rate": 3.61202953196805e-05, - "loss": 0.3332, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.030001427978277206, - "step": 349, - "valid_targets_mean": 3776.5, - "valid_targets_min": 1279 - }, - { - "epoch": 1.4172151898734178, - "grad_norm": 0.35653994311682313, - "learning_rate": 3.6086756670262654e-05, - "loss": 0.3458, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.06715628504753113, - "step": 350, - "valid_targets_mean": 6776.6, - "valid_targets_min": 1473 - }, - { - "epoch": 1.42126582278481, - "grad_norm": 0.35604711232033076, - "learning_rate": 3.6053089391567135e-05, - "loss": 0.3249, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03905327618122101, - "step": 351, - "valid_targets_mean": 4154.4, - "valid_targets_min": 1459 - }, - { - "epoch": 1.4253164556962026, - "grad_norm": 0.35567231926138243, - "learning_rate": 3.601929375279662e-05, - "loss": 0.3385, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04926755651831627, - "step": 352, - "valid_targets_mean": 4944.0, - "valid_targets_min": 909 - }, - { - "epoch": 1.4293670886075949, - "grad_norm": 0.32837476644100877, - "learning_rate": 3.598537002418012e-05, - "loss": 0.3266, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04684937745332718, - "step": 353, - "valid_targets_mean": 4363.1, - "valid_targets_min": 1286 - }, - { - "epoch": 1.4334177215189874, - "grad_norm": 0.2981576463639634, - "learning_rate": 3.5951318476970885e-05, - "loss": 0.3256, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04351592808961868, - "step": 354, - "valid_targets_mean": 5082.2, - "valid_targets_min": 1073 - }, - { - "epoch": 1.4374683544303797, - "grad_norm": 0.3298612706995704, - "learning_rate": 3.591713938344416e-05, - "loss": 0.3224, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04313363507390022, - "step": 355, - "valid_targets_mean": 4193.2, - "valid_targets_min": 1051 - }, - { - "epoch": 1.4415189873417722, - "grad_norm": 0.3254732866947206, - "learning_rate": 3.588283301689507e-05, - "loss": 0.3278, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.045036278665065765, - "step": 356, - "valid_targets_mean": 5073.4, - "valid_targets_min": 1495 - }, - { - "epoch": 1.4455696202531645, - "grad_norm": 0.3316068107096375, - "learning_rate": 3.5848399651636424e-05, - "loss": 0.3263, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03079894930124283, - "step": 357, - "valid_targets_mean": 3537.6, - "valid_targets_min": 1461 - }, - { - "epoch": 1.449620253164557, - "grad_norm": 0.3604454920738615, - "learning_rate": 3.5813839562996473e-05, - "loss": 0.3317, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.054758235812187195, - "step": 358, - "valid_targets_mean": 5031.2, - "valid_targets_min": 1342 - }, - { - "epoch": 1.4536708860759493, - "grad_norm": 0.33420365879889447, - "learning_rate": 3.5779153027316775e-05, - "loss": 0.3294, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.037421490997076035, - "step": 359, - "valid_targets_mean": 4054.1, - "valid_targets_min": 1186 - }, - { - "epoch": 1.4577215189873418, - "grad_norm": 0.4027058616416431, - "learning_rate": 3.574434032194994e-05, - "loss": 0.3316, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.027023347094655037, - "step": 360, - "valid_targets_mean": 2857.1, - "valid_targets_min": 1384 - }, - { - "epoch": 1.461772151898734, - "grad_norm": 0.46987798450445356, - "learning_rate": 3.5709401725257436e-05, - "loss": 0.3449, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03215904161334038, - "step": 361, - "valid_targets_mean": 3671.6, - "valid_targets_min": 1490 - }, - { - "epoch": 1.4658227848101266, - "grad_norm": 0.3272406858775468, - "learning_rate": 3.5674337516607346e-05, - "loss": 0.3448, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.028709491714835167, - "step": 362, - "valid_targets_mean": 3303.6, - "valid_targets_min": 1155 - }, - { - "epoch": 1.469873417721519, - "grad_norm": 0.4371735016087626, - "learning_rate": 3.563914797637216e-05, - "loss": 0.3279, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03896871954202652, - "step": 363, - "valid_targets_mean": 3658.6, - "valid_targets_min": 1059 - }, - { - "epoch": 1.4739240506329114, - "grad_norm": 0.37907120545170914, - "learning_rate": 3.560383338592649e-05, - "loss": 0.3408, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.041140444576740265, - "step": 364, - "valid_targets_mean": 4692.1, - "valid_targets_min": 1321 - }, - { - "epoch": 1.4779746835443037, - "grad_norm": 0.4335216000616064, - "learning_rate": 3.556839402764487e-05, - "loss": 0.3365, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.034600693732500076, - "step": 365, - "valid_targets_mean": 3900.6, - "valid_targets_min": 751 - }, - { - "epoch": 1.4820253164556962, - "grad_norm": 0.3948803916787027, - "learning_rate": 3.553283018489946e-05, - "loss": 0.3293, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.033790454268455505, - "step": 366, - "valid_targets_mean": 3697.8, - "valid_targets_min": 1236 - }, - { - "epoch": 1.4860759493670885, - "grad_norm": 0.34938116772620914, - "learning_rate": 3.54971421420578e-05, - "loss": 0.3092, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.030311282724142075, - "step": 367, - "valid_targets_mean": 3477.8, - "valid_targets_min": 1283 - }, - { - "epoch": 1.490126582278481, - "grad_norm": 0.35498446996451205, - "learning_rate": 3.5461330184480524e-05, - "loss": 0.3252, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.030405160039663315, - "step": 368, - "valid_targets_mean": 3928.5, - "valid_targets_min": 1395 - }, - { - "epoch": 1.4941772151898733, - "grad_norm": 0.32826187877829, - "learning_rate": 3.54253945985191e-05, - "loss": 0.3283, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.047535236924886703, - "step": 369, - "valid_targets_mean": 5368.7, - "valid_targets_min": 1139 - }, - { - "epoch": 1.4982278481012659, - "grad_norm": 0.3797114148319085, - "learning_rate": 3.5389335671513524e-05, - "loss": 0.3283, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04450792446732521, - "step": 370, - "valid_targets_mean": 4625.2, - "valid_targets_min": 1062 - }, - { - "epoch": 1.5022784810126582, - "grad_norm": 0.303916602209006, - "learning_rate": 3.5353153691789986e-05, - "loss": 0.3293, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.041045308113098145, - "step": 371, - "valid_targets_mean": 4232.3, - "valid_targets_min": 952 - }, - { - "epoch": 1.5063291139240507, - "grad_norm": 0.3496379847439436, - "learning_rate": 3.531684894865866e-05, - "loss": 0.3472, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.0392393060028553, - "step": 372, - "valid_targets_mean": 4212.8, - "valid_targets_min": 1134 - }, - { - "epoch": 1.5103797468354432, - "grad_norm": 0.30989553544793547, - "learning_rate": 3.528042173241128e-05, - "loss": 0.3321, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.041263651102781296, - "step": 373, - "valid_targets_mean": 4612.2, - "valid_targets_min": 1511 - }, - { - "epoch": 1.5144303797468355, - "grad_norm": 0.3379482122421573, - "learning_rate": 3.524387233431892e-05, - "loss": 0.3163, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04224957525730133, - "step": 374, - "valid_targets_mean": 4965.3, - "valid_targets_min": 1138 - }, - { - "epoch": 1.5184810126582278, - "grad_norm": 0.3074788070468125, - "learning_rate": 3.520720104662958e-05, - "loss": 0.3398, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.030848810449242592, - "step": 375, - "valid_targets_mean": 4269.1, - "valid_targets_min": 1068 - }, - { - "epoch": 1.5225316455696203, - "grad_norm": 0.36045776072321795, - "learning_rate": 3.5170408162565904e-05, - "loss": 0.3423, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03196258470416069, - "step": 376, - "valid_targets_mean": 3321.8, - "valid_targets_min": 875 - }, - { - "epoch": 1.5265822784810128, - "grad_norm": 0.32727093276382224, - "learning_rate": 3.513349397632281e-05, - "loss": 0.3351, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04221861809492111, - "step": 377, - "valid_targets_mean": 4121.6, - "valid_targets_min": 1013 - }, - { - "epoch": 1.530632911392405, - "grad_norm": 0.37888143338164704, - "learning_rate": 3.5096458783065145e-05, - "loss": 0.3304, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.053419679403305054, - "step": 378, - "valid_targets_mean": 5939.3, - "valid_targets_min": 1360 - }, - { - "epoch": 1.5346835443037974, - "grad_norm": 0.34124282562131614, - "learning_rate": 3.505930287892533e-05, - "loss": 0.3308, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.031853239983320236, - "step": 379, - "valid_targets_mean": 3459.0, - "valid_targets_min": 1516 - }, - { - "epoch": 1.53873417721519, - "grad_norm": 0.3267953485708573, - "learning_rate": 3.502202656100099e-05, - "loss": 0.3269, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.035576820373535156, - "step": 380, - "valid_targets_mean": 3330.2, - "valid_targets_min": 1476 - }, - { - "epoch": 1.5427848101265824, - "grad_norm": 0.31760612631955587, - "learning_rate": 3.498463012735256e-05, - "loss": 0.3142, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03268013894557953, - "step": 381, - "valid_targets_mean": 3766.8, - "valid_targets_min": 1185 - }, - { - "epoch": 1.5468354430379747, - "grad_norm": 0.3173788992702674, - "learning_rate": 3.494711387700094e-05, - "loss": 0.3322, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05025570094585419, - "step": 382, - "valid_targets_mean": 5228.7, - "valid_targets_min": 1490 - }, - { - "epoch": 1.550886075949367, - "grad_norm": 0.3039081459849771, - "learning_rate": 3.490947810992505e-05, - "loss": 0.3334, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.042354974895715714, - "step": 383, - "valid_targets_mean": 5028.8, - "valid_targets_min": 1195 - }, - { - "epoch": 1.5549367088607595, - "grad_norm": 0.3297043324355368, - "learning_rate": 3.48717231270595e-05, - "loss": 0.3348, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.032343845814466476, - "step": 384, - "valid_targets_mean": 3612.1, - "valid_targets_min": 1624 - }, - { - "epoch": 1.558987341772152, - "grad_norm": 0.3118757725648861, - "learning_rate": 3.483384923029211e-05, - "loss": 0.3315, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.024401690810918808, - "step": 385, - "valid_targets_mean": 2570.1, - "valid_targets_min": 1189 - }, - { - "epoch": 1.5630379746835443, - "grad_norm": 0.3342509367926826, - "learning_rate": 3.479585672246156e-05, - "loss": 0.3392, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.01928572542965412, - "step": 386, - "valid_targets_mean": 2404.0, - "valid_targets_min": 951 - }, - { - "epoch": 1.5670886075949366, - "grad_norm": 0.33034978184042413, - "learning_rate": 3.4757745907354924e-05, - "loss": 0.3401, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04664783552289009, - "step": 387, - "valid_targets_mean": 4955.7, - "valid_targets_min": 1696 - }, - { - "epoch": 1.5711392405063291, - "grad_norm": 0.3128272729667946, - "learning_rate": 3.471951708970527e-05, - "loss": 0.314, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.032305166125297546, - "step": 388, - "valid_targets_mean": 4388.4, - "valid_targets_min": 1445 - }, - { - "epoch": 1.5751898734177217, - "grad_norm": 0.3278781730514049, - "learning_rate": 3.468117057518921e-05, - "loss": 0.3394, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.023931726813316345, - "step": 389, - "valid_targets_mean": 3438.9, - "valid_targets_min": 801 - }, - { - "epoch": 1.579240506329114, - "grad_norm": 0.3035670296435243, - "learning_rate": 3.4642706670424426e-05, - "loss": 0.309, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.028833506628870964, - "step": 390, - "valid_targets_mean": 3001.4, - "valid_targets_min": 1320 - }, - { - "epoch": 1.5832911392405062, - "grad_norm": 0.30047887062670176, - "learning_rate": 3.460412568296731e-05, - "loss": 0.3329, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03830994665622711, - "step": 391, - "valid_targets_mean": 4628.4, - "valid_targets_min": 828 + "loss_rank_avg": 0.17523907124996185, + "step": 775, + "valid_targets_mean": 4694.8, + "valid_targets_min": 1661 }, { - "epoch": 1.5873417721518988, - "grad_norm": 0.31495946252083773, - "learning_rate": 3.456542792131039e-05, - "loss": 0.3336, + "epoch": 0.789873417721519, + "grad_norm": 0.3310517991000096, + "learning_rate": 3.959581304084536e-05, + "loss": 0.345, "loss_nan_ranks": 0, - "loss_rank_avg": 0.040600620210170746, - "step": 392, - "valid_targets_mean": 4804.1, - "valid_targets_min": 1048 + "loss_rank_avg": 0.1610376238822937, + "step": 780, + "valid_targets_mean": 4332.1, + "valid_targets_min": 863 }, { - "epoch": 1.5913924050632913, - "grad_norm": 0.30159967585448116, - "learning_rate": 3.4526613694879936e-05, - "loss": 0.3296, + "epoch": 0.7949367088607595, + "grad_norm": 0.31363025202931233, + "learning_rate": 3.958155673200223e-05, + "loss": 0.3547, "loss_nan_ranks": 0, - "loss_rank_avg": 0.048246171325445175, - "step": 393, - "valid_targets_mean": 5419.3, - "valid_targets_min": 2118 + "loss_rank_avg": 0.2106252908706665, + "step": 785, + "valid_targets_mean": 5876.1, + "valid_targets_min": 1285 }, { - "epoch": 1.5954430379746836, - "grad_norm": 0.30585703916726353, - "learning_rate": 3.448768331403348e-05, - "loss": 0.335, + "epoch": 0.8, + "grad_norm": 0.2999707270623021, + "learning_rate": 3.956705599704645e-05, + "loss": 0.3307, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0464690625667572, - "step": 394, - "valid_targets_mean": 4379.9, - "valid_targets_min": 1679 + "loss_rank_avg": 0.19507791101932526, + "step": 790, + "valid_targets_mean": 5528.2, + "valid_targets_min": 1012 }, { - "epoch": 1.5994936708860759, - "grad_norm": 0.3096463737816418, - "learning_rate": 3.44486370900573e-05, - "loss": 0.3306, + "epoch": 0.8050632911392405, + "grad_norm": 0.34798398865892316, + "learning_rate": 3.9552311016982964e-05, + "loss": 0.3301, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028461167588829994, - "step": 395, - "valid_targets_mean": 3153.1, - "valid_targets_min": 968 + "loss_rank_avg": 0.1592869609594345, + "step": 795, + "valid_targets_mean": 3994.4, + "valid_targets_min": 1020 }, { - "epoch": 1.6035443037974684, - "grad_norm": 0.2923490186836879, - "learning_rate": 3.440947533516396e-05, - "loss": 0.3284, + "epoch": 0.810126582278481, + "grad_norm": 0.3486093165843197, + "learning_rate": 3.953732197586549e-05, + "loss": 0.3299, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0476490780711174, - "step": 396, - "valid_targets_mean": 4962.8, - "valid_targets_min": 1331 + "loss_rank_avg": 0.14387521147727966, + "step": 800, + "valid_targets_mean": 3136.8, + "valid_targets_min": 969 }, { - "epoch": 1.6075949367088609, - "grad_norm": 0.3032365316110196, - "learning_rate": 3.4370198362489816e-05, + "epoch": 0.8151898734177215, + "grad_norm": 0.34294729405636676, + "learning_rate": 3.952208906079419e-05, "loss": 0.339, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03698112815618515, - "step": 397, - "valid_targets_mean": 3731.7, - "valid_targets_min": 1119 - }, - { - "epoch": 1.6116455696202532, - "grad_norm": 0.3014456358442913, - "learning_rate": 3.43308064860925e-05, - "loss": 0.3237, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03145356476306915, - "step": 398, - "valid_targets_mean": 3412.6, - "valid_targets_min": 889 + "loss_rank_avg": 0.14744120836257935, + "step": 805, + "valid_targets_mean": 3541.7, + "valid_targets_min": 1023 }, { - "epoch": 1.6156962025316455, - "grad_norm": 0.29376436754109375, - "learning_rate": 3.4291300020948393e-05, - "loss": 0.3269, + "epoch": 0.8202531645569621, + "grad_norm": 0.29811481034198406, + "learning_rate": 3.950661246191344e-05, + "loss": 0.3327, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05502167344093323, - "step": 399, - "valid_targets_mean": 5827.9, + "loss_rank_avg": 0.20451560616493225, + "step": 810, + "valid_targets_mean": 5959.1, "valid_targets_min": 1170 }, { - "epoch": 1.619746835443038, - "grad_norm": 0.32590073619863685, - "learning_rate": 3.425167928295015e-05, - "loss": 0.3342, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04351133480668068, - "step": 400, - "valid_targets_mean": 4124.1, - "valid_targets_min": 967 - }, - { - "epoch": 1.6237974683544305, - "grad_norm": 0.314874821138269, - "learning_rate": 3.421194458890411e-05, - "loss": 0.3357, + "epoch": 0.8253164556962025, + "grad_norm": 0.29859308801266926, + "learning_rate": 3.949089237240933e-05, + "loss": 0.3261, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03858516365289688, - "step": 401, - "valid_targets_mean": 4577.7, - "valid_targets_min": 923 + "loss_rank_avg": 0.17102043330669403, + "step": 815, + "valid_targets_mean": 5425.9, + "valid_targets_min": 1281 }, { - "epoch": 1.6278481012658228, - "grad_norm": 0.3270329953263554, - "learning_rate": 3.4172096256527865e-05, - "loss": 0.3241, + "epoch": 0.830379746835443, + "grad_norm": 0.33822208957438105, + "learning_rate": 3.947492898850736e-05, + "loss": 0.3379, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028715766966342926, - "step": 402, - "valid_targets_mean": 3371.6, - "valid_targets_min": 1516 + "loss_rank_avg": 0.12047713249921799, + "step": 820, + "valid_targets_mean": 2989.9, + "valid_targets_min": 1525 }, { - "epoch": 1.631898734177215, - "grad_norm": 0.37463633917697076, - "learning_rate": 3.4132134604447595e-05, - "loss": 0.3397, + "epoch": 0.8354430379746836, + "grad_norm": 0.30589016543655284, + "learning_rate": 3.94587225094699e-05, + "loss": 0.3466, "loss_nan_ranks": 0, - "loss_rank_avg": 0.027709491550922394, - "step": 403, - "valid_targets_mean": 3559.0, - "valid_targets_min": 1318 + "loss_rank_avg": 0.16248869895935059, + "step": 825, + "valid_targets_mean": 4684.8, + "valid_targets_min": 1392 }, { - "epoch": 1.6359493670886076, - "grad_norm": 0.3520789571435681, - "learning_rate": 3.409205995219561e-05, - "loss": 0.3208, + "epoch": 0.8405063291139241, + "grad_norm": 0.2936931469479608, + "learning_rate": 3.94422731375938e-05, + "loss": 0.3185, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03921286016702652, - "step": 404, - "valid_targets_mean": 3807.2, - "valid_targets_min": 1176 + "loss_rank_avg": 0.12211855500936508, + "step": 830, + "valid_targets_mean": 3796.8, + "valid_targets_min": 1392 }, { - "epoch": 1.6400000000000001, - "grad_norm": 0.3436752269222773, - "learning_rate": 3.4051872620207765e-05, - "loss": 0.3433, + "epoch": 0.8455696202531645, + "grad_norm": 0.37559686297102585, + "learning_rate": 3.9425581078207764e-05, + "loss": 0.3532, "loss_nan_ranks": 0, - "loss_rank_avg": 0.040631406009197235, - "step": 405, - "valid_targets_mean": 4202.3, - "valid_targets_min": 1173 + "loss_rank_avg": 0.12265650928020477, + "step": 835, + "valid_targets_mean": 2870.8, + "valid_targets_min": 1300 }, { - "epoch": 1.6440506329113924, - "grad_norm": 0.34364647000492415, - "learning_rate": 3.40115729298209e-05, - "loss": 0.3225, + "epoch": 0.850632911392405, + "grad_norm": 0.35124796916797185, + "learning_rate": 3.940864653966985e-05, + "loss": 0.3336, "loss_nan_ranks": 0, - "loss_rank_avg": 0.058425869792699814, - "step": 406, - "valid_targets_mean": 5856.3, - "valid_targets_min": 1262 + "loss_rank_avg": 0.1492408812046051, + "step": 840, + "valid_targets_mean": 3748.4, + "valid_targets_min": 1108 }, { - "epoch": 1.6481012658227847, - "grad_norm": 0.3331724959905717, - "learning_rate": 3.397116120327025e-05, - "loss": 0.326, + "epoch": 0.8556962025316456, + "grad_norm": 0.3196010195709303, + "learning_rate": 3.939146973336487e-05, + "loss": 0.3411, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04639708250761032, - "step": 407, - "valid_targets_mean": 4204.1, - "valid_targets_min": 1464 + "loss_rank_avg": 0.21070614457130432, + "step": 845, + "valid_targets_mean": 5815.2, + "valid_targets_min": 1741 }, { - "epoch": 1.6521518987341772, - "grad_norm": 0.3137296205528992, - "learning_rate": 3.3930637763686916e-05, - "loss": 0.3421, + "epoch": 0.8607594936708861, + "grad_norm": 0.278654689286952, + "learning_rate": 3.937405087370171e-05, + "loss": 0.3373, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04129298776388168, - "step": 408, - "valid_targets_mean": 4056.5, - "valid_targets_min": 1449 + "loss_rank_avg": 0.16281408071517944, + "step": 850, + "valid_targets_mean": 6301.6, + "valid_targets_min": 1867 }, { - "epoch": 1.6562025316455697, - "grad_norm": 0.3289054917261365, - "learning_rate": 3.389000293509524e-05, - "loss": 0.3485, + "epoch": 0.8658227848101265, + "grad_norm": 0.32174716361043904, + "learning_rate": 3.9356390178110694e-05, + "loss": 0.346, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0556643083691597, - "step": 409, - "valid_targets_mean": 5869.9, - "valid_targets_min": 1631 + "loss_rank_avg": 0.11544165015220642, + "step": 855, + "valid_targets_mean": 3471.2, + "valid_targets_min": 1247 }, { - "epoch": 1.660253164556962, - "grad_norm": 0.33517747820106003, - "learning_rate": 3.3849257042410225e-05, - "loss": 0.3417, + "epoch": 0.8708860759493671, + "grad_norm": 0.31597886690897176, + "learning_rate": 3.9338487867040855e-05, + "loss": 0.3514, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06025295704603195, - "step": 410, - "valid_targets_mean": 5569.8, - "valid_targets_min": 1372 + "loss_rank_avg": 0.1751353144645691, + "step": 860, + "valid_targets_mean": 4752.5, + "valid_targets_min": 1271 }, { - "epoch": 1.6643037974683543, - "grad_norm": 0.3662995924142495, - "learning_rate": 3.380840041143494e-05, - "loss": 0.3475, + "epoch": 0.8759493670886076, + "grad_norm": 0.3376382881344058, + "learning_rate": 3.9320344163957177e-05, + "loss": 0.3505, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05463819205760956, - "step": 411, - "valid_targets_mean": 5496.8, - "valid_targets_min": 940 + "loss_rank_avg": 0.1628148853778839, + "step": 865, + "valid_targets_mean": 4121.5, + "valid_targets_min": 1412 }, { - "epoch": 1.6683544303797468, - "grad_norm": 0.34477861177946845, - "learning_rate": 3.376743336885792e-05, - "loss": 0.3232, + "epoch": 0.8810126582278481, + "grad_norm": 0.3174487269237297, + "learning_rate": 3.9301959295337816e-05, + "loss": 0.3381, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05034920573234558, - "step": 412, - "valid_targets_mean": 5222.1, - "valid_targets_min": 1362 + "loss_rank_avg": 0.19904953241348267, + "step": 870, + "valid_targets_mean": 6095.4, + "valid_targets_min": 1694 }, { - "epoch": 1.6724050632911394, - "grad_norm": 0.36968040457564083, - "learning_rate": 3.372635624225055e-05, - "loss": 0.3387, + "epoch": 0.8860759493670886, + "grad_norm": 0.29589750599326486, + "learning_rate": 3.928333349067125e-05, + "loss": 0.3319, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04692748934030533, - "step": 413, - "valid_targets_mean": 4201.8, - "valid_targets_min": 1148 + "loss_rank_avg": 0.22740904986858368, + "step": 875, + "valid_targets_mean": 6433.5, + "valid_targets_min": 979 }, { - "epoch": 1.6764556962025317, - "grad_norm": 0.35143575068405164, - "learning_rate": 3.3685169360064445e-05, - "loss": 0.3264, + "epoch": 0.8911392405063291, + "grad_norm": 0.28879537949688816, + "learning_rate": 3.926446698245347e-05, + "loss": 0.3262, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04613827168941498, - "step": 414, - "valid_targets_mean": 5270.6, - "valid_targets_min": 1208 + "loss_rank_avg": 0.1922253668308258, + "step": 880, + "valid_targets_mean": 5540.3, + "valid_targets_min": 1526 }, { - "epoch": 1.680506329113924, - "grad_norm": 0.3590426224257718, - "learning_rate": 3.3643873051628804e-05, - "loss": 0.3456, + "epoch": 0.8962025316455696, + "grad_norm": 0.3183527697451739, + "learning_rate": 3.924536000618501e-05, + "loss": 0.3435, "loss_nan_ranks": 0, - "loss_rank_avg": 0.06139792501926422, - "step": 415, - "valid_targets_mean": 6543.7, - "valid_targets_min": 1696 + "loss_rank_avg": 0.1510843187570572, + "step": 885, + "valid_targets_mean": 4520.7, + "valid_targets_min": 1446 }, { - "epoch": 1.6845569620253165, - "grad_norm": 0.36142747750980425, - "learning_rate": 3.3602467647147814e-05, - "loss": 0.3385, + "epoch": 0.9012658227848102, + "grad_norm": 0.3190560534322978, + "learning_rate": 3.922601280036805e-05, + "loss": 0.328, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04478755593299866, - "step": 416, - "valid_targets_mean": 4169.5, - "valid_targets_min": 1095 + "loss_rank_avg": 0.19017574191093445, + "step": 890, + "valid_targets_mean": 4967.6, + "valid_targets_min": 1199 }, { - "epoch": 1.688607594936709, - "grad_norm": 0.32833771591232075, - "learning_rate": 3.356095347769799e-05, - "loss": 0.3295, + "epoch": 0.9063291139240506, + "grad_norm": 0.4456031002758009, + "learning_rate": 3.920642560650343e-05, + "loss": 0.3351, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03753453865647316, - "step": 417, - "valid_targets_mean": 4762.8, - "valid_targets_min": 1921 + "loss_rank_avg": 0.22871103882789612, + "step": 895, + "valid_targets_mean": 5971.6, + "valid_targets_min": 1331 }, { - "epoch": 1.6926582278481013, - "grad_norm": 0.32356028745922505, - "learning_rate": 3.351933087522552e-05, - "loss": 0.3348, + "epoch": 0.9113924050632911, + "grad_norm": 0.30090137054069316, + "learning_rate": 3.918659866908762e-05, + "loss": 0.3296, "loss_nan_ranks": 0, - "loss_rank_avg": 0.043473415076732635, - "step": 418, - "valid_targets_mean": 4554.4, - "valid_targets_min": 1133 + "loss_rank_avg": 0.15791599452495575, + "step": 900, + "valid_targets_mean": 5456.4, + "valid_targets_min": 1469 }, { - "epoch": 1.6967088607594936, - "grad_norm": 0.33419384875899516, - "learning_rate": 3.3477600172543626e-05, - "loss": 0.3399, + "epoch": 0.9164556962025316, + "grad_norm": 0.33133512730966574, + "learning_rate": 3.9166532235609695e-05, + "loss": 0.3397, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04143213480710983, - "step": 419, - "valid_targets_mean": 4895.8, - "valid_targets_min": 1700 + "loss_rank_avg": 0.21240505576133728, + "step": 905, + "valid_targets_mean": 5933.7, + "valid_targets_min": 1591 }, { - "epoch": 1.700759493670886, - "grad_norm": 0.3107542790151482, - "learning_rate": 3.3435761703329894e-05, - "loss": 0.3377, + "epoch": 0.9215189873417722, + "grad_norm": 0.3573450052442163, + "learning_rate": 3.914622655654822e-05, + "loss": 0.3434, "loss_nan_ranks": 0, - "loss_rank_avg": 0.038589876145124435, - "step": 420, - "valid_targets_mean": 3990.7, - "valid_targets_min": 1266 + "loss_rank_avg": 0.13716688752174377, + "step": 910, + "valid_targets_mean": 5188.8, + "valid_targets_min": 1591 }, { - "epoch": 1.7048101265822786, - "grad_norm": 0.3316401272152589, - "learning_rate": 3.3393815802123606e-05, - "loss": 0.3395, + "epoch": 0.9265822784810127, + "grad_norm": 0.3538932529953643, + "learning_rate": 3.912568188536814e-05, + "loss": 0.3537, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04931402951478958, - "step": 421, - "valid_targets_mean": 5523.5, - "valid_targets_min": 1650 + "loss_rank_avg": 0.16137436032295227, + "step": 915, + "valid_targets_mean": 3634.4, + "valid_targets_min": 1339 }, { - "epoch": 1.7088607594936709, - "grad_norm": 0.31553689061241225, - "learning_rate": 3.335176280432308e-05, - "loss": 0.3211, + "epoch": 0.9316455696202531, + "grad_norm": 0.3208113773796828, + "learning_rate": 3.910489847851761e-05, + "loss": 0.3444, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028230369091033936, - "step": 422, - "valid_targets_mean": 3250.0, + "loss_rank_avg": 0.17828276753425598, + "step": 920, + "valid_targets_mean": 4550.2, "valid_targets_min": 1178 }, { - "epoch": 1.7129113924050632, - "grad_norm": 0.3758417635262789, - "learning_rate": 3.330960304618295e-05, - "loss": 0.3437, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.049489326775074005, - "step": 423, - "valid_targets_mean": 4991.7, - "valid_targets_min": 1106 - }, - { - "epoch": 1.7169620253164557, - "grad_norm": 0.3936550836798475, - "learning_rate": 3.326733686481153e-05, - "loss": 0.3315, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.0469365231692791, - "step": 424, - "valid_targets_mean": 5517.9, - "valid_targets_min": 849 - }, - { - "epoch": 1.7210126582278482, - "grad_norm": 0.34400213540431585, - "learning_rate": 3.3224964598168084e-05, - "loss": 0.3324, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04376298189163208, - "step": 425, - "valid_targets_mean": 5182.9, - "valid_targets_min": 1349 - }, - { - "epoch": 1.7250632911392405, - "grad_norm": 0.33882223743314704, - "learning_rate": 3.318248658506012e-05, - "loss": 0.3319, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.05328531190752983, - "step": 426, - "valid_targets_mean": 5232.5, - "valid_targets_min": 1236 - }, - { - "epoch": 1.7291139240506328, - "grad_norm": 0.3243709127485186, - "learning_rate": 3.313990316514069e-05, - "loss": 0.3299, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.035082388669252396, - "step": 427, - "valid_targets_mean": 3614.2, - "valid_targets_min": 1523 - }, - { - "epoch": 1.7331645569620253, - "grad_norm": 0.3660746372986188, - "learning_rate": 3.309721467890571e-05, - "loss": 0.3253, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04420848935842514, - "step": 428, - "valid_targets_mean": 4702.2, - "valid_targets_min": 830 - }, - { - "epoch": 1.7372151898734178, - "grad_norm": 0.29622838869567936, - "learning_rate": 3.305442146769115e-05, - "loss": 0.336, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03971245139837265, - "step": 429, - "valid_targets_mean": 4418.9, - "valid_targets_min": 1136 - }, - { - "epoch": 1.7412658227848101, - "grad_norm": 0.3637366045478612, - "learning_rate": 3.30115238736704e-05, - "loss": 0.3277, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.04427309334278107, - "step": 430, - "valid_targets_mean": 4616.5, - "valid_targets_min": 1148 - }, - { - "epoch": 1.7453164556962024, - "grad_norm": 0.3736217004489942, - "learning_rate": 3.296852223985148e-05, - "loss": 0.3139, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.03867566958069801, - "step": 431, - "valid_targets_mean": 3565.4, - "valid_targets_min": 1374 - }, - { - "epoch": 1.749367088607595, - "grad_norm": 0.34223310955896535, - "learning_rate": 3.2925416910074315e-05, - "loss": 0.3373, + "epoch": 0.9367088607594937, + "grad_norm": 0.3094131187909477, + "learning_rate": 3.908387659542481e-05, + "loss": 0.3457, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04823065549135208, - "step": 432, - "valid_targets_mean": 5013.8, - "valid_targets_min": 1178 + "loss_rank_avg": 0.14312776923179626, + "step": 925, + "valid_targets_mean": 4335.3, + "valid_targets_min": 1291 }, { - "epoch": 1.7534177215189874, - "grad_norm": 0.3525464599213832, - "learning_rate": 3.288220822900796e-05, - "loss": 0.3018, + "epoch": 0.9417721518987342, + "grad_norm": 0.33010185993106733, + "learning_rate": 3.9062616498494656e-05, + "loss": 0.3354, "loss_nan_ranks": 0, - "loss_rank_avg": 0.035914428532123566, - "step": 433, - "valid_targets_mean": 4390.2, - "valid_targets_min": 1513 + "loss_rank_avg": 0.17049264907836914, + "step": 930, + "valid_targets_mean": 3559.0, + "valid_targets_min": 1428 }, { - "epoch": 1.7574683544303797, - "grad_norm": 0.33891873554132135, - "learning_rate": 3.283889654214788e-05, - "loss": 0.3258, + "epoch": 0.9468354430379747, + "grad_norm": 0.3152672156222691, + "learning_rate": 3.904111845310559e-05, + "loss": 0.3347, "loss_nan_ranks": 0, - "loss_rank_avg": 0.025227509438991547, - "step": 434, - "valid_targets_mean": 3230.1, - "valid_targets_min": 1312 + "loss_rank_avg": 0.16469566524028778, + "step": 935, + "valid_targets_mean": 4357.5, + "valid_targets_min": 1218 }, { - "epoch": 1.761518987341772, - "grad_norm": 0.3795304300630568, - "learning_rate": 3.279548219581317e-05, - "loss": 0.3332, + "epoch": 0.9518987341772152, + "grad_norm": 0.3201518158080734, + "learning_rate": 3.901938272760623e-05, + "loss": 0.3452, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02923169732093811, - "step": 435, - "valid_targets_mean": 3928.2, - "valid_targets_min": 1827 + "loss_rank_avg": 0.1640382707118988, + "step": 940, + "valid_targets_mean": 4409.8, + "valid_targets_min": 696 }, { - "epoch": 1.7655696202531646, - "grad_norm": 0.35314291509357043, - "learning_rate": 3.275196553714379e-05, - "loss": 0.322, + "epoch": 0.9569620253164557, + "grad_norm": 0.3422558014341276, + "learning_rate": 3.899740959331204e-05, + "loss": 0.3411, "loss_nan_ranks": 0, - "loss_rank_avg": 0.027506470680236816, - "step": 436, - "valid_targets_mean": 3589.4, - "valid_targets_min": 1689 + "loss_rank_avg": 0.14299672842025757, + "step": 945, + "valid_targets_mean": 3149.9, + "valid_targets_min": 866 }, { - "epoch": 1.769620253164557, - "grad_norm": 0.3403736589530196, - "learning_rate": 3.270834691409778e-05, - "loss": 0.3271, + "epoch": 0.9620253164556962, + "grad_norm": 0.3039490414348751, + "learning_rate": 3.897519932450189e-05, + "loss": 0.3421, "loss_nan_ranks": 0, - "loss_rank_avg": 0.043082475662231445, - "step": 437, - "valid_targets_mean": 4697.5, - "valid_targets_min": 1003 + "loss_rank_avg": 0.14147335290908813, + "step": 950, + "valid_targets_mean": 4333.5, + "valid_targets_min": 1238 }, { - "epoch": 1.7736708860759494, - "grad_norm": 0.3470890475541193, - "learning_rate": 3.266462667544849e-05, - "loss": 0.326, + "epoch": 0.9670886075949368, + "grad_norm": 0.3264243877990281, + "learning_rate": 3.8952752198414716e-05, + "loss": 0.3515, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04425528645515442, - "step": 438, - "valid_targets_mean": 4046.9, - "valid_targets_min": 1103 + "loss_rank_avg": 0.17327773571014404, + "step": 955, + "valid_targets_mean": 4177.0, + "valid_targets_min": 1111 }, { - "epoch": 1.7777215189873417, - "grad_norm": 0.33460584037667124, - "learning_rate": 3.262080517078178e-05, - "loss": 0.3336, + "epoch": 0.9721518987341772, + "grad_norm": 0.3108694369949979, + "learning_rate": 3.893006849524601e-05, + "loss": 0.3335, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05333739146590233, - "step": 439, - "valid_targets_mean": 6205.1, - "valid_targets_min": 2031 + "loss_rank_avg": 0.16756384074687958, + "step": 960, + "valid_targets_mean": 4619.9, + "valid_targets_min": 1156 }, { - "epoch": 1.7817721518987342, - "grad_norm": 0.332223846117327, - "learning_rate": 3.257688275049323e-05, - "loss": 0.3291, + "epoch": 0.9772151898734177, + "grad_norm": 0.3448421182167878, + "learning_rate": 3.890714849814431e-05, + "loss": 0.343, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05119897425174713, - "step": 440, - "valid_targets_mean": 5728.1, - "valid_targets_min": 1444 + "loss_rank_avg": 0.15267406404018402, + "step": 965, + "valid_targets_mean": 4085.4, + "valid_targets_min": 1188 }, { - "epoch": 1.7858227848101267, - "grad_norm": 0.3438853865299442, - "learning_rate": 3.253285976578535e-05, - "loss": 0.3443, + "epoch": 0.9822784810126582, + "grad_norm": 0.30352124369356204, + "learning_rate": 3.8883992493207696e-05, + "loss": 0.3489, "loss_nan_ranks": 0, - "loss_rank_avg": 0.040141962468624115, - "step": 441, - "valid_targets_mean": 4616.1, - "valid_targets_min": 1306 + "loss_rank_avg": 0.2034776359796524, + "step": 970, + "valid_targets_mean": 5403.8, + "valid_targets_min": 1133 }, { - "epoch": 1.789873417721519, - "grad_norm": 0.3634757057983563, - "learning_rate": 3.2488736568664756e-05, - "loss": 0.3396, + "epoch": 0.9873417721518988, + "grad_norm": 0.2876598919430634, + "learning_rate": 3.8860600769480215e-05, + "loss": 0.3446, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05128256976604462, - "step": 442, - "valid_targets_mean": 4661.9, - "valid_targets_min": 1422 + "loss_rank_avg": 0.17782637476921082, + "step": 975, + "valid_targets_mean": 5421.9, + "valid_targets_min": 876 }, { - "epoch": 1.7939240506329113, - "grad_norm": 0.35226806103777647, - "learning_rate": 3.244451351193935e-05, - "loss": 0.3228, + "epoch": 0.9924050632911392, + "grad_norm": 0.33380559486032607, + "learning_rate": 3.883697361894825e-05, + "loss": 0.3439, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030896876007318497, - "step": 443, - "valid_targets_mean": 3503.1, - "valid_targets_min": 786 + "loss_rank_avg": 0.2117309868335724, + "step": 980, + "valid_targets_mean": 4499.0, + "valid_targets_min": 1184 }, { - "epoch": 1.7979746835443038, - "grad_norm": 0.33185212160462785, - "learning_rate": 3.240019094921551e-05, - "loss": 0.3247, + "epoch": 0.9974683544303797, + "grad_norm": 0.3618917728963476, + "learning_rate": 3.881311133653691e-05, + "loss": 0.3412, "loss_nan_ranks": 0, - "loss_rank_avg": 0.037942226976156235, - "step": 444, - "valid_targets_mean": 3962.8, - "valid_targets_min": 1230 + "loss_rank_avg": 0.16749724745750427, + "step": 985, + "valid_targets_mean": 3879.4, + "valid_targets_min": 966 }, { - "epoch": 1.8020253164556963, - "grad_norm": 0.36700704498321507, - "learning_rate": 3.235576923489527e-05, - "loss": 0.3393, + "epoch": 1.0020253164556963, + "grad_norm": 0.3195092960965727, + "learning_rate": 3.878901422010632e-05, + "loss": 0.3347, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04156826436519623, - "step": 445, - "valid_targets_mean": 4232.2, - "valid_targets_min": 1134 + "loss_rank_avg": 0.21217718720436096, + "step": 990, + "valid_targets_mean": 6226.9, + "valid_targets_min": 1827 }, { - "epoch": 1.8060759493670886, - "grad_norm": 0.32868655839505095, - "learning_rate": 3.2311248724173476e-05, - "loss": 0.3314, + "epoch": 1.0070886075949368, + "grad_norm": 0.32404855006530575, + "learning_rate": 3.87646825704479e-05, + "loss": 0.3259, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0416606068611145, - "step": 446, - "valid_targets_mean": 4821.2, - "valid_targets_min": 1406 + "loss_rank_avg": 0.1381339430809021, + "step": 995, + "valid_targets_mean": 3916.4, + "valid_targets_min": 1401 }, { - "epoch": 1.810126582278481, - "grad_norm": 0.3718438171345449, - "learning_rate": 3.226662977303494e-05, - "loss": 0.332, + "epoch": 1.0121518987341773, + "grad_norm": 0.3141166003779331, + "learning_rate": 3.874011669128065e-05, + "loss": 0.3446, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030391667038202286, - "step": 447, - "valid_targets_mean": 3128.0, - "valid_targets_min": 972 + "loss_rank_avg": 0.1709311306476593, + "step": 1000, + "valid_targets_mean": 4634.4, + "valid_targets_min": 1322 }, { - "epoch": 1.8141772151898734, - "grad_norm": 0.373982059055943, - "learning_rate": 3.2221912738251626e-05, - "loss": 0.338, + "epoch": 1.0172151898734176, + "grad_norm": 0.33255131568231555, + "learning_rate": 3.871531688924731e-05, + "loss": 0.3176, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05585087835788727, - "step": 448, - "valid_targets_mean": 5579.8, - "valid_targets_min": 1371 + "loss_rank_avg": 0.11075881123542786, + "step": 1005, + "valid_targets_mean": 2777.5, + "valid_targets_min": 899 }, { - "epoch": 1.818227848101266, - "grad_norm": 0.32577144664806534, - "learning_rate": 3.2177097977379736e-05, - "loss": 0.3202, + "epoch": 1.0222784810126582, + "grad_norm": 0.3551119238834457, + "learning_rate": 3.8690283473910555e-05, + "loss": 0.3343, "loss_nan_ranks": 0, - "loss_rank_avg": 0.043629519641399384, - "step": 449, - "valid_targets_mean": 5300.1, - "valid_targets_min": 868 + "loss_rank_avg": 0.18998833000659943, + "step": 1010, + "valid_targets_mean": 5065.8, + "valid_targets_min": 1392 }, { - "epoch": 1.8222784810126582, - "grad_norm": 0.3881428987379395, - "learning_rate": 3.21321858487569e-05, - "loss": 0.3256, + "epoch": 1.0273417721518987, + "grad_norm": 0.36295431013252033, + "learning_rate": 3.866501675774914e-05, + "loss": 0.3282, "loss_nan_ranks": 0, - "loss_rank_avg": 0.045537859201431274, - "step": 450, - "valid_targets_mean": 5358.4, - "valid_targets_min": 1494 + "loss_rank_avg": 0.1773543804883957, + "step": 1015, + "valid_targets_mean": 3819.9, + "valid_targets_min": 1174 }, { - "epoch": 1.8263291139240505, - "grad_norm": 0.33679408936863786, - "learning_rate": 3.208717671149931e-05, - "loss": 0.3325, + "epoch": 1.0324050632911392, + "grad_norm": 0.33540491774621956, + "learning_rate": 3.8639517056153997e-05, + "loss": 0.3362, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05366458743810654, - "step": 451, - "valid_targets_mean": 4847.1, - "valid_targets_min": 1025 + "loss_rank_avg": 0.16152143478393555, + "step": 1020, + "valid_targets_mean": 4223.1, + "valid_targets_min": 858 }, { - "epoch": 1.830379746835443, - "grad_norm": 0.3164653551106017, - "learning_rate": 3.204207092549882e-05, - "loss": 0.3497, + "epoch": 1.0374683544303798, + "grad_norm": 0.32077100355601573, + "learning_rate": 3.8613784687424275e-05, + "loss": 0.3378, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04051554203033447, - "step": 452, - "valid_targets_mean": 4086.8, - "valid_targets_min": 1593 + "loss_rank_avg": 0.19795547425746918, + "step": 1025, + "valid_targets_mean": 5295.2, + "valid_targets_min": 1496 }, { - "epoch": 1.8344303797468355, - "grad_norm": 0.33504380345770385, - "learning_rate": 3.19968688514201e-05, - "loss": 0.338, + "epoch": 1.0425316455696203, + "grad_norm": 0.3216906551189178, + "learning_rate": 3.858781997276337e-05, + "loss": 0.3318, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05457250401377678, - "step": 453, - "valid_targets_mean": 6232.4, - "valid_targets_min": 1295 + "loss_rank_avg": 0.16188682615756989, + "step": 1030, + "valid_targets_mean": 5138.4, + "valid_targets_min": 947 }, { - "epoch": 1.8384810126582278, - "grad_norm": 0.2839096408910801, - "learning_rate": 3.195157085069774e-05, - "loss": 0.3432, + "epoch": 1.0475949367088608, + "grad_norm": 0.4486889227248024, + "learning_rate": 3.856162323627497e-05, + "loss": 0.3251, "loss_nan_ranks": 0, - "loss_rank_avg": 0.029980182647705078, - "step": 454, - "valid_targets_mean": 3421.4, - "valid_targets_min": 1506 + "loss_rank_avg": 0.09634960442781448, + "step": 1035, + "valid_targets_mean": 3724.6, + "valid_targets_min": 934 }, { - "epoch": 1.8425316455696201, - "grad_norm": 0.3726394710042998, - "learning_rate": 3.190617728553332e-05, - "loss": 0.3129, + "epoch": 1.0526582278481014, + "grad_norm": 0.2934117207565638, + "learning_rate": 3.8535194804958924e-05, + "loss": 0.3367, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0307072214782238, - "step": 455, - "valid_targets_mean": 3492.6, - "valid_targets_min": 1467 + "loss_rank_avg": 0.1526038944721222, + "step": 1040, + "valid_targets_mean": 4829.0, + "valid_targets_min": 1401 }, { - "epoch": 1.8465822784810126, - "grad_norm": 0.31640186998479036, - "learning_rate": 3.1860688518892606e-05, - "loss": 0.3455, + "epoch": 1.0577215189873417, + "grad_norm": 0.2931878952686227, + "learning_rate": 3.8508535008707236e-05, + "loss": 0.346, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04523278772830963, - "step": 456, - "valid_targets_mean": 5002.6, - "valid_targets_min": 1141 + "loss_rank_avg": 0.20354345440864563, + "step": 1045, + "valid_targets_mean": 6153.3, + "valid_targets_min": 1336 }, { - "epoch": 1.8506329113924052, - "grad_norm": 0.34971430216593696, - "learning_rate": 3.181510491450254e-05, - "loss": 0.3302, + "epoch": 1.0627848101265822, + "grad_norm": 0.322713813844516, + "learning_rate": 3.848164418029989e-05, + "loss": 0.3364, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03884712979197502, - "step": 457, - "valid_targets_mean": 4064.7, - "valid_targets_min": 1235 + "loss_rank_avg": 0.1375882625579834, + "step": 1050, + "valid_targets_mean": 4300.9, + "valid_targets_min": 1361 }, { - "epoch": 1.8546835443037974, - "grad_norm": 0.32742203770785705, - "learning_rate": 3.176942683684842e-05, - "loss": 0.354, + "epoch": 1.0678481012658227, + "grad_norm": 0.31096701689662554, + "learning_rate": 3.845452265540074e-05, + "loss": 0.3296, "loss_nan_ranks": 0, - "loss_rank_avg": 0.047533608973026276, - "step": 458, - "valid_targets_mean": 5261.7, - "valid_targets_min": 1702 + "loss_rank_avg": 0.1565578281879425, + "step": 1055, + "valid_targets_mean": 4115.1, + "valid_targets_min": 1141 }, { - "epoch": 1.8587341772151897, - "grad_norm": 0.3131402584468333, - "learning_rate": 3.1723654651170934e-05, - "loss": 0.3155, + "epoch": 1.0729113924050633, + "grad_norm": 0.31792661099154035, + "learning_rate": 3.842717077255329e-05, + "loss": 0.3265, "loss_nan_ranks": 0, - "loss_rank_avg": 0.039669573307037354, - "step": 459, - "valid_targets_mean": 4605.7, - "valid_targets_min": 1480 + "loss_rank_avg": 0.18725517392158508, + "step": 1060, + "valid_targets_mean": 5399.2, + "valid_targets_min": 1440 }, { - "epoch": 1.8627848101265823, - "grad_norm": 0.32060824625839895, - "learning_rate": 3.167778872346327e-05, - "loss": 0.3217, + "epoch": 1.0779746835443038, + "grad_norm": 0.2966357451403385, + "learning_rate": 3.839958887317649e-05, + "loss": 0.3267, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0460028350353241, - "step": 460, - "valid_targets_mean": 4227.1, - "valid_targets_min": 1103 + "loss_rank_avg": 0.1931026577949524, + "step": 1065, + "valid_targets_mean": 6788.4, + "valid_targets_min": 1285 }, { - "epoch": 1.8668354430379748, - "grad_norm": 0.2997542609453774, - "learning_rate": 3.163182942046815e-05, - "loss": 0.324, + "epoch": 1.0830379746835443, + "grad_norm": 0.32526509145376603, + "learning_rate": 3.837177730156045e-05, + "loss": 0.3283, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026472963392734528, - "step": 461, - "valid_targets_mean": 3085.8, - "valid_targets_min": 1151 + "loss_rank_avg": 0.18849416077136993, + "step": 1070, + "valid_targets_mean": 5493.9, + "valid_targets_min": 884 }, { - "epoch": 1.870886075949367, - "grad_norm": 0.3679982728380354, - "learning_rate": 3.158577710967494e-05, - "loss": 0.326, + "epoch": 1.0881012658227849, + "grad_norm": 0.3176994766517001, + "learning_rate": 3.834373640486216e-05, + "loss": 0.329, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03438718616962433, - "step": 462, - "valid_targets_mean": 3432.3, - "valid_targets_min": 1345 + "loss_rank_avg": 0.13460056483745575, + "step": 1075, + "valid_targets_mean": 3655.2, + "valid_targets_min": 1173 }, { - "epoch": 1.8749367088607594, - "grad_norm": 0.2938267435171152, - "learning_rate": 3.153963215931669e-05, - "loss": 0.3215, + "epoch": 1.0931645569620254, + "grad_norm": 0.3802547345823635, + "learning_rate": 3.8315466533101154e-05, + "loss": 0.3191, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03502851724624634, - "step": 463, - "valid_targets_mean": 4178.2, - "valid_targets_min": 963 + "loss_rank_avg": 0.1390688419342041, + "step": 1080, + "valid_targets_mean": 3724.8, + "valid_targets_min": 686 }, { - "epoch": 1.8789873417721519, - "grad_norm": 0.3585044764978308, - "learning_rate": 3.14933949383672e-05, - "loss": 0.3363, + "epoch": 1.0982278481012657, + "grad_norm": 0.30409901587440336, + "learning_rate": 3.828696803915515e-05, + "loss": 0.3295, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0659424364566803, - "step": 464, - "valid_targets_mean": 8122.4, - "valid_targets_min": 1108 + "loss_rank_avg": 0.19093772768974304, + "step": 1085, + "valid_targets_mean": 5509.2, + "valid_targets_min": 1316 }, { - "epoch": 1.8830379746835444, - "grad_norm": 0.3118791436215761, - "learning_rate": 3.144706581653805e-05, - "loss": 0.3333, + "epoch": 1.1032911392405063, + "grad_norm": 0.31857188574561196, + "learning_rate": 3.8258241278755613e-05, + "loss": 0.3259, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05027894675731659, - "step": 465, - "valid_targets_mean": 5247.6, - "valid_targets_min": 1063 + "loss_rank_avg": 0.18015238642692566, + "step": 1090, + "valid_targets_mean": 5178.9, + "valid_targets_min": 1381 }, { - "epoch": 1.8870886075949367, - "grad_norm": 0.3761180103785145, - "learning_rate": 3.140064516427566e-05, - "loss": 0.3463, + "epoch": 1.1083544303797468, + "grad_norm": 0.32050011818280044, + "learning_rate": 3.8229286610483356e-05, + "loss": 0.3233, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03511781617999077, - "step": 466, - "valid_targets_mean": 3987.2, - "valid_targets_min": 893 + "loss_rank_avg": 0.18884283304214478, + "step": 1095, + "valid_targets_mean": 5116.2, + "valid_targets_min": 1159 }, { - "epoch": 1.891139240506329, - "grad_norm": 0.42711588287783636, - "learning_rate": 3.135413335275833e-05, - "loss": 0.3368, + "epoch": 1.1134177215189873, + "grad_norm": 0.3113272756450795, + "learning_rate": 3.820010439576403e-05, + "loss": 0.3164, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04904717206954956, - "step": 467, - "valid_targets_mean": 4935.4, - "valid_targets_min": 1291 + "loss_rank_avg": 0.16775749623775482, + "step": 1100, + "valid_targets_mean": 4514.6, + "valid_targets_min": 1605 }, { - "epoch": 1.8951898734177215, - "grad_norm": 0.36317986690264864, - "learning_rate": 3.130753075389327e-05, - "loss": 0.3435, + "epoch": 1.1184810126582279, + "grad_norm": 0.33153340730169845, + "learning_rate": 3.817069499886364e-05, + "loss": 0.3196, "loss_nan_ranks": 0, - "loss_rank_avg": 0.042481452226638794, - "step": 468, - "valid_targets_mean": 4847.0, - "valid_targets_min": 1741 + "loss_rank_avg": 0.12046626210212708, + "step": 1105, + "valid_targets_mean": 3291.4, + "valid_targets_min": 962 }, { - "epoch": 1.899240506329114, - "grad_norm": 0.29204436191953415, - "learning_rate": 3.1260837740313625e-05, - "loss": 0.3218, + "epoch": 1.1235443037974684, + "grad_norm": 0.3013874756589958, + "learning_rate": 3.814105878688397e-05, + "loss": 0.3325, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03251627832651138, - "step": 469, - "valid_targets_mean": 4043.2, - "valid_targets_min": 1435 + "loss_rank_avg": 0.16391009092330933, + "step": 1110, + "valid_targets_mean": 6036.4, + "valid_targets_min": 1055 }, { - "epoch": 1.9032911392405063, - "grad_norm": 0.361688803214787, - "learning_rate": 3.121405468537547e-05, - "loss": 0.3417, + "epoch": 1.128607594936709, + "grad_norm": 0.331804851918644, + "learning_rate": 3.8111196129758025e-05, + "loss": 0.3281, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04765615239739418, - "step": 470, - "valid_targets_mean": 5419.4, - "valid_targets_min": 1151 + "loss_rank_avg": 0.1217694953083992, + "step": 1115, + "valid_targets_mean": 2960.4, + "valid_targets_min": 1255 }, { - "epoch": 1.9073417721518986, - "grad_norm": 0.3191407264558652, - "learning_rate": 3.116718196315488e-05, - "loss": 0.3314, + "epoch": 1.1336708860759495, + "grad_norm": 0.35058089757688515, + "learning_rate": 3.808110740024541e-05, + "loss": 0.3189, "loss_nan_ranks": 0, - "loss_rank_avg": 0.050547726452350616, - "step": 471, - "valid_targets_mean": 5128.0, - "valid_targets_min": 1240 + "loss_rank_avg": 0.18976081907749176, + "step": 1120, + "valid_targets_mean": 4311.5, + "valid_targets_min": 1573 }, { - "epoch": 1.9113924050632911, - "grad_norm": 0.34633636808004736, - "learning_rate": 3.1120219948444894e-05, - "loss": 0.3439, + "epoch": 1.13873417721519, + "grad_norm": 0.2598654453403866, + "learning_rate": 3.8050792973927654e-05, + "loss": 0.3227, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03523585945367813, - "step": 472, - "valid_targets_mean": 4280.5, - "valid_targets_min": 1036 + "loss_rank_avg": 0.16648131608963013, + "step": 1125, + "valid_targets_mean": 6690.9, + "valid_targets_min": 1006 }, { - "epoch": 1.9154430379746836, - "grad_norm": 0.2968650475339727, - "learning_rate": 3.107316901675252e-05, - "loss": 0.3249, + "epoch": 1.1437974683544303, + "grad_norm": 0.36580297380970445, + "learning_rate": 3.8020253229203555e-05, + "loss": 0.327, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05320196598768234, - "step": 473, - "valid_targets_mean": 5309.4, - "valid_targets_min": 1250 + "loss_rank_avg": 0.173434779047966, + "step": 1130, + "valid_targets_mean": 3429.3, + "valid_targets_min": 1217 }, { - "epoch": 1.919493670886076, - "grad_norm": 0.3030978413371183, - "learning_rate": 3.1026029544295745e-05, - "loss": 0.3264, + "epoch": 1.1488607594936708, + "grad_norm": 0.3170699501916741, + "learning_rate": 3.798948854728441e-05, + "loss": 0.3303, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03609578311443329, - "step": 474, - "valid_targets_mean": 4145.1, - "valid_targets_min": 915 + "loss_rank_avg": 0.12679925560951233, + "step": 1135, + "valid_targets_mean": 3721.6, + "valid_targets_min": 726 }, { - "epoch": 1.9235443037974682, - "grad_norm": 0.33869189366733815, - "learning_rate": 3.097880190800057e-05, - "loss": 0.3365, + "epoch": 1.1539240506329114, + "grad_norm": 0.3315048726728199, + "learning_rate": 3.7958499312189344e-05, + "loss": 0.3389, "loss_nan_ranks": 0, - "loss_rank_avg": 0.051121585071086884, - "step": 475, - "valid_targets_mean": 5403.0, - "valid_targets_min": 1633 + "loss_rank_avg": 0.11187270283699036, + "step": 1140, + "valid_targets_mean": 3303.8, + "valid_targets_min": 954 }, { - "epoch": 1.9275949367088607, - "grad_norm": 0.334386515109002, - "learning_rate": 3.093148648549788e-05, - "loss": 0.3424, + "epoch": 1.158987341772152, + "grad_norm": 0.2969536976332786, + "learning_rate": 3.792728591074041e-05, + "loss": 0.3199, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05128683149814606, - "step": 476, - "valid_targets_mean": 4943.8, - "valid_targets_min": 1134 + "loss_rank_avg": 0.11002499610185623, + "step": 1145, + "valid_targets_mean": 4168.6, + "valid_targets_min": 1316 }, { - "epoch": 1.9316455696202532, - "grad_norm": 0.3430991389462553, - "learning_rate": 3.088408365512055e-05, - "loss": 0.3201, + "epoch": 1.1640506329113924, + "grad_norm": 0.29906816062312314, + "learning_rate": 3.7895848732557815e-05, + "loss": 0.3329, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04292987287044525, - "step": 477, - "valid_targets_mean": 5557.6, - "valid_targets_min": 1453 + "loss_rank_avg": 0.1309490203857422, + "step": 1150, + "valid_targets_mean": 4264.5, + "valid_targets_min": 945 }, { - "epoch": 1.9356962025316455, - "grad_norm": 0.3639481087556812, - "learning_rate": 3.083659379590034e-05, - "loss": 0.3384, + "epoch": 1.169113924050633, + "grad_norm": 0.3373393294160328, + "learning_rate": 3.786418817005507e-05, + "loss": 0.3299, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04263174161314964, - "step": 478, - "valid_targets_mean": 5070.9, - "valid_targets_min": 1419 + "loss_rank_avg": 0.16449810564517975, + "step": 1155, + "valid_targets_mean": 4430.1, + "valid_targets_min": 1548 }, { - "epoch": 1.9397468354430378, - "grad_norm": 0.34507267926661267, - "learning_rate": 3.0789017287564913e-05, - "loss": 0.3294, + "epoch": 1.1741772151898735, + "grad_norm": 0.31814918332777564, + "learning_rate": 3.783230461843406e-05, + "loss": 0.3211, "loss_nan_ranks": 0, - "loss_rank_avg": 0.045663461089134216, - "step": 479, - "valid_targets_mean": 4628.5, - "valid_targets_min": 869 + "loss_rank_avg": 0.1603396236896515, + "step": 1160, + "valid_targets_mean": 4611.4, + "valid_targets_min": 1192 }, { - "epoch": 1.9437974683544303, - "grad_norm": 0.3515284965881423, - "learning_rate": 3.074135451053475e-05, - "loss": 0.3351, + "epoch": 1.1792405063291138, + "grad_norm": 0.37957067548492934, + "learning_rate": 3.7800198475680136e-05, + "loss": 0.3264, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04386589676141739, - "step": 480, - "valid_targets_mean": 4744.3, - "valid_targets_min": 1600 + "loss_rank_avg": 0.09246158599853516, + "step": 1165, + "valid_targets_mean": 2677.1, + "valid_targets_min": 1357 }, { - "epoch": 1.9478481012658229, - "grad_norm": 0.37366993391351117, - "learning_rate": 3.069360584592015e-05, - "loss": 0.3183, + "epoch": 1.1843037974683543, + "grad_norm": 0.3094282166752189, + "learning_rate": 3.776787014255712e-05, + "loss": 0.3292, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04510515183210373, - "step": 481, - "valid_targets_mean": 4698.1, - "valid_targets_min": 1135 + "loss_rank_avg": 0.1497591733932495, + "step": 1170, + "valid_targets_mean": 4014.9, + "valid_targets_min": 1510 }, { - "epoch": 1.9518987341772152, - "grad_norm": 0.3289061989714357, - "learning_rate": 3.064577167551816e-05, - "loss": 0.3538, + "epoch": 1.1893670886075949, + "grad_norm": 0.35082198305199175, + "learning_rate": 3.773532002260233e-05, + "loss": 0.3276, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03470800444483757, - "step": 482, - "valid_targets_mean": 4177.1, - "valid_targets_min": 1143 + "loss_rank_avg": 0.1902400553226471, + "step": 1175, + "valid_targets_mean": 4051.2, + "valid_targets_min": 926 }, { - "epoch": 1.9559493670886074, - "grad_norm": 0.2961429018385786, - "learning_rate": 3.059785238180953e-05, - "loss": 0.3134, + "epoch": 1.1944303797468354, + "grad_norm": 0.3363300235759021, + "learning_rate": 3.770254852212152e-05, + "loss": 0.3236, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05565746873617172, - "step": 483, - "valid_targets_mean": 6605.4, - "valid_targets_min": 1223 + "loss_rank_avg": 0.1484481394290924, + "step": 1180, + "valid_targets_mean": 4022.1, + "valid_targets_min": 949 }, { - "epoch": 1.96, - "grad_norm": 0.3193374118216837, - "learning_rate": 3.054984834795566e-05, - "loss": 0.3284, + "epoch": 1.199493670886076, + "grad_norm": 0.2959219712023686, + "learning_rate": 3.766955605018384e-05, + "loss": 0.3197, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05232729762792587, - "step": 484, - "valid_targets_mean": 6810.3, - "valid_targets_min": 1217 + "loss_rank_avg": 0.17988604307174683, + "step": 1185, + "valid_targets_mean": 5788.9, + "valid_targets_min": 927 }, { - "epoch": 1.9640506329113925, - "grad_norm": 0.2970041554551343, - "learning_rate": 3.0501759957795537e-05, - "loss": 0.3274, + "epoch": 1.2045569620253165, + "grad_norm": 0.30513862700761135, + "learning_rate": 3.7636343018616675e-05, + "loss": 0.3243, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04064800962805748, - "step": 485, - "valid_targets_mean": 4291.6, - "valid_targets_min": 1209 + "loss_rank_avg": 0.162398099899292, + "step": 1190, + "valid_targets_mean": 4152.4, + "valid_targets_min": 1183 }, { - "epoch": 1.9681012658227848, - "grad_norm": 0.3222203224742733, - "learning_rate": 3.0453587595842627e-05, - "loss": 0.3253, + "epoch": 1.209620253164557, + "grad_norm": 0.3256099867852839, + "learning_rate": 3.7602909842000565e-05, + "loss": 0.3255, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03722073882818222, - "step": 486, - "valid_targets_mean": 3872.8, - "valid_targets_min": 1251 + "loss_rank_avg": 0.1604665219783783, + "step": 1195, + "valid_targets_mean": 4510.4, + "valid_targets_min": 1107 }, { - "epoch": 1.972151898734177, - "grad_norm": 0.32905816289022255, - "learning_rate": 3.0405331647281856e-05, - "loss": 0.328, + "epoch": 1.2146835443037975, + "grad_norm": 0.31652452857276137, + "learning_rate": 3.7569256937663995e-05, + "loss": 0.3228, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04163438081741333, - "step": 487, - "valid_targets_mean": 4996.4, - "valid_targets_min": 1017 + "loss_rank_avg": 0.11722761392593384, + "step": 1200, + "valid_targets_mean": 3867.2, + "valid_targets_min": 1428 }, { - "epoch": 1.9762025316455696, - "grad_norm": 0.3033504131427964, - "learning_rate": 3.0356992497966508e-05, - "loss": 0.3192, + "epoch": 1.219746835443038, + "grad_norm": 0.3570058144248121, + "learning_rate": 3.753538472567819e-05, + "loss": 0.3343, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02825050801038742, - "step": 488, - "valid_targets_mean": 3304.9, - "valid_targets_min": 1152 + "loss_rank_avg": 0.132009819149971, + "step": 1205, + "valid_targets_mean": 3776.8, + "valid_targets_min": 1444 }, { - "epoch": 1.980253164556962, - "grad_norm": 0.3467979919349524, - "learning_rate": 3.0308570534415137e-05, - "loss": 0.3329, + "epoch": 1.2248101265822784, + "grad_norm": 0.3253492095946553, + "learning_rate": 3.750129362885188e-05, + "loss": 0.3491, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0449945442378521, - "step": 489, - "valid_targets_mean": 5895.3, - "valid_targets_min": 1437 + "loss_rank_avg": 0.20971378684043884, + "step": 1210, + "valid_targets_mean": 5854.2, + "valid_targets_min": 895 }, { - "epoch": 1.9843037974683544, - "grad_norm": 0.32266276242539704, - "learning_rate": 3.0260066143808467e-05, - "loss": 0.3316, + "epoch": 1.229873417721519, + "grad_norm": 0.31813799271358817, + "learning_rate": 3.7466984072726014e-05, + "loss": 0.3278, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03618864715099335, - "step": 490, - "valid_targets_mean": 3618.2, - "valid_targets_min": 1060 + "loss_rank_avg": 0.14392566680908203, + "step": 1215, + "valid_targets_mean": 4255.3, + "valid_targets_min": 1377 }, { - "epoch": 1.9883544303797467, - "grad_norm": 0.30250022892082834, - "learning_rate": 3.021147971398632e-05, - "loss": 0.324, + "epoch": 1.2349367088607595, + "grad_norm": 0.3161090773410782, + "learning_rate": 3.743245648556845e-05, + "loss": 0.333, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04590712860226631, - "step": 491, - "valid_targets_mean": 5090.6, - "valid_targets_min": 1261 + "loss_rank_avg": 0.15600383281707764, + "step": 1220, + "valid_targets_mean": 4627.6, + "valid_targets_min": 1186 }, { - "epoch": 1.9924050632911392, - "grad_norm": 0.29843459484025175, - "learning_rate": 3.0162811633444478e-05, - "loss": 0.325, + "epoch": 1.24, + "grad_norm": 0.3383098606932185, + "learning_rate": 3.739771129836863e-05, + "loss": 0.3106, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03130924701690674, - "step": 492, - "valid_targets_mean": 3311.7, - "valid_targets_min": 1693 + "loss_rank_avg": 0.15630879998207092, + "step": 1225, + "valid_targets_mean": 3907.4, + "valid_targets_min": 1318 }, { - "epoch": 1.9964556962025317, - "grad_norm": 0.3185193871458022, - "learning_rate": 3.0114062291331635e-05, - "loss": 0.3261, + "epoch": 1.2450632911392405, + "grad_norm": 0.380311170159032, + "learning_rate": 3.736274894483215e-05, + "loss": 0.3443, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03464746102690697, - "step": 493, - "valid_targets_mean": 3960.8, - "valid_targets_min": 1462 + "loss_rank_avg": 0.17396849393844604, + "step": 1230, + "valid_targets_mean": 3654.8, + "valid_targets_min": 1228 }, { - "epoch": 2.0, - "grad_norm": 0.30644069464206103, - "learning_rate": 3.0065232077446217e-05, - "loss": 0.3114, + "epoch": 1.250126582278481, + "grad_norm": 0.29633634831203143, + "learning_rate": 3.7327569861375404e-05, + "loss": 0.3368, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030792618170380592, - "step": 494, - "valid_targets_mean": 3500.9, - "valid_targets_min": 901 + "loss_rank_avg": 0.20720690488815308, + "step": 1235, + "valid_targets_mean": 6698.7, + "valid_targets_min": 1554 }, { - "epoch": 2.0040506329113925, - "grad_norm": 0.4743719160458842, - "learning_rate": 3.001632138223332e-05, - "loss": 0.2585, + "epoch": 1.2551898734177216, + "grad_norm": 0.32181581382324975, + "learning_rate": 3.729217448712009e-05, + "loss": 0.31, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030326060950756073, - "step": 495, - "valid_targets_mean": 4105.0, - "valid_targets_min": 1372 + "loss_rank_avg": 0.15546496212482452, + "step": 1240, + "valid_targets_mean": 4598.5, + "valid_targets_min": 1081 }, { - "epoch": 2.008101265822785, - "grad_norm": 0.3171536640082535, - "learning_rate": 2.9967330596781565e-05, - "loss": 0.2619, + "epoch": 1.260253164556962, + "grad_norm": 0.2876370801591027, + "learning_rate": 3.725656326388776e-05, + "loss": 0.312, "loss_nan_ranks": 0, - "loss_rank_avg": 0.040422745048999786, - "step": 496, - "valid_targets_mean": 5578.3, - "valid_targets_min": 1982 + "loss_rank_avg": 0.12497955560684204, + "step": 1245, + "valid_targets_mean": 4425.2, + "valid_targets_min": 1167 }, { - "epoch": 2.012151898734177, - "grad_norm": 0.5811372726698385, - "learning_rate": 2.9918260112819952e-05, - "loss": 0.26, + "epoch": 1.2653164556962024, + "grad_norm": 0.3193054610028216, + "learning_rate": 3.722073663619429e-05, + "loss": 0.3268, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03700966387987137, - "step": 497, - "valid_targets_mean": 5488.2, - "valid_targets_min": 1924 + "loss_rank_avg": 0.15073925256729126, + "step": 1250, + "valid_targets_mean": 4511.2, + "valid_targets_min": 1143 }, { - "epoch": 2.0162025316455696, - "grad_norm": 0.35672827420284275, - "learning_rate": 2.9869110322714778e-05, - "loss": 0.2416, + "epoch": 1.270379746835443, + "grad_norm": 0.39145813292181675, + "learning_rate": 3.718469505124434e-05, + "loss": 0.3246, "loss_nan_ranks": 0, - "loss_rank_avg": 0.025780418887734413, - "step": 498, - "valid_targets_mean": 4780.2, - "valid_targets_min": 1272 + "loss_rank_avg": 0.15670371055603027, + "step": 1255, + "valid_targets_mean": 4593.4, + "valid_targets_min": 885 }, { - "epoch": 2.020253164556962, - "grad_norm": 0.39321861478004955, - "learning_rate": 2.9819881619466447e-05, - "loss": 0.2402, + "epoch": 1.2754430379746835, + "grad_norm": 0.3007158793143366, + "learning_rate": 3.714843895892576e-05, + "loss": 0.332, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023468071594834328, - "step": 499, - "valid_targets_mean": 4412.1, - "valid_targets_min": 1296 + "loss_rank_avg": 0.14526674151420593, + "step": 1260, + "valid_targets_mean": 4201.2, + "valid_targets_min": 1015 }, { - "epoch": 2.0243037974683546, - "grad_norm": 0.3669361535720418, - "learning_rate": 2.9770574396706364e-05, - "loss": 0.2558, + "epoch": 1.280506329113924, + "grad_norm": 0.3431473760602178, + "learning_rate": 3.7111968811803985e-05, + "loss": 0.3374, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026013996452093124, - "step": 500, - "valid_targets_mean": 3958.3, - "valid_targets_min": 1253 + "loss_rank_avg": 0.11407409608364105, + "step": 1265, + "valid_targets_mean": 2609.9, + "valid_targets_min": 1008 }, { - "epoch": 2.0283544303797467, - "grad_norm": 0.42664137168830724, - "learning_rate": 2.972118904869376e-05, - "loss": 0.2475, + "epoch": 1.2855696202531646, + "grad_norm": 0.31430226120597105, + "learning_rate": 3.7075285065116376e-05, + "loss": 0.3255, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028630584478378296, - "step": 501, - "valid_targets_mean": 3853.2, - "valid_targets_min": 1320 + "loss_rank_avg": 0.21788525581359863, + "step": 1270, + "valid_targets_mean": 5227.1, + "valid_targets_min": 1159 }, { - "epoch": 2.0324050632911392, - "grad_norm": 0.3340542010102885, - "learning_rate": 2.9671725970312563e-05, - "loss": 0.2527, + "epoch": 1.290632911392405, + "grad_norm": 0.2942207875130029, + "learning_rate": 3.703838817676654e-05, + "loss": 0.3248, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028039783239364624, - "step": 502, - "valid_targets_mean": 3687.6, - "valid_targets_min": 1307 + "loss_rank_avg": 0.16078102588653564, + "step": 1275, + "valid_targets_mean": 5179.4, + "valid_targets_min": 966 }, { - "epoch": 2.0364556962025318, - "grad_norm": 0.38777171194781507, - "learning_rate": 2.9622185557068238e-05, - "loss": 0.25, + "epoch": 1.2956962025316456, + "grad_norm": 0.33801133523700155, + "learning_rate": 3.7001278607318646e-05, + "loss": 0.3293, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02727580815553665, - "step": 503, - "valid_targets_mean": 3820.6, - "valid_targets_min": 1367 + "loss_rank_avg": 0.13204364478588104, + "step": 1280, + "valid_targets_mean": 3593.9, + "valid_targets_min": 1351 }, { - "epoch": 2.0405063291139243, - "grad_norm": 0.32924842344596694, - "learning_rate": 2.95725682050846e-05, - "loss": 0.2439, + "epoch": 1.3007594936708862, + "grad_norm": 0.36194997437445076, + "learning_rate": 3.696395681999161e-05, + "loss": 0.3315, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02716241590678692, - "step": 504, - "valid_targets_mean": 3934.8, - "valid_targets_min": 1618 + "loss_rank_avg": 0.15205729007720947, + "step": 1285, + "valid_targets_mean": 3709.8, + "valid_targets_min": 1456 }, { - "epoch": 2.0445569620253163, - "grad_norm": 0.3039367974293122, - "learning_rate": 2.9522874311100684e-05, - "loss": 0.2498, + "epoch": 1.3058227848101267, + "grad_norm": 0.37635182260044775, + "learning_rate": 3.692642328065337e-05, + "loss": 0.3373, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03982719033956528, - "step": 505, - "valid_targets_mean": 6318.9, - "valid_targets_min": 1567 + "loss_rank_avg": 0.12143959850072861, + "step": 1290, + "valid_targets_mean": 3222.8, + "valid_targets_min": 1077 }, { - "epoch": 2.048607594936709, - "grad_norm": 0.30628238080051057, - "learning_rate": 2.9473104272467526e-05, - "loss": 0.2443, + "epoch": 1.310886075949367, + "grad_norm": 0.3080355652465348, + "learning_rate": 3.688867845781506e-05, + "loss": 0.327, "loss_nan_ranks": 0, - "loss_rank_avg": 0.031220629811286926, - "step": 506, - "valid_targets_mean": 4749.3, - "valid_targets_min": 1159 + "loss_rank_avg": 0.14508505165576935, + "step": 1295, + "valid_targets_mean": 3859.7, + "valid_targets_min": 1113 }, { - "epoch": 2.0526582278481014, - "grad_norm": 0.3423732745997802, - "learning_rate": 2.942325848714505e-05, - "loss": 0.2571, + "epoch": 1.3159493670886075, + "grad_norm": 0.3439159780043492, + "learning_rate": 3.685072282262511e-05, + "loss": 0.3302, "loss_nan_ranks": 0, - "loss_rank_avg": 0.025803158059716225, - "step": 507, - "valid_targets_mean": 3670.8, - "valid_targets_min": 1155 + "loss_rank_avg": 0.14666421711444855, + "step": 1300, + "valid_targets_mean": 3587.8, + "valid_targets_min": 1142 }, { - "epoch": 2.056708860759494, - "grad_norm": 0.3181443981098617, - "learning_rate": 2.9373337353698804e-05, - "loss": 0.2512, + "epoch": 1.321012658227848, + "grad_norm": 0.30506292484752023, + "learning_rate": 3.6812556848863474e-05, + "loss": 0.3253, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02923399582505226, - "step": 508, - "valid_targets_mean": 3626.4, - "valid_targets_min": 1134 + "loss_rank_avg": 0.15456736087799072, + "step": 1305, + "valid_targets_mean": 3908.8, + "valid_targets_min": 1305 }, { - "epoch": 2.060759493670886, - "grad_norm": 0.3188060851658609, - "learning_rate": 2.932334127129686e-05, - "loss": 0.2508, + "epoch": 1.3260759493670886, + "grad_norm": 0.32896698136778196, + "learning_rate": 3.6774181012935595e-05, + "loss": 0.3302, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03547723591327667, - "step": 509, - "valid_targets_mean": 4740.8, - "valid_targets_min": 1428 + "loss_rank_avg": 0.14750021696090698, + "step": 1310, + "valid_targets_mean": 3579.4, + "valid_targets_min": 696 }, { - "epoch": 2.0648101265822785, - "grad_norm": 0.35017866716587204, - "learning_rate": 2.9273270639706547e-05, - "loss": 0.2522, + "epoch": 1.3311392405063291, + "grad_norm": 0.3494681443588045, + "learning_rate": 3.673559579386653e-05, + "loss": 0.3329, "loss_nan_ranks": 0, - "loss_rank_avg": 0.042312368750572205, - "step": 510, - "valid_targets_mean": 6287.3, - "valid_targets_min": 728 + "loss_rank_avg": 0.16966864466667175, + "step": 1315, + "valid_targets_mean": 3639.5, + "valid_targets_min": 1136 }, { - "epoch": 2.068860759493671, - "grad_norm": 0.3314228937290726, - "learning_rate": 2.922312585929131e-05, - "loss": 0.2594, + "epoch": 1.3362025316455697, + "grad_norm": 0.3191056529939702, + "learning_rate": 3.6696801673294984e-05, + "loss": 0.3274, "loss_nan_ranks": 0, - "loss_rank_avg": 0.029436534270644188, - "step": 511, - "valid_targets_mean": 4389.8, - "valid_targets_min": 2048 + "loss_rank_avg": 0.2022847682237625, + "step": 1320, + "valid_targets_mean": 6126.2, + "valid_targets_min": 1760 }, { - "epoch": 2.0729113924050635, - "grad_norm": 0.36123869120637675, - "learning_rate": 2.9172907331007478e-05, - "loss": 0.2598, + "epoch": 1.34126582278481, + "grad_norm": 0.40439808049151743, + "learning_rate": 3.665779913546721e-05, + "loss": 0.3207, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03139062598347664, - "step": 512, - "valid_targets_mean": 4490.1, - "valid_targets_min": 1400 + "loss_rank_avg": 0.1815149486064911, + "step": 1325, + "valid_targets_mean": 3929.4, + "valid_targets_min": 1092 }, { - "epoch": 2.0769620253164556, - "grad_norm": 0.3152518397145963, - "learning_rate": 2.9122615456401053e-05, - "loss": 0.2554, + "epoch": 1.3463291139240505, + "grad_norm": 0.29566683990417303, + "learning_rate": 3.6618588667231064e-05, + "loss": 0.3244, "loss_nan_ranks": 0, - "loss_rank_avg": 0.033695369958877563, - "step": 513, - "valid_targets_mean": 4604.3, - "valid_targets_min": 939 + "loss_rank_avg": 0.1440686285495758, + "step": 1330, + "valid_targets_mean": 4236.4, + "valid_targets_min": 1086 }, { - "epoch": 2.081012658227848, - "grad_norm": 0.353497447966068, - "learning_rate": 2.9072250637604535e-05, - "loss": 0.2596, + "epoch": 1.351392405063291, + "grad_norm": 0.3344359616665295, + "learning_rate": 3.6579170758029885e-05, + "loss": 0.324, "loss_nan_ranks": 0, - "loss_rank_avg": 0.043300822377204895, - "step": 514, - "valid_targets_mean": 6654.4, - "valid_targets_min": 1476 + "loss_rank_avg": 0.15648147463798523, + "step": 1335, + "valid_targets_mean": 3588.6, + "valid_targets_min": 1635 }, { - "epoch": 2.0850632911392406, - "grad_norm": 0.30130567529114766, - "learning_rate": 2.9021813277333667e-05, - "loss": 0.2581, + "epoch": 1.3564556962025316, + "grad_norm": 0.2879031761545053, + "learning_rate": 3.653954589989637e-05, + "loss": 0.3215, "loss_nan_ranks": 0, - "loss_rank_avg": 0.027196582406759262, - "step": 515, - "valid_targets_mean": 4558.4, - "valid_targets_min": 1535 + "loss_rank_avg": 0.16395555436611176, + "step": 1340, + "valid_targets_mean": 4903.5, + "valid_targets_min": 1401 }, { - "epoch": 2.089113924050633, - "grad_norm": 0.32168385547845024, - "learning_rate": 2.8971303778884244e-05, - "loss": 0.2486, + "epoch": 1.3615189873417721, + "grad_norm": 0.3076034430138629, + "learning_rate": 3.649971458744645e-05, + "loss": 0.3332, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023042596876621246, - "step": 516, - "valid_targets_mean": 2868.8, - "valid_targets_min": 1010 + "loss_rank_avg": 0.14920540153980255, + "step": 1345, + "valid_targets_mean": 4404.3, + "valid_targets_min": 1479 }, { - "epoch": 2.093164556962025, - "grad_norm": 0.3578906280639518, - "learning_rate": 2.8920722546128863e-05, - "loss": 0.2616, + "epoch": 1.3665822784810127, + "grad_norm": 0.37728920822736073, + "learning_rate": 3.645967731787313e-05, + "loss": 0.3273, "loss_nan_ranks": 0, - "loss_rank_avg": 0.046286728233098984, - "step": 517, - "valid_targets_mean": 5850.9, - "valid_targets_min": 1198 + "loss_rank_avg": 0.16919684410095215, + "step": 1350, + "valid_targets_mean": 3916.3, + "valid_targets_min": 1491 }, { - "epoch": 2.0972151898734177, - "grad_norm": 0.290499018540267, - "learning_rate": 2.887006998351372e-05, - "loss": 0.247, + "epoch": 1.3716455696202532, + "grad_norm": 0.2944205665752948, + "learning_rate": 3.641943459094026e-05, + "loss": 0.32, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021764980629086494, - "step": 518, - "valid_targets_mean": 3495.7, - "valid_targets_min": 1256 + "loss_rank_avg": 0.20011773705482483, + "step": 1355, + "valid_targets_mean": 6104.8, + "valid_targets_min": 1110 }, { - "epoch": 2.1012658227848102, - "grad_norm": 0.35911454856145447, - "learning_rate": 2.881934649605535e-05, - "loss": 0.2515, + "epoch": 1.3767088607594937, + "grad_norm": 0.31998862065193057, + "learning_rate": 3.63789869089763e-05, + "loss": 0.3281, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02156168967485428, - "step": 519, - "valid_targets_mean": 4110.6, - "valid_targets_min": 1325 + "loss_rank_avg": 0.17156195640563965, + "step": 1360, + "valid_targets_mean": 4435.9, + "valid_targets_min": 1056 }, { - "epoch": 2.1053164556962027, - "grad_norm": 0.33183904920020607, - "learning_rate": 2.8768552489337408e-05, - "loss": 0.2643, + "epoch": 1.3817721518987343, + "grad_norm": 0.3359058600132148, + "learning_rate": 3.633833477686805e-05, + "loss": 0.326, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03599020093679428, - "step": 520, - "valid_targets_mean": 4835.8, - "valid_targets_min": 1515 + "loss_rank_avg": 0.1116013303399086, + "step": 1365, + "valid_targets_mean": 2720.7, + "valid_targets_min": 1100 }, { - "epoch": 2.109367088607595, - "grad_norm": 0.32230216170063813, - "learning_rate": 2.8717688369507425e-05, - "loss": 0.2555, + "epoch": 1.3868354430379748, + "grad_norm": 0.3167283899854193, + "learning_rate": 3.629747870205438e-05, + "loss": 0.3375, "loss_nan_ranks": 0, - "loss_rank_avg": 0.029470087960362434, - "step": 521, - "valid_targets_mean": 4240.4, - "valid_targets_min": 1175 + "loss_rank_avg": 0.15123698115348816, + "step": 1370, + "valid_targets_mean": 4809.0, + "valid_targets_min": 1288 }, { - "epoch": 2.1134177215189873, - "grad_norm": 0.34079290172519544, - "learning_rate": 2.8666754543273535e-05, - "loss": 0.2596, + "epoch": 1.3918987341772153, + "grad_norm": 0.3176638421764831, + "learning_rate": 3.625641919451982e-05, + "loss": 0.3251, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03355570137500763, - "step": 522, - "valid_targets_mean": 4991.8, - "valid_targets_min": 977 + "loss_rank_avg": 0.116270050406456, + "step": 1375, + "valid_targets_mean": 3099.4, + "valid_targets_min": 1302 }, { - "epoch": 2.11746835443038, - "grad_norm": 0.3287057012336886, - "learning_rate": 2.8615751417901273e-05, - "loss": 0.2614, + "epoch": 1.3969620253164556, + "grad_norm": 0.3567607135105218, + "learning_rate": 3.621515676678829e-05, + "loss": 0.3403, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04670153558254242, - "step": 523, - "valid_targets_mean": 6104.7, - "valid_targets_min": 1441 + "loss_rank_avg": 0.18901550769805908, + "step": 1380, + "valid_targets_mean": 4234.6, + "valid_targets_min": 1078 }, { - "epoch": 2.1215189873417724, - "grad_norm": 0.39226120428182537, - "learning_rate": 2.856467940121025e-05, - "loss": 0.2537, + "epoch": 1.4020253164556962, + "grad_norm": 0.33150001426521647, + "learning_rate": 3.617369193391663e-05, + "loss": 0.3257, "loss_nan_ranks": 0, - "loss_rank_avg": 0.031637925654649734, - "step": 524, - "valid_targets_mean": 4412.3, - "valid_targets_min": 1529 + "loss_rank_avg": 0.14285731315612793, + "step": 1385, + "valid_targets_mean": 3883.8, + "valid_targets_min": 1292 }, { - "epoch": 2.1255696202531644, - "grad_norm": 0.365395614656608, - "learning_rate": 2.8513538901570964e-05, - "loss": 0.254, + "epoch": 1.4070886075949367, + "grad_norm": 0.32040861152447253, + "learning_rate": 3.61320252134882e-05, + "loss": 0.3202, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03924763202667236, - "step": 525, - "valid_targets_mean": 5742.6, - "valid_targets_min": 1811 + "loss_rank_avg": 0.15014925599098206, + "step": 1390, + "valid_targets_mean": 4633.4, + "valid_targets_min": 1150 }, { - "epoch": 2.129620253164557, - "grad_norm": 0.3435845959967996, - "learning_rate": 2.846233032790149e-05, - "loss": 0.25, + "epoch": 1.4121518987341772, + "grad_norm": 0.3364407178912512, + "learning_rate": 3.6090157125606405e-05, + "loss": 0.3308, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028139624744653702, - "step": 526, - "valid_targets_mean": 3931.2, - "valid_targets_min": 1297 + "loss_rank_avg": 0.19701796770095825, + "step": 1395, + "valid_targets_mean": 4298.4, + "valid_targets_min": 1055 }, { - "epoch": 2.1336708860759495, - "grad_norm": 0.3397642853173752, - "learning_rate": 2.84110540896642e-05, - "loss": 0.2534, + "epoch": 1.4172151898734178, + "grad_norm": 0.2998592984370577, + "learning_rate": 3.604808819288823e-05, + "loss": 0.3213, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02817264385521412, - "step": 527, - "valid_targets_mean": 4044.0, - "valid_targets_min": 1334 + "loss_rank_avg": 0.21334891021251678, + "step": 1400, + "valid_targets_mean": 6774.6, + "valid_targets_min": 1471 }, { - "epoch": 2.137721518987342, - "grad_norm": 0.3470751268385581, - "learning_rate": 2.8359710596862555e-05, - "loss": 0.2408, + "epoch": 1.4222784810126583, + "grad_norm": 0.32266679078127786, + "learning_rate": 3.600581894045768e-05, + "loss": 0.3162, "loss_nan_ranks": 0, - "loss_rank_avg": 0.037513479590415955, - "step": 528, - "valid_targets_mean": 4566.8, - "valid_targets_min": 1166 + "loss_rank_avg": 0.10229863971471786, + "step": 1405, + "valid_targets_mean": 2818.3, + "valid_targets_min": 1002 }, { - "epoch": 2.141772151898734, - "grad_norm": 0.3078086766463351, - "learning_rate": 2.8308300260037734e-05, - "loss": 0.2419, + "epoch": 1.4273417721518986, + "grad_norm": 0.2989355644726402, + "learning_rate": 3.596334989593927e-05, + "loss": 0.3324, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03615918010473251, - "step": 529, - "valid_targets_mean": 5390.9, - "valid_targets_min": 1652 + "loss_rank_avg": 0.16216593980789185, + "step": 1410, + "valid_targets_mean": 4271.4, + "valid_targets_min": 773 }, { - "epoch": 2.1458227848101266, - "grad_norm": 0.32227431988680616, - "learning_rate": 2.8256823490265424e-05, - "loss": 0.245, + "epoch": 1.4324050632911391, + "grad_norm": 0.33686789223908364, + "learning_rate": 3.5920681589451385e-05, + "loss": 0.3177, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030684471130371094, - "step": 530, - "valid_targets_mean": 4033.6, - "valid_targets_min": 1149 + "loss_rank_avg": 0.16129520535469055, + "step": 1415, + "valid_targets_mean": 3987.1, + "valid_targets_min": 776 }, { - "epoch": 2.149873417721519, - "grad_norm": 0.3200750213307613, - "learning_rate": 2.8205280699152515e-05, - "loss": 0.2508, + "epoch": 1.4374683544303797, + "grad_norm": 0.2948991655130385, + "learning_rate": 3.58778145535997e-05, + "loss": 0.3152, "loss_nan_ranks": 0, - "loss_rank_avg": 0.019646242260932922, - "step": 531, - "valid_targets_mean": 3203.2, - "valid_targets_min": 1081 + "loss_rank_avg": 0.14479148387908936, + "step": 1420, + "valid_targets_mean": 4191.2, + "valid_targets_min": 1049 }, { - "epoch": 2.1539240506329116, - "grad_norm": 0.30413373085344425, - "learning_rate": 2.8153672298833777e-05, - "loss": 0.2605, + "epoch": 1.4425316455696202, + "grad_norm": 0.278153706236472, + "learning_rate": 3.583474932347054e-05, + "loss": 0.3194, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0323205292224884, - "step": 532, - "valid_targets_mean": 4365.4, - "valid_targets_min": 1320 + "loss_rank_avg": 0.15073184669017792, + "step": 1425, + "valid_targets_mean": 5600.0, + "valid_targets_min": 978 }, { - "epoch": 2.1579746835443037, - "grad_norm": 0.3227746772569516, - "learning_rate": 2.8101998701968615e-05, - "loss": 0.254, + "epoch": 1.4475949367088607, + "grad_norm": 0.2989703152632012, + "learning_rate": 3.5791486436624145e-05, + "loss": 0.322, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02868857979774475, - "step": 533, - "valid_targets_mean": 4597.7, - "valid_targets_min": 974 + "loss_rank_avg": 0.1581433117389679, + "step": 1430, + "valid_targets_mean": 4557.4, + "valid_targets_min": 1311 }, { - "epoch": 2.162025316455696, - "grad_norm": 0.33095219812272253, - "learning_rate": 2.805026032173773e-05, - "loss": 0.2559, + "epoch": 1.4526582278481013, + "grad_norm": 0.3156763543957603, + "learning_rate": 3.5748026433088036e-05, + "loss": 0.3234, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03351202979683876, - "step": 534, - "valid_targets_mean": 4203.1, - "valid_targets_min": 1186 + "loss_rank_avg": 0.1554793417453766, + "step": 1435, + "valid_targets_mean": 5105.0, + "valid_targets_min": 1086 }, { - "epoch": 2.1660759493670887, - "grad_norm": 0.31803856832646626, - "learning_rate": 2.7998457571839832e-05, - "loss": 0.2571, + "epoch": 1.4577215189873418, + "grad_norm": 0.30549649598454104, + "learning_rate": 3.5704369855350226e-05, + "loss": 0.3285, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028351303189992905, - "step": 535, - "valid_targets_mean": 4174.6, - "valid_targets_min": 961 + "loss_rank_avg": 0.10838840901851654, + "step": 1440, + "valid_targets_mean": 2855.1, + "valid_targets_min": 1382 }, { - "epoch": 2.170126582278481, - "grad_norm": 0.3098804337345109, - "learning_rate": 2.7946590866488322e-05, - "loss": 0.2502, + "epoch": 1.4627848101265823, + "grad_norm": 0.37394597619150627, + "learning_rate": 3.566051724835245e-05, + "loss": 0.3409, "loss_nan_ranks": 0, - "loss_rank_avg": 0.025098508223891258, - "step": 536, - "valid_targets_mean": 3296.6, - "valid_targets_min": 1194 + "loss_rank_avg": 0.13602185249328613, + "step": 1445, + "valid_targets_mean": 3419.4, + "valid_targets_min": 1146 }, { - "epoch": 2.1741772151898733, - "grad_norm": 0.30957451812750464, - "learning_rate": 2.7894660620408e-05, - "loss": 0.2583, + "epoch": 1.4678481012658229, + "grad_norm": 0.3156375413907948, + "learning_rate": 3.5616469159483363e-05, + "loss": 0.3224, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023833472281694412, - "step": 537, - "valid_targets_mean": 3825.5, - "valid_targets_min": 1270 + "loss_rank_avg": 0.14657479524612427, + "step": 1450, + "valid_targets_mean": 4287.6, + "valid_targets_min": 1162 }, { - "epoch": 2.178227848101266, - "grad_norm": 0.32285871900224744, - "learning_rate": 2.784266724883173e-05, - "loss": 0.2546, + "epoch": 1.4729113924050634, + "grad_norm": 0.32282455141307365, + "learning_rate": 3.5572226138571753e-05, + "loss": 0.3428, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03730406612157822, - "step": 538, - "valid_targets_mean": 5946.6, - "valid_targets_min": 917 + "loss_rank_avg": 0.21434099972248077, + "step": 1455, + "valid_targets_mean": 6099.5, + "valid_targets_min": 1650 }, { - "epoch": 2.1822784810126583, - "grad_norm": 0.30315455729748625, - "learning_rate": 2.7790611167497128e-05, - "loss": 0.2551, + "epoch": 1.4779746835443037, + "grad_norm": 0.3934165636040704, + "learning_rate": 3.5527788737879595e-05, + "loss": 0.3293, "loss_nan_ranks": 0, - "loss_rank_avg": 0.027421358972787857, - "step": 539, - "valid_targets_mean": 4410.9, - "valid_targets_min": 1703 + "loss_rank_avg": 0.13554619252681732, + "step": 1460, + "valid_targets_mean": 3898.6, + "valid_targets_min": 749 }, { - "epoch": 2.186329113924051, - "grad_norm": 0.3101908957957333, - "learning_rate": 2.7738492792643224e-05, - "loss": 0.2522, + "epoch": 1.4830379746835443, + "grad_norm": 0.3532333332147838, + "learning_rate": 3.548315751209524e-05, + "loss": 0.3211, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03136130049824715, - "step": 540, - "valid_targets_mean": 3916.8, - "valid_targets_min": 1137 + "loss_rank_avg": 0.1423749029636383, + "step": 1465, + "valid_targets_mean": 3396.2, + "valid_targets_min": 1245 }, { - "epoch": 2.190379746835443, - "grad_norm": 0.33793475672526513, - "learning_rate": 2.768631254100716e-05, - "loss": 0.2554, + "epoch": 1.4881012658227848, + "grad_norm": 0.3046035862379382, + "learning_rate": 3.543833301832642e-05, + "loss": 0.3092, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03737739101052284, - "step": 541, - "valid_targets_mean": 4827.4, - "valid_targets_min": 1447 + "loss_rank_avg": 0.11005077511072159, + "step": 1470, + "valid_targets_mean": 3165.8, + "valid_targets_min": 1018 }, { - "epoch": 2.1944303797468354, - "grad_norm": 0.3135772042063124, - "learning_rate": 2.7634070829820836e-05, - "loss": 0.2486, + "epoch": 1.4931645569620253, + "grad_norm": 0.2934515113132098, + "learning_rate": 3.539331581609337e-05, + "loss": 0.3228, "loss_nan_ranks": 0, - "loss_rank_avg": 0.025419535115361214, - "step": 542, - "valid_targets_mean": 3488.4, - "valid_targets_min": 1217 + "loss_rank_avg": 0.16373209655284882, + "step": 1475, + "valid_targets_mean": 5422.3, + "valid_targets_min": 938 }, { - "epoch": 2.198481012658228, - "grad_norm": 0.3204768128524193, - "learning_rate": 2.7581768076807592e-05, - "loss": 0.2373, + "epoch": 1.4982278481012659, + "grad_norm": 0.29330851366159866, + "learning_rate": 3.5348106467321756e-05, + "loss": 0.3204, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014784103259444237, - "step": 543, - "valid_targets_mean": 2494.2, - "valid_targets_min": 991 + "loss_rank_avg": 0.1334601640701294, + "step": 1480, + "valid_targets_mean": 4623.2, + "valid_targets_min": 1060 }, { - "epoch": 2.2025316455696204, - "grad_norm": 0.33193823700912034, - "learning_rate": 2.7529404700178832e-05, - "loss": 0.2448, + "epoch": 1.5032911392405062, + "grad_norm": 0.3494149115756711, + "learning_rate": 3.530270553633574e-05, + "loss": 0.3347, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03723284602165222, - "step": 544, - "valid_targets_mean": 4625.8, - "valid_targets_min": 1372 + "loss_rank_avg": 0.14463084936141968, + "step": 1485, + "valid_targets_mean": 3681.4, + "valid_targets_min": 1175 }, { - "epoch": 2.2065822784810125, - "grad_norm": 0.32807630280677497, - "learning_rate": 2.7476981118630735e-05, - "loss": 0.2586, + "epoch": 1.5083544303797467, + "grad_norm": 0.34094476372572885, + "learning_rate": 3.5257113589850895e-05, + "loss": 0.3283, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02511894702911377, - "step": 545, - "valid_targets_mean": 3786.2, - "valid_targets_min": 1642 + "loss_rank_avg": 0.13542452454566956, + "step": 1490, + "valid_targets_mean": 3142.8, + "valid_targets_min": 1338 }, { - "epoch": 2.210632911392405, - "grad_norm": 0.31380484555706084, - "learning_rate": 2.742449775134085e-05, - "loss": 0.2536, + "epoch": 1.5134177215189872, + "grad_norm": 0.33689606267312805, + "learning_rate": 3.521133119696712e-05, + "loss": 0.3264, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01563825085759163, - "step": 546, - "valid_targets_mean": 2965.8, - "valid_targets_min": 1310 + "loss_rank_avg": 0.16328886151313782, + "step": 1495, + "valid_targets_mean": 4708.0, + "valid_targets_min": 1214 }, { - "epoch": 2.2146835443037975, - "grad_norm": 0.32866936967105825, - "learning_rate": 2.7371955017964788e-05, - "loss": 0.2601, + "epoch": 1.5184810126582278, + "grad_norm": 0.8587040130178458, + "learning_rate": 3.516535892916159e-05, + "loss": 0.3199, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024734562262892723, - "step": 547, - "valid_targets_mean": 3240.2, - "valid_targets_min": 1380 + "loss_rank_avg": 0.166406512260437, + "step": 1500, + "valid_targets_mean": 4267.1, + "valid_targets_min": 1066 }, { - "epoch": 2.21873417721519, - "grad_norm": 0.3521184862713378, - "learning_rate": 2.7319353338632845e-05, - "loss": 0.2459, + "epoch": 1.5235443037974683, + "grad_norm": 0.31747165527732263, + "learning_rate": 3.5119197360281553e-05, + "loss": 0.332, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02412096969783306, - "step": 548, - "valid_targets_mean": 3382.1, - "valid_targets_min": 1026 + "loss_rank_avg": 0.1378452181816101, + "step": 1505, + "valid_targets_mean": 3656.2, + "valid_targets_min": 1064 }, { - "epoch": 2.222784810126582, - "grad_norm": 0.3221306823431023, - "learning_rate": 2.7266693133946645e-05, - "loss": 0.2419, + "epoch": 1.5286075949367088, + "grad_norm": 0.3521429432867297, + "learning_rate": 3.507284706653722e-05, + "loss": 0.3369, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020261388272047043, - "step": 549, - "valid_targets_mean": 3593.0, - "valid_targets_min": 1223 + "loss_rank_avg": 0.22082693874835968, + "step": 1510, + "valid_targets_mean": 4618.8, + "valid_targets_min": 1392 }, { - "epoch": 2.2268354430379746, - "grad_norm": 0.3368087467122894, - "learning_rate": 2.7213974824975785e-05, - "loss": 0.2625, + "epoch": 1.5336708860759494, + "grad_norm": 0.3319407091362445, + "learning_rate": 3.5026308626494545e-05, + "loss": 0.3134, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04048336297273636, - "step": 550, - "valid_targets_mean": 6000.6, - "valid_targets_min": 1353 + "loss_rank_avg": 0.18031668663024902, + "step": 1515, + "valid_targets_mean": 3750.2, + "valid_targets_min": 977 }, { - "epoch": 2.230886075949367, - "grad_norm": 0.2954368338935399, - "learning_rate": 2.7161198833254464e-05, - "loss": 0.255, + "epoch": 1.53873417721519, + "grad_norm": 0.33498834727615584, + "learning_rate": 3.497958262106801e-05, + "loss": 0.3205, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024658389389514923, - "step": 551, - "valid_targets_mean": 3828.2, - "valid_targets_min": 1487 + "loss_rank_avg": 0.1643165647983551, + "step": 1520, + "valid_targets_mean": 3328.2, + "valid_targets_min": 1474 }, { - "epoch": 2.2349367088607597, - "grad_norm": 0.3349581200749901, - "learning_rate": 2.7108365580778095e-05, - "loss": 0.2616, + "epoch": 1.5437974683544304, + "grad_norm": 0.3537186281988192, + "learning_rate": 3.493266963351339e-05, + "loss": 0.3115, "loss_nan_ranks": 0, - "loss_rank_avg": 0.044930506497621536, - "step": 552, - "valid_targets_mean": 5943.4, - "valid_targets_min": 1021 + "loss_rank_avg": 0.1584773063659668, + "step": 1525, + "valid_targets_mean": 3274.9, + "valid_targets_min": 1245 }, { - "epoch": 2.2389873417721518, - "grad_norm": 0.32044780080787033, - "learning_rate": 2.705547548999996e-05, - "loss": 0.2682, + "epoch": 1.548860759493671, + "grad_norm": 0.30657928098680826, + "learning_rate": 3.4885570249420454e-05, + "loss": 0.3272, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0389753133058548, - "step": 553, - "valid_targets_mean": 4595.6, - "valid_targets_min": 1392 + "loss_rank_avg": 0.1703530102968216, + "step": 1530, + "valid_targets_mean": 5461.6, + "valid_targets_min": 1154 }, { - "epoch": 2.2430379746835443, - "grad_norm": 0.31328225966606027, - "learning_rate": 2.7002528983827817e-05, - "loss": 0.2525, + "epoch": 1.5539240506329115, + "grad_norm": 0.338680368171272, + "learning_rate": 3.483828505670563e-05, + "loss": 0.3255, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0390719473361969, - "step": 554, - "valid_targets_mean": 5455.2, - "valid_targets_min": 1115 + "loss_rank_avg": 0.15479744970798492, + "step": 1535, + "valid_targets_mean": 4522.8, + "valid_targets_min": 1326 }, { - "epoch": 2.247088607594937, - "grad_norm": 0.3350003475019475, - "learning_rate": 2.6949526485620517e-05, - "loss": 0.2556, + "epoch": 1.558987341772152, + "grad_norm": 0.369309200288726, + "learning_rate": 3.479081464560475e-05, + "loss": 0.3191, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0258515402674675, - "step": 555, - "valid_targets_mean": 3440.7, - "valid_targets_min": 1175 + "loss_rank_avg": 0.12721368670463562, + "step": 1540, + "valid_targets_mean": 2568.1, + "valid_targets_min": 1187 }, { - "epoch": 2.2511392405063293, - "grad_norm": 0.29418896974560066, - "learning_rate": 2.6896468419184623e-05, - "loss": 0.2494, + "epoch": 1.5640506329113926, + "grad_norm": 0.3519884099657949, + "learning_rate": 3.474315960866558e-05, + "loss": 0.326, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04345124214887619, - "step": 556, - "valid_targets_mean": 6255.0, - "valid_targets_min": 1503 + "loss_rank_avg": 0.21022453904151917, + "step": 1545, + "valid_targets_mean": 5003.4, + "valid_targets_min": 1333 }, { - "epoch": 2.2551898734177214, - "grad_norm": 0.31284133074228326, - "learning_rate": 2.6843355208771024e-05, - "loss": 0.2651, + "epoch": 1.5691139240506329, + "grad_norm": 0.33365501094495986, + "learning_rate": 3.469532054074049e-05, + "loss": 0.3275, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0307228434830904, - "step": 557, - "valid_targets_mean": 4361.1, - "valid_targets_min": 1051 + "loss_rank_avg": 0.11454639583826065, + "step": 1550, + "valid_targets_mean": 3473.1, + "valid_targets_min": 1181 }, { - "epoch": 2.259240506329114, - "grad_norm": 0.29849006850574383, - "learning_rate": 2.6790187279071536e-05, - "loss": 0.2478, + "epoch": 1.5741772151898734, + "grad_norm": 0.3487036569312814, + "learning_rate": 3.464729803897902e-05, + "loss": 0.3207, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03746002912521362, - "step": 558, - "valid_targets_mean": 5214.0, - "valid_targets_min": 1284 + "loss_rank_avg": 0.0789409726858139, + "step": 1555, + "valid_targets_mean": 2519.6, + "valid_targets_min": 1102 }, { - "epoch": 2.2632911392405064, - "grad_norm": 0.3481923927123726, - "learning_rate": 2.67369650552155e-05, - "loss": 0.2453, + "epoch": 1.579240506329114, + "grad_norm": 0.33603816281790033, + "learning_rate": 3.4599092702820394e-05, + "loss": 0.3031, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026629457250237465, - "step": 559, - "valid_targets_mean": 3566.5, - "valid_targets_min": 1076 + "loss_rank_avg": 0.11025889217853546, + "step": 1560, + "valid_targets_mean": 2999.4, + "valid_targets_min": 1318 }, { - "epoch": 2.267341772151899, - "grad_norm": 0.3109132395106645, - "learning_rate": 2.6683688962766407e-05, - "loss": 0.2471, + "epoch": 1.5843037974683545, + "grad_norm": 0.33746214929234886, + "learning_rate": 3.4550705133986085e-05, + "loss": 0.3297, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03283403813838959, - "step": 560, - "valid_targets_mean": 4456.2, - "valid_targets_min": 1371 + "loss_rank_avg": 0.1540479212999344, + "step": 1565, + "valid_targets_mean": 3550.0, + "valid_targets_min": 1130 }, { - "epoch": 2.271392405063291, - "grad_norm": 0.3371083146820319, - "learning_rate": 2.6630359427718468e-05, - "loss": 0.2573, + "epoch": 1.5893670886075948, + "grad_norm": 0.3700007824527567, + "learning_rate": 3.450213593647226e-05, + "loss": 0.3235, "loss_nan_ranks": 0, - "loss_rank_avg": 0.033645160496234894, - "step": 561, - "valid_targets_mean": 4629.2, - "valid_targets_min": 1251 + "loss_rank_avg": 0.179176926612854, + "step": 1570, + "valid_targets_mean": 3797.9, + "valid_targets_min": 1468 }, { - "epoch": 2.2754430379746835, - "grad_norm": 0.3004647288482063, - "learning_rate": 2.6576976876493225e-05, - "loss": 0.2519, + "epoch": 1.5944303797468353, + "grad_norm": 0.43557784622964635, + "learning_rate": 3.445338571654227e-05, + "loss": 0.3245, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03325767070055008, - "step": 562, - "valid_targets_mean": 4771.6, - "valid_targets_min": 1227 + "loss_rank_avg": 0.12111867964267731, + "step": 1575, + "valid_targets_mean": 3459.7, + "valid_targets_min": 1321 }, { - "epoch": 2.279493670886076, - "grad_norm": 0.3084492314730116, - "learning_rate": 2.652354173593614e-05, - "loss": 0.2545, + "epoch": 1.5994936708860759, + "grad_norm": 0.3234005911683535, + "learning_rate": 3.440445508271907e-05, + "loss": 0.3303, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03239452838897705, - "step": 563, - "valid_targets_mean": 4820.6, - "valid_targets_min": 1212 + "loss_rank_avg": 0.11545059829950333, + "step": 1580, + "valid_targets_mean": 3151.1, + "valid_targets_min": 966 }, { - "epoch": 2.2835443037974685, - "grad_norm": 0.29720381711643024, - "learning_rate": 2.647005443331317e-05, - "loss": 0.2509, + "epoch": 1.6045569620253164, + "grad_norm": 0.31481487050082757, + "learning_rate": 3.435534464577762e-05, + "loss": 0.3185, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04225707799196243, - "step": 564, - "valid_targets_mean": 6034.4, - "valid_targets_min": 1778 + "loss_rank_avg": 0.14349615573883057, + "step": 1585, + "valid_targets_mean": 4098.8, + "valid_targets_min": 1553 }, { - "epoch": 2.2875949367088606, - "grad_norm": 0.32464412279319016, - "learning_rate": 2.6416515396307354e-05, - "loss": 0.2561, + "epoch": 1.609620253164557, + "grad_norm": 0.3263020753319533, + "learning_rate": 3.4306055018737274e-05, + "loss": 0.3262, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03437012434005737, - "step": 565, - "valid_targets_mean": 5410.6, - "valid_targets_min": 1184 + "loss_rank_avg": 0.1886909306049347, + "step": 1590, + "valid_targets_mean": 5531.9, + "valid_targets_min": 1290 }, { - "epoch": 2.291645569620253, - "grad_norm": 0.28582335898985517, - "learning_rate": 2.636292505301542e-05, - "loss": 0.2428, + "epoch": 1.6146835443037975, + "grad_norm": 0.3440773642804762, + "learning_rate": 3.425658681685412e-05, + "loss": 0.3247, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04065122827887535, - "step": 566, - "valid_targets_mean": 5343.1, - "valid_targets_min": 967 + "loss_rank_avg": 0.17707081139087677, + "step": 1595, + "valid_targets_mean": 3534.2, + "valid_targets_min": 1301 }, { - "epoch": 2.2956962025316456, - "grad_norm": 0.3090492224591957, - "learning_rate": 2.630928383194431e-05, - "loss": 0.2616, + "epoch": 1.619746835443038, + "grad_norm": 0.32440177535383535, + "learning_rate": 3.420694065761328e-05, + "loss": 0.3282, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0343312993645668, - "step": 567, - "valid_targets_mean": 4817.2, - "valid_targets_min": 1044 + "loss_rank_avg": 0.16896334290504456, + "step": 1600, + "valid_targets_mean": 4122.1, + "valid_targets_min": 965 }, { - "epoch": 2.299746835443038, - "grad_norm": 0.30265240010177236, - "learning_rate": 2.625559216200781e-05, - "loss": 0.2431, + "epoch": 1.6248101265822785, + "grad_norm": 0.2928837040048521, + "learning_rate": 3.415711716072126e-05, + "loss": 0.3318, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04373444989323616, - "step": 568, - "valid_targets_mean": 4855.6, - "valid_targets_min": 1664 + "loss_rank_avg": 0.17395883798599243, + "step": 1605, + "valid_targets_mean": 5510.7, + "valid_targets_min": 1836 }, { - "epoch": 2.3037974683544302, - "grad_norm": 0.33054956711731676, - "learning_rate": 2.620185047252308e-05, - "loss": 0.2492, + "epoch": 1.629873417721519, + "grad_norm": 0.29996111358110616, + "learning_rate": 3.4107116948098155e-05, + "loss": 0.3208, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028334802016615868, - "step": 569, - "valid_targets_mean": 3796.7, - "valid_targets_min": 1634 + "loss_rank_avg": 0.16087687015533447, + "step": 1610, + "valid_targets_mean": 5291.1, + "valid_targets_min": 812 }, { - "epoch": 2.3078481012658227, - "grad_norm": 0.2893427649819799, - "learning_rate": 2.614805919320723e-05, - "loss": 0.2513, + "epoch": 1.6349367088607596, + "grad_norm": 0.3391636106718898, + "learning_rate": 3.4056940643869905e-05, + "loss": 0.328, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03859302029013634, - "step": 570, - "valid_targets_mean": 5215.1, - "valid_targets_min": 1381 + "loss_rank_avg": 0.12102165073156357, + "step": 1615, + "valid_targets_mean": 2991.2, + "valid_targets_min": 987 }, { - "epoch": 2.3118987341772153, - "grad_norm": 0.3194258038200415, - "learning_rate": 2.6094218754173895e-05, - "loss": 0.2371, + "epoch": 1.6400000000000001, + "grad_norm": 0.30250385799846535, + "learning_rate": 3.400658887436051e-05, + "loss": 0.3274, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030152734369039536, - "step": 571, - "valid_targets_mean": 4297.3, - "valid_targets_min": 1471 + "loss_rank_avg": 0.15453828871250153, + "step": 1620, + "valid_targets_mean": 4200.3, + "valid_targets_min": 1171 }, { - "epoch": 2.3159493670886078, - "grad_norm": 0.280776302266346, - "learning_rate": 2.60403295859298e-05, - "loss": 0.2615, + "epoch": 1.6450632911392407, + "grad_norm": 0.31602413877978186, + "learning_rate": 3.3956062268084215e-05, + "loss": 0.3167, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026337942108511925, - "step": 572, - "valid_targets_mean": 3656.6, - "valid_targets_min": 1274 + "loss_rank_avg": 0.1669228971004486, + "step": 1625, + "valid_targets_mean": 4059.0, + "valid_targets_min": 1494 }, { - "epoch": 2.32, - "grad_norm": 0.33440416375508986, - "learning_rate": 2.5986392119371297e-05, - "loss": 0.2513, + "epoch": 1.650126582278481, + "grad_norm": 0.3430073741627476, + "learning_rate": 3.390536145573766e-05, + "loss": 0.3311, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02118188515305519, - "step": 573, - "valid_targets_mean": 3200.6, - "valid_targets_min": 941 + "loss_rank_avg": 0.15472730994224548, + "step": 1630, + "valid_targets_mean": 4020.9, + "valid_targets_min": 1121 }, { - "epoch": 2.3240506329113924, - "grad_norm": 0.31817968317557005, - "learning_rate": 2.5932406785780928e-05, - "loss": 0.262, + "epoch": 1.6551898734177215, + "grad_norm": 0.30844530157775457, + "learning_rate": 3.385448707019199e-05, + "loss": 0.3353, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03111189603805542, - "step": 574, - "valid_targets_mean": 4663.5, - "valid_targets_min": 1166 + "loss_rank_avg": 0.13865000009536743, + "step": 1635, + "valid_targets_mean": 3493.1, + "valid_targets_min": 1516 }, { - "epoch": 2.328101265822785, - "grad_norm": 0.3240466981779466, - "learning_rate": 2.5878374016823987e-05, - "loss": 0.2563, + "epoch": 1.660253164556962, + "grad_norm": 0.29309820401088355, + "learning_rate": 3.3803439746484996e-05, + "loss": 0.3371, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03127945959568024, - "step": 575, - "valid_targets_mean": 3908.9, - "valid_targets_min": 1131 + "loss_rank_avg": 0.18344934284687042, + "step": 1640, + "valid_targets_mean": 5938.8, + "valid_targets_min": 1370 }, { - "epoch": 2.3321518987341774, - "grad_norm": 0.2814225317214882, - "learning_rate": 2.5824294244545045e-05, - "loss": 0.2551, + "epoch": 1.6653164556962026, + "grad_norm": 0.2917099753276015, + "learning_rate": 3.375222012181315e-05, + "loss": 0.3324, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03601471334695816, - "step": 576, - "valid_targets_mean": 5604.6, - "valid_targets_min": 1460 + "loss_rank_avg": 0.15374572575092316, + "step": 1645, + "valid_targets_mean": 4424.8, + "valid_targets_min": 1247 }, { - "epoch": 2.3362025316455695, - "grad_norm": 0.2925319005446987, - "learning_rate": 2.577016790136453e-05, - "loss": 0.2413, + "epoch": 1.6703797468354429, + "grad_norm": 0.31174326634371047, + "learning_rate": 3.370082883552366e-05, + "loss": 0.3248, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02899722382426262, - "step": 577, - "valid_targets_mean": 4115.4, - "valid_targets_min": 1293 + "loss_rank_avg": 0.18676675856113434, + "step": 1650, + "valid_targets_mean": 4577.7, + "valid_targets_min": 1153 }, { - "epoch": 2.340253164556962, - "grad_norm": 0.299581537856646, - "learning_rate": 2.5715995420075225e-05, - "loss": 0.2561, + "epoch": 1.6754430379746834, + "grad_norm": 0.2937468748877549, + "learning_rate": 3.364926652910651e-05, + "loss": 0.328, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03020840510725975, - "step": 578, - "valid_targets_mean": 3873.1, - "valid_targets_min": 981 + "loss_rank_avg": 0.14714813232421875, + "step": 1655, + "valid_targets_mean": 4791.8, + "valid_targets_min": 1213 }, { - "epoch": 2.3443037974683545, - "grad_norm": 0.29720958694672484, - "learning_rate": 2.5661777233838864e-05, - "loss": 0.2575, + "epoch": 1.680506329113924, + "grad_norm": 0.2879601888076102, + "learning_rate": 3.359753384618641e-05, + "loss": 0.3339, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03779207915067673, - "step": 579, - "valid_targets_mean": 6122.3, - "valid_targets_min": 1270 + "loss_rank_avg": 0.19825273752212524, + "step": 1660, + "valid_targets_mean": 6597.8, + "valid_targets_min": 1694 }, { - "epoch": 2.348354430379747, - "grad_norm": 0.3339333851319194, - "learning_rate": 2.5607513776182616e-05, - "loss": 0.263, + "epoch": 1.6855696202531645, + "grad_norm": 0.28734174588103684, + "learning_rate": 3.354563143251483e-05, + "loss": 0.3251, "loss_nan_ranks": 0, - "loss_rank_avg": 0.033767811954021454, - "step": 580, - "valid_targets_mean": 3967.4, - "valid_targets_min": 1203 + "loss_rank_avg": 0.13350191712379456, + "step": 1665, + "valid_targets_mean": 4193.6, + "valid_targets_min": 1311 }, { - "epoch": 2.352405063291139, - "grad_norm": 0.29003044027962066, - "learning_rate": 2.5553205480995633e-05, - "loss": 0.2498, + "epoch": 1.690632911392405, + "grad_norm": 0.3163624678749021, + "learning_rate": 3.3493559935961854e-05, + "loss": 0.3332, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0322946161031723, - "step": 581, - "valid_targets_mean": 4815.2, - "valid_targets_min": 1555 + "loss_rank_avg": 0.19836895167827606, + "step": 1670, + "valid_targets_mean": 4911.9, + "valid_targets_min": 1235 }, { - "epoch": 2.3564556962025316, - "grad_norm": 0.3183024209535512, - "learning_rate": 2.5498852782525604e-05, - "loss": 0.2516, + "epoch": 1.6956962025316455, + "grad_norm": 0.26723388677186943, + "learning_rate": 3.344132000650817e-05, + "loss": 0.3229, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030187206342816353, - "step": 582, - "valid_targets_mean": 4114.4, - "valid_targets_min": 1224 + "loss_rank_avg": 0.14954085648059845, + "step": 1675, + "valid_targets_mean": 5300.8, + "valid_targets_min": 1208 }, { - "epoch": 2.360506329113924, - "grad_norm": 0.314168755966548, - "learning_rate": 2.5444456115375252e-05, - "loss": 0.2633, + "epoch": 1.700759493670886, + "grad_norm": 0.3367011427653921, + "learning_rate": 3.3388912296236906e-05, + "loss": 0.3369, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028827618807554245, - "step": 583, - "valid_targets_mean": 4781.1, - "valid_targets_min": 1182 + "loss_rank_avg": 0.15727460384368896, + "step": 1680, + "valid_targets_mean": 3988.7, + "valid_targets_min": 1264 }, { - "epoch": 2.3645569620253166, - "grad_norm": 0.3343463864058334, - "learning_rate": 2.539001591449889e-05, - "loss": 0.2627, + "epoch": 1.7058227848101266, + "grad_norm": 0.46226016366837236, + "learning_rate": 3.3336337459325526e-05, + "loss": 0.3267, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04334329441189766, - "step": 584, - "valid_targets_mean": 5263.4, - "valid_targets_min": 1096 + "loss_rank_avg": 0.18564224243164062, + "step": 1685, + "valid_targets_mean": 6143.1, + "valid_targets_min": 1246 }, { - "epoch": 2.3686075949367087, - "grad_norm": 0.3110837175415601, - "learning_rate": 2.53355326151989e-05, - "loss": 0.2553, + "epoch": 1.7108860759493671, + "grad_norm": 0.3175894026472926, + "learning_rate": 3.328359615203762e-05, + "loss": 0.3183, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021545108407735825, - "step": 585, - "valid_targets_mean": 3364.5, - "valid_targets_min": 1045 + "loss_rank_avg": 0.19139517843723297, + "step": 1690, + "valid_targets_mean": 5133.2, + "valid_targets_min": 1080 }, { - "epoch": 2.372658227848101, - "grad_norm": 0.32372691675059556, - "learning_rate": 2.5281006653122288e-05, - "loss": 0.2764, + "epoch": 1.7159493670886077, + "grad_norm": 0.27972490266827565, + "learning_rate": 3.323068903271476e-05, + "loss": 0.3344, "loss_nan_ranks": 0, - "loss_rank_avg": 0.05526130646467209, - "step": 586, - "valid_targets_mean": 6967.8, - "valid_targets_min": 1101 + "loss_rank_avg": 0.11912492662668228, + "step": 1695, + "valid_targets_mean": 4236.9, + "valid_targets_min": 1213 }, { - "epoch": 2.3767088607594937, - "grad_norm": 0.3012800068851953, - "learning_rate": 2.5226438464257217e-05, - "loss": 0.2638, + "epoch": 1.7210126582278482, + "grad_norm": 0.28905133463909916, + "learning_rate": 3.317761676176825e-05, + "loss": 0.326, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030668795108795166, - "step": 587, - "valid_targets_mean": 4461.7, - "valid_targets_min": 946 + "loss_rank_avg": 0.1582462191581726, + "step": 1700, + "valid_targets_mean": 5365.8, + "valid_targets_min": 1347 }, { - "epoch": 2.3807594936708862, - "grad_norm": 0.3124888285643379, - "learning_rate": 2.5171828484929447e-05, - "loss": 0.2603, + "epoch": 1.7260759493670887, + "grad_norm": 0.2928951231194897, + "learning_rate": 3.31243800016709e-05, + "loss": 0.3223, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03976716846227646, - "step": 588, - "valid_targets_mean": 4932.5, - "valid_targets_min": 1154 + "loss_rank_avg": 0.16399666666984558, + "step": 1705, + "valid_targets_mean": 4478.1, + "valid_targets_min": 913 }, { - "epoch": 2.3848101265822783, - "grad_norm": 0.2691816871691649, - "learning_rate": 2.5117177151798934e-05, - "loss": 0.2558, + "epoch": 1.7311392405063293, + "grad_norm": 0.3067909993527847, + "learning_rate": 3.307097941694876e-05, + "loss": 0.3203, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03441830724477768, - "step": 589, - "valid_targets_mean": 5091.1, - "valid_targets_min": 1419 + "loss_rank_avg": 0.1793975532054901, + "step": 1710, + "valid_targets_mean": 4923.8, + "valid_targets_min": 1793 }, { - "epoch": 2.388860759493671, - "grad_norm": 0.3215830136868613, - "learning_rate": 2.506248490185627e-05, - "loss": 0.2631, + "epoch": 1.7362025316455696, + "grad_norm": 0.29679310025747774, + "learning_rate": 3.3017415674172795e-05, + "loss": 0.3205, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030570250004529953, - "step": 590, - "valid_targets_mean": 3887.0, - "valid_targets_min": 1148 + "loss_rank_avg": 0.1398848444223404, + "step": 1715, + "valid_targets_mean": 3729.8, + "valid_targets_min": 1281 }, { - "epoch": 2.3929113924050633, - "grad_norm": 0.31485583061901584, - "learning_rate": 2.500775217241923e-05, - "loss": 0.261, + "epoch": 1.7412658227848101, + "grad_norm": 0.33157110353633035, + "learning_rate": 3.296368944195059e-05, + "loss": 0.3258, "loss_nan_ranks": 0, - "loss_rank_avg": 0.038104347884655, - "step": 591, - "valid_targets_mean": 6018.2, - "valid_targets_min": 1546 + "loss_rank_avg": 0.16532516479492188, + "step": 1720, + "valid_targets_mean": 4614.5, + "valid_targets_min": 1146 }, { - "epoch": 2.396962025316456, - "grad_norm": 0.3080284475460948, - "learning_rate": 2.4952979401129266e-05, - "loss": 0.2537, + "epoch": 1.7463291139240507, + "grad_norm": 0.3237383031466305, + "learning_rate": 3.2909801390918e-05, + "loss": 0.3165, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02784135192632675, - "step": 592, - "valid_targets_mean": 3782.8, - "valid_targets_min": 861 + "loss_rank_avg": 0.16484834253787994, + "step": 1725, + "valid_targets_mean": 4277.1, + "valid_targets_min": 1022 }, { - "epoch": 2.401012658227848, - "grad_norm": 0.304473757906764, - "learning_rate": 2.4898167025948e-05, - "loss": 0.257, + "epoch": 1.7513924050632912, + "grad_norm": 0.30303998742275196, + "learning_rate": 3.285575219373079e-05, + "loss": 0.3068, "loss_nan_ranks": 0, - "loss_rank_avg": 0.025928225368261337, - "step": 593, - "valid_targets_mean": 3888.6, - "valid_targets_min": 1261 + "loss_rank_avg": 0.1231764703989029, + "step": 1730, + "valid_targets_mean": 3492.9, + "valid_targets_min": 908 }, { - "epoch": 2.4050632911392404, - "grad_norm": 0.3231693169855003, - "learning_rate": 2.4843315485153703e-05, - "loss": 0.2577, + "epoch": 1.7564556962025315, + "grad_norm": 0.3131204074959935, + "learning_rate": 3.2801542525056204e-05, + "loss": 0.3236, "loss_nan_ranks": 0, - "loss_rank_avg": 0.029293615370988846, - "step": 594, - "valid_targets_mean": 4102.2, - "valid_targets_min": 1643 + "loss_rank_avg": 0.16018734872341156, + "step": 1735, + "valid_targets_mean": 4216.1, + "valid_targets_min": 909 }, { - "epoch": 2.409113924050633, - "grad_norm": 0.30696065623367774, - "learning_rate": 2.4788425217337856e-05, - "loss": 0.2618, + "epoch": 1.761518987341772, + "grad_norm": 0.5226006047690179, + "learning_rate": 3.27471730615646e-05, + "loss": 0.3168, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03263610601425171, - "step": 595, - "valid_targets_mean": 3837.7, - "valid_targets_min": 1478 + "loss_rank_avg": 0.13336879014968872, + "step": 1740, + "valid_targets_mean": 3926.2, + "valid_targets_min": 1825 }, { - "epoch": 2.4131645569620255, - "grad_norm": 0.2760337527277468, - "learning_rate": 2.4733496661401572e-05, - "loss": 0.2501, + "epoch": 1.7665822784810126, + "grad_norm": 0.288468699839479, + "learning_rate": 3.2692644481920926e-05, + "loss": 0.3091, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04267909377813339, - "step": 596, - "valid_targets_mean": 6636.6, - "valid_targets_min": 1240 + "loss_rank_avg": 0.1637478768825531, + "step": 1745, + "valid_targets_mean": 5375.8, + "valid_targets_min": 1193 }, { - "epoch": 2.4172151898734175, - "grad_norm": 0.30165615231190473, - "learning_rate": 2.4678530256552114e-05, - "loss": 0.2604, + "epoch": 1.771645569620253, + "grad_norm": 0.3345448593431147, + "learning_rate": 3.263795746677633e-05, + "loss": 0.3169, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021362274885177612, - "step": 597, - "valid_targets_mean": 3170.9, - "valid_targets_min": 737 + "loss_rank_avg": 0.1924804151058197, + "step": 1750, + "valid_targets_mean": 4262.6, + "valid_targets_min": 1246 }, { - "epoch": 2.42126582278481, - "grad_norm": 0.3280741446956329, - "learning_rate": 2.46235264422994e-05, - "loss": 0.265, + "epoch": 1.7767088607594936, + "grad_norm": 0.31362215927542564, + "learning_rate": 3.258311269875959e-05, + "loss": 0.3295, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03641419857740402, - "step": 598, - "valid_targets_mean": 5095.3, - "valid_targets_min": 1164 + "loss_rank_avg": 0.17079243063926697, + "step": 1755, + "valid_targets_mean": 4860.1, + "valid_targets_min": 1270 }, { - "epoch": 2.4253164556962026, - "grad_norm": 0.28204803788757965, - "learning_rate": 2.4568485658452452e-05, - "loss": 0.2584, + "epoch": 1.7817721518987342, + "grad_norm": 0.2881402537381144, + "learning_rate": 3.2528110862468664e-05, + "loss": 0.3212, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03064592182636261, - "step": 599, - "valid_targets_mean": 3911.2, - "valid_targets_min": 945 + "loss_rank_avg": 0.1843501329421997, + "step": 1760, + "valid_targets_mean": 5736.6, + "valid_targets_min": 1442 }, { - "epoch": 2.429367088607595, - "grad_norm": 0.29615169855204987, - "learning_rate": 2.451340834511592e-05, - "loss": 0.25, + "epoch": 1.7868354430379747, + "grad_norm": 0.33940507923082636, + "learning_rate": 3.247295264446209e-05, + "loss": 0.332, "loss_nan_ranks": 0, - "loss_rank_avg": 0.031110865995287895, - "step": 600, - "valid_targets_mean": 4780.8, - "valid_targets_min": 1196 + "loss_rank_avg": 0.14896419644355774, + "step": 1765, + "valid_targets_mean": 4003.1, + "valid_targets_min": 960 }, { - "epoch": 2.433417721518987, - "grad_norm": 0.3188121688952584, - "learning_rate": 2.4458294942686528e-05, - "loss": 0.2637, + "epoch": 1.7918987341772152, + "grad_norm": 0.305481958944062, + "learning_rate": 3.241763873325044e-05, + "loss": 0.3321, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04329856485128403, - "step": 601, - "valid_targets_mean": 5275.9, - "valid_targets_min": 985 + "loss_rank_avg": 0.1464659869670868, + "step": 1770, + "valid_targets_mean": 4571.4, + "valid_targets_min": 1342 }, { - "epoch": 2.4374683544303797, - "grad_norm": 0.27951189958912137, - "learning_rate": 2.440314589184958e-05, - "loss": 0.2478, + "epoch": 1.7969620253164558, + "grad_norm": 0.3372959294620817, + "learning_rate": 3.2362169819287714e-05, + "loss": 0.3198, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024103689938783646, - "step": 602, - "valid_targets_mean": 4180.4, - "valid_targets_min": 1375 + "loss_rank_avg": 0.17976534366607666, + "step": 1775, + "valid_targets_mean": 4028.4, + "valid_targets_min": 1261 }, { - "epoch": 2.441518987341772, - "grad_norm": 0.3029822254490272, - "learning_rate": 2.434796163357541e-05, - "loss": 0.2592, + "epoch": 1.8020253164556963, + "grad_norm": 0.3104430678347615, + "learning_rate": 3.2306546594962744e-05, + "loss": 0.3274, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03358926996588707, - "step": 603, - "valid_targets_mean": 4572.4, - "valid_targets_min": 1281 + "loss_rank_avg": 0.16419640183448792, + "step": 1780, + "valid_targets_mean": 4230.2, + "valid_targets_min": 1132 }, { - "epoch": 2.4455696202531647, - "grad_norm": 0.3031290979928384, - "learning_rate": 2.429274260911588e-05, - "loss": 0.2454, + "epoch": 1.8070886075949368, + "grad_norm": 0.3241781257834898, + "learning_rate": 3.225076975459051e-05, + "loss": 0.3243, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026303090155124664, - "step": 604, - "valid_targets_mean": 3897.2, - "valid_targets_min": 1052 + "loss_rank_avg": 0.21455496549606323, + "step": 1785, + "valid_targets_mean": 7327.5, + "valid_targets_min": 1343 }, { - "epoch": 2.449620253164557, - "grad_norm": 0.28066556297235906, - "learning_rate": 2.423748926000084e-05, - "loss": 0.2531, + "epoch": 1.8121518987341774, + "grad_norm": 0.3169044936479542, + "learning_rate": 3.219483999440354e-05, + "loss": 0.3169, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02926298975944519, - "step": 605, - "valid_targets_mean": 3421.1, - "valid_targets_min": 885 + "loss_rank_avg": 0.12079709023237228, + "step": 1790, + "valid_targets_mean": 3173.1, + "valid_targets_min": 1329 }, { - "epoch": 2.4536708860759493, - "grad_norm": 0.3193581360060043, - "learning_rate": 2.418220202803459e-05, - "loss": 0.2507, + "epoch": 1.8172151898734177, + "grad_norm": 0.331980276874436, + "learning_rate": 3.213875801254314e-05, + "loss": 0.3276, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02278808504343033, - "step": 606, - "valid_targets_mean": 3550.6, - "valid_targets_min": 851 + "loss_rank_avg": 0.14299997687339783, + "step": 1795, + "valid_targets_mean": 3476.8, + "valid_targets_min": 942 }, { - "epoch": 2.457721518987342, - "grad_norm": 0.3047526662043712, - "learning_rate": 2.412688135529237e-05, - "loss": 0.2587, + "epoch": 1.8222784810126582, + "grad_norm": 0.25978521097127444, + "learning_rate": 3.2082524509050723e-05, + "loss": 0.3194, "loss_nan_ranks": 0, - "loss_rank_avg": 0.031472377479076385, - "step": 607, - "valid_targets_mean": 4140.6, - "valid_targets_min": 1186 + "loss_rank_avg": 0.14637430012226105, + "step": 1800, + "valid_targets_mean": 5356.4, + "valid_targets_min": 1492 }, { - "epoch": 2.4617721518987343, - "grad_norm": 0.3030005726177253, - "learning_rate": 2.4071527684116807e-05, - "loss": 0.2555, + "epoch": 1.8273417721518987, + "grad_norm": 0.4563427674089283, + "learning_rate": 3.20261401858591e-05, + "loss": 0.3246, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03665117174386978, - "step": 608, - "valid_targets_mean": 4628.6, - "valid_targets_min": 1018 + "loss_rank_avg": 0.16923454403877258, + "step": 1805, + "valid_targets_mean": 4621.6, + "valid_targets_min": 954 }, { - "epoch": 2.4658227848101264, - "grad_norm": 0.2852849326847587, - "learning_rate": 2.4016141457114372e-05, - "loss": 0.263, + "epoch": 1.8324050632911393, + "grad_norm": 0.28306126650762187, + "learning_rate": 3.196960574678363e-05, + "loss": 0.3335, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030085954815149307, - "step": 609, - "valid_targets_mean": 4607.9, - "valid_targets_min": 838 + "loss_rank_avg": 0.11796549707651138, + "step": 1810, + "valid_targets_mean": 4214.6, + "valid_targets_min": 1296 }, { - "epoch": 2.469873417721519, - "grad_norm": 0.29629453300425185, - "learning_rate": 2.396072311715187e-05, - "loss": 0.2536, + "epoch": 1.8374683544303796, + "grad_norm": 0.32499866336342004, + "learning_rate": 3.191292189751353e-05, + "loss": 0.3441, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03866127133369446, - "step": 610, - "valid_targets_mean": 4362.0, - "valid_targets_min": 1135 + "loss_rank_avg": 0.1776324212551117, + "step": 1815, + "valid_targets_mean": 5706.0, + "valid_targets_min": 1377 }, { - "epoch": 2.4739240506329114, - "grad_norm": 0.3143382515410931, - "learning_rate": 2.3905273107352863e-05, - "loss": 0.2616, + "epoch": 1.8425316455696201, + "grad_norm": 0.29118017187843964, + "learning_rate": 3.185608934560301e-05, + "loss": 0.3105, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03561249375343323, - "step": 611, - "valid_targets_mean": 4431.0, - "valid_targets_min": 851 + "loss_rank_avg": 0.12048833072185516, + "step": 1820, + "valid_targets_mean": 3490.6, + "valid_targets_min": 1465 }, { - "epoch": 2.477974683544304, - "grad_norm": 0.31019023346681507, - "learning_rate": 2.3849791871094157e-05, - "loss": 0.2507, + "epoch": 1.8475949367088607, + "grad_norm": 0.3114271880772345, + "learning_rate": 3.1799108800462466e-05, + "loss": 0.3377, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03153450787067413, - "step": 612, - "valid_targets_mean": 4573.3, - "valid_targets_min": 1572 + "loss_rank_avg": 0.14511121809482574, + "step": 1825, + "valid_targets_mean": 3770.2, + "valid_targets_min": 1165 }, { - "epoch": 2.482025316455696, - "grad_norm": 0.32844579891221176, - "learning_rate": 2.379427985200223e-05, - "loss": 0.2675, + "epoch": 1.8526582278481012, + "grad_norm": 0.28385385385120393, + "learning_rate": 3.174198097334959e-05, + "loss": 0.3248, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03163554146885872, - "step": 613, - "valid_targets_mean": 4459.7, - "valid_targets_min": 1269 + "loss_rank_avg": 0.16951501369476318, + "step": 1830, + "valid_targets_mean": 4812.1, + "valid_targets_min": 995 }, { - "epoch": 2.4860759493670885, - "grad_norm": 0.3329061332621241, - "learning_rate": 2.373873749394972e-05, - "loss": 0.257, + "epoch": 1.8577215189873417, + "grad_norm": 0.33205815359378843, + "learning_rate": 3.168470657736053e-05, + "loss": 0.3306, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02794114127755165, - "step": 614, - "valid_targets_mean": 4042.6, - "valid_targets_min": 920 + "loss_rank_avg": 0.1585332751274109, + "step": 1835, + "valid_targets_mean": 5364.6, + "valid_targets_min": 1166 }, { - "epoch": 2.490126582278481, - "grad_norm": 0.2943763735771222, - "learning_rate": 2.3683165241051836e-05, - "loss": 0.2532, + "epoch": 1.8627848101265823, + "grad_norm": 0.31689534847164214, + "learning_rate": 3.162728632742098e-05, + "loss": 0.3115, "loss_nan_ranks": 0, - "loss_rank_avg": 0.031979337334632874, - "step": 615, - "valid_targets_mean": 4640.8, - "valid_targets_min": 1653 + "loss_rank_avg": 0.16908031702041626, + "step": 1840, + "valid_targets_mean": 4225.1, + "valid_targets_min": 1101 }, { - "epoch": 2.4941772151898736, - "grad_norm": 0.2833662624998012, - "learning_rate": 2.3627563537662823e-05, - "loss": 0.2539, + "epoch": 1.8678481012658228, + "grad_norm": 0.3087921045352938, + "learning_rate": 3.1569720940277267e-05, + "loss": 0.3178, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02792017161846161, - "step": 616, - "valid_targets_mean": 4450.8, - "valid_targets_min": 1192 + "loss_rank_avg": 0.17665991187095642, + "step": 1845, + "valid_targets_mean": 4580.1, + "valid_targets_min": 1284 }, { - "epoch": 2.4982278481012656, - "grad_norm": 0.29721603364616905, - "learning_rate": 2.3571932828372433e-05, - "loss": 0.2653, + "epoch": 1.8729113924050633, + "grad_norm": 0.28162350739466746, + "learning_rate": 3.151201113448735e-05, + "loss": 0.3198, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028793741017580032, - "step": 617, - "valid_targets_mean": 4491.8, - "valid_targets_min": 1621 + "loss_rank_avg": 0.16834816336631775, + "step": 1850, + "valid_targets_mean": 5581.1, + "valid_targets_min": 1362 }, { - "epoch": 2.502278481012658, - "grad_norm": 0.33444596381029734, - "learning_rate": 2.351627355800233e-05, - "loss": 0.2655, + "epoch": 1.8779746835443039, + "grad_norm": 0.33622728572725646, + "learning_rate": 3.1454157630411905e-05, + "loss": 0.3297, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01843031868338585, - "step": 618, - "valid_targets_mean": 2783.9, - "valid_targets_min": 1379 + "loss_rank_avg": 0.13378959894180298, + "step": 1855, + "valid_targets_mean": 3264.8, + "valid_targets_min": 1178 }, { - "epoch": 2.5063291139240507, - "grad_norm": 0.27517479471822953, - "learning_rate": 2.346058617160256e-05, - "loss": 0.2591, + "epoch": 1.8830379746835444, + "grad_norm": 0.2888883767197038, + "learning_rate": 3.1396161150205324e-05, + "loss": 0.3243, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026295827701687813, - "step": 619, - "valid_targets_mean": 3806.1, - "valid_targets_min": 1204 + "loss_rank_avg": 0.15173028409481049, + "step": 1860, + "valid_targets_mean": 5245.6, + "valid_targets_min": 1061 }, { - "epoch": 2.510379746835443, - "grad_norm": 0.2916338910826666, - "learning_rate": 2.3404871114447976e-05, - "loss": 0.2548, + "epoch": 1.888101265822785, + "grad_norm": 0.3008873149690533, + "learning_rate": 3.133802241780669e-05, + "loss": 0.3417, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020635364577174187, - "step": 620, - "valid_targets_mean": 3079.8, - "valid_targets_min": 1064 + "loss_rank_avg": 0.168132483959198, + "step": 1865, + "valid_targets_mean": 5342.9, + "valid_targets_min": 1360 }, { - "epoch": 2.5144303797468357, - "grad_norm": 0.30894555244757066, - "learning_rate": 2.334912883203469e-05, - "loss": 0.2701, + "epoch": 1.8931645569620255, + "grad_norm": 0.30961408751335806, + "learning_rate": 3.127974215893075e-05, + "loss": 0.3319, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04036060720682144, - "step": 621, - "valid_targets_mean": 5933.9, - "valid_targets_min": 1190 + "loss_rank_avg": 0.15519672632217407, + "step": 1870, + "valid_targets_mean": 4829.4, + "valid_targets_min": 1051 }, { - "epoch": 2.5184810126582278, - "grad_norm": 0.30113451870141916, - "learning_rate": 2.3293359770076505e-05, - "loss": 0.2661, + "epoch": 1.8982278481012658, + "grad_norm": 0.3375894079078535, + "learning_rate": 3.1221321101058826e-05, + "loss": 0.3233, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023307915776968002, - "step": 622, - "valid_targets_mean": 3366.1, - "valid_targets_min": 1684 + "loss_rank_avg": 0.2031598687171936, + "step": 1875, + "valid_targets_mean": 4846.2, + "valid_targets_min": 1616 }, { - "epoch": 2.5225316455696203, - "grad_norm": 0.32708924994767863, - "learning_rate": 2.323756437450136e-05, - "loss": 0.2605, + "epoch": 1.9032911392405063, + "grad_norm": 0.30102939403192525, + "learning_rate": 3.116275997342979e-05, + "loss": 0.3328, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03131123632192612, - "step": 623, - "valid_targets_mean": 4423.8, - "valid_targets_min": 1800 + "loss_rank_avg": 0.1768466830253601, + "step": 1880, + "valid_targets_mean": 5417.4, + "valid_targets_min": 1149 }, { - "epoch": 2.526582278481013, - "grad_norm": 0.3213327010704033, - "learning_rate": 2.3181743091447754e-05, - "loss": 0.2557, + "epoch": 1.9083544303797468, + "grad_norm": 0.31204461528240096, + "learning_rate": 3.110405950703091e-05, + "loss": 0.3286, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028300907462835312, - "step": 624, - "valid_targets_mean": 4199.7, - "valid_targets_min": 1425 + "loss_rank_avg": 0.15786431729793549, + "step": 1885, + "valid_targets_mean": 4424.1, + "valid_targets_min": 1590 }, { - "epoch": 2.530632911392405, - "grad_norm": 0.29768752466827214, - "learning_rate": 2.3125896367261164e-05, - "loss": 0.2612, + "epoch": 1.9134177215189874, + "grad_norm": 0.3212411559315451, + "learning_rate": 3.104522043458875e-05, + "loss": 0.3326, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028280166909098625, - "step": 625, - "valid_targets_mean": 4495.2, - "valid_targets_min": 1336 + "loss_rank_avg": 0.15173178911209106, + "step": 1890, + "valid_targets_mean": 3546.5, + "valid_targets_min": 1048 }, { - "epoch": 2.5346835443037974, - "grad_norm": 0.29324733188037844, - "learning_rate": 2.307002464849052e-05, - "loss": 0.2582, + "epoch": 1.918481012658228, + "grad_norm": 0.3477054279687247, + "learning_rate": 3.098624349056003e-05, + "loss": 0.3131, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03605882078409195, - "step": 626, - "valid_targets_mean": 5320.2, - "valid_targets_min": 1240 + "loss_rank_avg": 0.09834770858287811, + "step": 1895, + "valid_targets_mean": 3314.4, + "valid_targets_min": 1081 }, { - "epoch": 2.53873417721519, - "grad_norm": 0.3005243259009893, - "learning_rate": 2.30141283818846e-05, - "loss": 0.2539, + "epoch": 1.9235443037974682, + "grad_norm": 0.3224350947488976, + "learning_rate": 3.0927129411122433e-05, + "loss": 0.3307, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026947829872369766, - "step": 627, - "valid_targets_mean": 3318.6, - "valid_targets_min": 1248 + "loss_rank_avg": 0.1967785656452179, + "step": 1900, + "valid_targets_mean": 5401.0, + "valid_targets_min": 1631 }, { - "epoch": 2.5427848101265824, - "grad_norm": 0.27350582442900084, - "learning_rate": 2.2958208014388462e-05, - "loss": 0.2514, + "epoch": 1.9286075949367087, + "grad_norm": 0.3407210065894635, + "learning_rate": 3.086787893416543e-05, + "loss": 0.3344, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03252295404672623, - "step": 628, - "valid_targets_mean": 5166.3, - "valid_targets_min": 1345 + "loss_rank_avg": 0.15977072715759277, + "step": 1905, + "valid_targets_mean": 3843.9, + "valid_targets_min": 1465 }, { - "epoch": 2.546835443037975, - "grad_norm": 0.28854390431284904, - "learning_rate": 2.2902263993139857e-05, - "loss": 0.2462, + "epoch": 1.9336708860759493, + "grad_norm": 0.3048824087454436, + "learning_rate": 3.080849279928108e-05, + "loss": 0.3186, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03174077346920967, - "step": 629, - "valid_targets_mean": 4279.8, - "valid_targets_min": 983 + "loss_rank_avg": 0.21388033032417297, + "step": 1910, + "valid_targets_mean": 5811.2, + "valid_targets_min": 735 }, { - "epoch": 2.550886075949367, - "grad_norm": 0.28372611723663116, - "learning_rate": 2.2846296765465708e-05, - "loss": 0.2467, + "epoch": 1.9387341772151898, + "grad_norm": 0.3220507717171466, + "learning_rate": 3.074897174775478e-05, + "loss": 0.3231, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0290265791118145, - "step": 630, - "valid_targets_mean": 4876.6, - "valid_targets_min": 1103 + "loss_rank_avg": 0.17468535900115967, + "step": 1915, + "valid_targets_mean": 4189.4, + "valid_targets_min": 1442 }, { - "epoch": 2.5549367088607595, - "grad_norm": 0.27799669957215767, - "learning_rate": 2.2790306778878465e-05, - "loss": 0.2577, + "epoch": 1.9437974683544303, + "grad_norm": 0.33220201972445723, + "learning_rate": 3.0689316522556026e-05, + "loss": 0.3242, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03007456660270691, - "step": 631, - "valid_targets_mean": 4479.5, - "valid_targets_min": 1483 + "loss_rank_avg": 0.1948956400156021, + "step": 1920, + "valid_targets_mean": 4742.3, + "valid_targets_min": 1598 }, { - "epoch": 2.558987341772152, - "grad_norm": 0.282275352024946, - "learning_rate": 2.273429448107257e-05, - "loss": 0.2535, + "epoch": 1.9488607594936709, + "grad_norm": 0.32448507249493624, + "learning_rate": 3.062952786832912e-05, + "loss": 0.3189, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0425049290060997, - "step": 632, - "valid_targets_mean": 5926.8, - "valid_targets_min": 1197 + "loss_rank_avg": 0.1440562605857849, + "step": 1925, + "valid_targets_mean": 4186.6, + "valid_targets_min": 1640 }, { - "epoch": 2.563037974683544, - "grad_norm": 0.25916687600374244, - "learning_rate": 2.2678260319920868e-05, - "loss": 0.2573, + "epoch": 1.9539240506329114, + "grad_norm": 0.29766519811078285, + "learning_rate": 3.05696065313839e-05, + "loss": 0.3306, "loss_nan_ranks": 0, - "loss_rank_avg": 0.033781878650188446, - "step": 633, - "valid_targets_mean": 4686.0, - "valid_targets_min": 1132 + "loss_rank_avg": 0.18304754793643951, + "step": 1930, + "valid_targets_mean": 6132.7, + "valid_targets_min": 1516 }, { - "epoch": 2.5670886075949366, - "grad_norm": 0.2705411998780125, - "learning_rate": 2.262220474347102e-05, - "loss": 0.2544, + "epoch": 1.958987341772152, + "grad_norm": 0.2949900786324887, + "learning_rate": 3.050955325968641e-05, + "loss": 0.3211, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02024194225668907, - "step": 634, - "valid_targets_mean": 3486.6, - "valid_targets_min": 1290 + "loss_rank_avg": 0.15778782963752747, + "step": 1935, + "valid_targets_mean": 5078.4, + "valid_targets_min": 1196 }, { - "epoch": 2.571139240506329, - "grad_norm": 0.2697006089406536, - "learning_rate": 2.2566128199941918e-05, - "loss": 0.2553, + "epoch": 1.9640506329113925, + "grad_norm": 0.3263199970381054, + "learning_rate": 3.0449368802849553e-05, + "loss": 0.3181, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03225478157401085, - "step": 635, - "valid_targets_mean": 5035.3, - "valid_targets_min": 1411 + "loss_rank_avg": 0.14029563963413239, + "step": 1940, + "valid_targets_mean": 4289.6, + "valid_targets_min": 1207 }, { - "epoch": 2.5751898734177217, - "grad_norm": 0.2686734454523371, - "learning_rate": 2.2510031137720104e-05, - "loss": 0.2625, + "epoch": 1.969113924050633, + "grad_norm": 0.3116697424009326, + "learning_rate": 3.0389053912123755e-05, + "loss": 0.3215, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026321150362491608, - "step": 636, - "valid_targets_mean": 3903.1, - "valid_targets_min": 1029 + "loss_rank_avg": 0.1622355878353119, + "step": 1945, + "valid_targets_mean": 4043.3, + "valid_targets_min": 1217 }, { - "epoch": 2.579240506329114, - "grad_norm": 0.31013089351460327, - "learning_rate": 2.2453914005356206e-05, - "loss": 0.2546, + "epoch": 1.9741772151898735, + "grad_norm": 0.33957146598689614, + "learning_rate": 3.0328609340387584e-05, + "loss": 0.319, "loss_nan_ranks": 0, - "loss_rank_avg": 0.037434741854667664, - "step": 637, - "valid_targets_mean": 5300.7, - "valid_targets_min": 1202 + "loss_rank_avg": 0.1829889565706253, + "step": 1950, + "valid_targets_mean": 3790.2, + "valid_targets_min": 768 }, { - "epoch": 2.5832911392405062, - "grad_norm": 0.2786446450188604, - "learning_rate": 2.2397777251561302e-05, - "loss": 0.2544, + "epoch": 1.979240506329114, + "grad_norm": 0.31004887940586384, + "learning_rate": 3.026803584213834e-05, + "loss": 0.3196, "loss_nan_ranks": 0, - "loss_rank_avg": 0.031491074711084366, - "step": 638, - "valid_targets_mean": 4333.8, - "valid_targets_min": 1201 + "loss_rank_avg": 0.1571284979581833, + "step": 1955, + "valid_targets_mean": 4538.4, + "valid_targets_min": 956 }, { - "epoch": 2.5873417721518988, - "grad_norm": 0.29864961232770876, - "learning_rate": 2.2341621325203393e-05, - "loss": 0.2466, + "epoch": 1.9843037974683544, + "grad_norm": 0.2945095160712328, + "learning_rate": 3.0207334173482635e-05, + "loss": 0.3223, "loss_nan_ranks": 0, - "loss_rank_avg": 0.027186639606952667, - "step": 639, - "valid_targets_mean": 3333.9, - "valid_targets_min": 1120 + "loss_rank_avg": 0.14519169926643372, + "step": 1960, + "valid_targets_mean": 3616.2, + "valid_targets_min": 1058 }, { - "epoch": 2.5913924050632913, - "grad_norm": 0.3215698124780734, - "learning_rate": 2.2285446675303764e-05, - "loss": 0.2637, + "epoch": 1.989367088607595, + "grad_norm": 0.29690655635239654, + "learning_rate": 3.0146505092126987e-05, + "loss": 0.3159, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03543717414140701, - "step": 640, - "valid_targets_mean": 4715.6, - "valid_targets_min": 1411 + "loss_rank_avg": 0.14115923643112183, + "step": 1965, + "valid_targets_mean": 4046.1, + "valid_targets_min": 1832 }, { - "epoch": 2.5954430379746833, - "grad_norm": 0.3050945512291773, - "learning_rate": 2.2229253751033423e-05, - "loss": 0.2601, + "epoch": 1.9944303797468355, + "grad_norm": 0.3836659337432429, + "learning_rate": 3.0085549357368316e-05, + "loss": 0.3245, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024836286902427673, - "step": 641, - "valid_targets_mean": 4104.9, - "valid_targets_min": 1240 + "loss_rank_avg": 0.16044825315475464, + "step": 1970, + "valid_targets_mean": 4667.9, + "valid_targets_min": 1443 }, { - "epoch": 2.599493670886076, - "grad_norm": 0.3050775033486984, - "learning_rate": 2.2173043001709492e-05, - "loss": 0.262, + "epoch": 1.999493670886076, + "grad_norm": 0.3322124150108135, + "learning_rate": 3.0024467730084508e-05, + "loss": 0.3169, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030903708189725876, - "step": 642, - "valid_targets_mean": 4335.6, - "valid_targets_min": 1410 + "loss_rank_avg": 0.12640810012817383, + "step": 1975, + "valid_targets_mean": 3170.9, + "valid_targets_min": 832 }, { - "epoch": 2.6035443037974684, - "grad_norm": 0.2997633406509843, - "learning_rate": 2.2116814876791627e-05, - "loss": 0.264, + "epoch": 2.0040506329113925, + "grad_norm": 0.3291075554103832, + "learning_rate": 2.996326097272487e-05, + "loss": 0.2897, "loss_nan_ranks": 0, - "loss_rank_avg": 0.032921142876148224, - "step": 643, - "valid_targets_mean": 4964.4, - "valid_targets_min": 1126 + "loss_rank_avg": 0.16433456540107727, + "step": 1980, + "valid_targets_mean": 4258.9, + "valid_targets_min": 1370 }, { - "epoch": 2.607594936708861, - "grad_norm": 0.3087476373401668, - "learning_rate": 2.2060569825878413e-05, - "loss": 0.262, + "epoch": 2.009113924050633, + "grad_norm": 0.28624539538450133, + "learning_rate": 2.9901929849300676e-05, + "loss": 0.3121, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03322776407003403, - "step": 644, - "valid_targets_mean": 4452.0, - "valid_targets_min": 920 + "loss_rank_avg": 0.156062513589859, + "step": 1985, + "valid_targets_mean": 5479.2, + "valid_targets_min": 1383 }, { - "epoch": 2.6116455696202534, - "grad_norm": 0.2828106261268277, - "learning_rate": 2.2004308298703777e-05, - "loss": 0.2516, + "epoch": 2.0141772151898736, + "grad_norm": 0.31864237413570884, + "learning_rate": 2.984047512537557e-05, + "loss": 0.3086, "loss_nan_ranks": 0, - "loss_rank_avg": 0.031175900250673294, - "step": 645, - "valid_targets_mean": 4302.5, - "valid_targets_min": 780 + "loss_rank_avg": 0.17700161039829254, + "step": 1990, + "valid_targets_mean": 4326.2, + "valid_targets_min": 1491 }, { - "epoch": 2.6156962025316455, - "grad_norm": 0.3008393389781136, - "learning_rate": 2.1948030745133396e-05, - "loss": 0.2586, + "epoch": 2.019240506329114, + "grad_norm": 0.29138155253699194, + "learning_rate": 2.977889756805604e-05, + "loss": 0.2973, "loss_nan_ranks": 0, - "loss_rank_avg": 0.037226781249046326, - "step": 646, - "valid_targets_mean": 4957.2, - "valid_targets_min": 1283 + "loss_rank_avg": 0.19598735868930817, + "step": 1995, + "valid_targets_mean": 5823.9, + "valid_targets_min": 1470 }, { - "epoch": 2.619746835443038, - "grad_norm": 0.29503272638846983, - "learning_rate": 2.1891737615161083e-05, - "loss": 0.2514, + "epoch": 2.0243037974683546, + "grad_norm": 0.32618376749689376, + "learning_rate": 2.9717197945981844e-05, + "loss": 0.3078, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02325281873345375, - "step": 647, - "valid_targets_mean": 3224.1, - "valid_targets_min": 1214 + "loss_rank_avg": 0.14639677107334137, + "step": 2000, + "valid_targets_mean": 3956.3, + "valid_targets_min": 1251 }, { - "epoch": 2.6237974683544305, - "grad_norm": 0.29677005409578583, - "learning_rate": 2.1835429358905218e-05, - "loss": 0.2541, + "epoch": 2.0293670886075947, + "grad_norm": 0.3231822134904899, + "learning_rate": 2.9655377029316415e-05, + "loss": 0.3038, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030813105404376984, - "step": 648, - "valid_targets_mean": 4649.4, - "valid_targets_min": 1340 + "loss_rank_avg": 0.1973341703414917, + "step": 2005, + "valid_targets_mean": 6070.5, + "valid_targets_min": 1268 }, { - "epoch": 2.6278481012658226, - "grad_norm": 0.2693461924966625, - "learning_rate": 2.17791064266051e-05, - "loss": 0.2629, + "epoch": 2.0344303797468353, + "grad_norm": 0.4445885950556669, + "learning_rate": 2.9593435589737212e-05, + "loss": 0.3081, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03964713588356972, - "step": 649, - "valid_targets_mean": 5541.3, - "valid_targets_min": 1081 + "loss_rank_avg": 0.1424170434474945, + "step": 2010, + "valid_targets_mean": 3355.1, + "valid_targets_min": 1257 }, { - "epoch": 2.631898734177215, - "grad_norm": 0.302821999547357, - "learning_rate": 2.172276926861741e-05, - "loss": 0.2504, + "epoch": 2.039493670886076, + "grad_norm": 0.31012702805843667, + "learning_rate": 2.9531374400426158e-05, + "loss": 0.3029, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028620591387152672, - "step": 650, - "valid_targets_mean": 4284.2, - "valid_targets_min": 814 + "loss_rank_avg": 0.17267721891403198, + "step": 2015, + "valid_targets_mean": 5018.6, + "valid_targets_min": 803 }, { - "epoch": 2.6359493670886076, - "grad_norm": 0.29782965749403933, - "learning_rate": 2.166641833541255e-05, - "loss": 0.2579, + "epoch": 2.0445569620253163, + "grad_norm": 0.27456099255552485, + "learning_rate": 2.9469194236059916e-05, + "loss": 0.3109, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03146466612815857, - "step": 651, - "valid_targets_mean": 4254.1, - "valid_targets_min": 1112 + "loss_rank_avg": 0.18913531303405762, + "step": 2020, + "valid_targets_mean": 6316.9, + "valid_targets_min": 1565 }, { - "epoch": 2.64, - "grad_norm": 0.2842374042867517, - "learning_rate": 2.161005407757109e-05, - "loss": 0.2448, + "epoch": 2.049620253164557, + "grad_norm": 0.30759866121772944, + "learning_rate": 2.940689587280027e-05, + "loss": 0.3056, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02665664814412594, - "step": 652, - "valid_targets_mean": 3843.0, - "valid_targets_min": 1300 + "loss_rank_avg": 0.17620842158794403, + "step": 2025, + "valid_targets_mean": 5025.1, + "valid_targets_min": 1103 }, { - "epoch": 2.6440506329113926, - "grad_norm": 0.318201022567993, - "learning_rate": 2.155367694578013e-05, - "loss": 0.2676, + "epoch": 2.0546835443037974, + "grad_norm": 0.3107498769401775, + "learning_rate": 2.9344480088284403e-05, + "loss": 0.3108, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030992835760116577, - "step": 653, - "valid_targets_mean": 4395.9, - "valid_targets_min": 1175 + "loss_rank_avg": 0.14356273412704468, + "step": 2030, + "valid_targets_mean": 3878.3, + "valid_targets_min": 1435 }, { - "epoch": 2.6481012658227847, - "grad_norm": 0.3003631747307125, - "learning_rate": 2.1497287390829712e-05, - "loss": 0.2576, + "epoch": 2.059746835443038, + "grad_norm": 0.3272342894958522, + "learning_rate": 2.9281947661615206e-05, + "loss": 0.3141, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03913872689008713, - "step": 654, - "valid_targets_mean": 5126.5, - "valid_targets_min": 1052 + "loss_rank_avg": 0.11045265197753906, + "step": 2035, + "valid_targets_mean": 2769.6, + "valid_targets_min": 775 }, { - "epoch": 2.6521518987341772, - "grad_norm": 0.2867285062665385, - "learning_rate": 2.1440885863609208e-05, - "loss": 0.2583, + "epoch": 2.0648101265822785, + "grad_norm": 0.28376297804532025, + "learning_rate": 2.921929937335157e-05, + "loss": 0.3134, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01915227808058262, - "step": 655, - "valid_targets_mean": 3469.7, - "valid_targets_min": 783 + "loss_rank_avg": 0.18254786729812622, + "step": 2040, + "valid_targets_mean": 6384.8, + "valid_targets_min": 726 }, { - "epoch": 2.6562025316455697, - "grad_norm": 0.30572330910607615, - "learning_rate": 2.1384472815103737e-05, - "loss": 0.2556, + "epoch": 2.069873417721519, + "grad_norm": 0.3494712370742931, + "learning_rate": 2.9156536005498616e-05, + "loss": 0.3226, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03280739486217499, - "step": 656, - "valid_targets_mean": 4244.4, - "valid_targets_min": 1440 + "loss_rank_avg": 0.15539231896400452, + "step": 2045, + "valid_targets_mean": 3678.8, + "valid_targets_min": 558 }, { - "epoch": 2.660253164556962, - "grad_norm": 0.29116636191372325, - "learning_rate": 2.1328048696390534e-05, - "loss": 0.2554, + "epoch": 2.0749367088607595, + "grad_norm": 0.36306320132408093, + "learning_rate": 2.909365834149792e-05, + "loss": 0.3188, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02809366211295128, - "step": 657, - "valid_targets_mean": 4139.7, - "valid_targets_min": 1281 + "loss_rank_avg": 0.16074064373970032, + "step": 2050, + "valid_targets_mean": 3803.6, + "valid_targets_min": 992 }, { - "epoch": 2.6643037974683543, - "grad_norm": 0.29492182773411035, - "learning_rate": 2.1271613958635342e-05, - "loss": 0.2415, + "epoch": 2.08, + "grad_norm": 0.28771743850152404, + "learning_rate": 2.903066716621779e-05, + "loss": 0.3257, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0448947437107563, - "step": 658, - "valid_targets_mean": 6116.7, - "valid_targets_min": 2123 + "loss_rank_avg": 0.17194947600364685, + "step": 2055, + "valid_targets_mean": 6007.1, + "valid_targets_min": 788 }, { - "epoch": 2.668354430379747, - "grad_norm": 0.2897066616049584, - "learning_rate": 2.1215169053088825e-05, - "loss": 0.2455, + "epoch": 2.0850632911392406, + "grad_norm": 0.3176958806269076, + "learning_rate": 2.896756326594341e-05, + "loss": 0.3174, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03614652901887894, - "step": 659, - "valid_targets_mean": 5847.3, - "valid_targets_min": 1304 + "loss_rank_avg": 0.15832051634788513, + "step": 2060, + "valid_targets_mean": 4807.6, + "valid_targets_min": 1533 }, { - "epoch": 2.6724050632911394, - "grad_norm": 0.328253562044885, - "learning_rate": 2.1158714431082937e-05, - "loss": 0.2698, + "epoch": 2.090126582278481, + "grad_norm": 0.30643347389492315, + "learning_rate": 2.890434742836706e-05, + "loss": 0.3061, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026467427611351013, - "step": 660, - "valid_targets_mean": 4154.1, - "valid_targets_min": 1534 + "loss_rank_avg": 0.17868554592132568, + "step": 2065, + "valid_targets_mean": 5870.9, + "valid_targets_min": 1477 }, { - "epoch": 2.676455696202532, - "grad_norm": 0.29549581880007425, - "learning_rate": 2.110225054402733e-05, - "loss": 0.2611, + "epoch": 2.0951898734177217, + "grad_norm": 0.328300503212321, + "learning_rate": 2.8841020442578274e-05, + "loss": 0.3219, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04412643983960152, - "step": 661, - "valid_targets_mean": 5657.8, - "valid_targets_min": 1309 + "loss_rank_avg": 0.15090826153755188, + "step": 2070, + "valid_targets_mean": 4159.4, + "valid_targets_min": 767 }, { - "epoch": 2.680506329113924, - "grad_norm": 0.31282829359365777, - "learning_rate": 2.1045777843405747e-05, - "loss": 0.2531, + "epoch": 2.100253164556962, + "grad_norm": 0.3116451598810816, + "learning_rate": 2.8777583099053985e-05, + "loss": 0.3096, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02726035937666893, - "step": 662, - "valid_targets_mean": 3428.0, - "valid_targets_min": 900 + "loss_rank_avg": 0.15223953127861023, + "step": 2075, + "valid_targets_mean": 4922.9, + "valid_targets_min": 1236 }, { - "epoch": 2.6845569620253165, - "grad_norm": 0.3009950377222184, - "learning_rate": 2.09892967807724e-05, - "loss": 0.2579, + "epoch": 2.1053164556962027, + "grad_norm": 0.32927381366700853, + "learning_rate": 2.871403618964867e-05, + "loss": 0.3177, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03681063652038574, - "step": 663, - "valid_targets_mean": 4952.4, - "valid_targets_min": 778 + "loss_rank_avg": 0.18153706192970276, + "step": 2080, + "valid_targets_mean": 4833.8, + "valid_targets_min": 1513 }, { - "epoch": 2.688607594936709, - "grad_norm": 0.30137309251881117, - "learning_rate": 2.093280780774835e-05, - "loss": 0.2615, + "epoch": 2.110379746835443, + "grad_norm": 0.30099077420276216, + "learning_rate": 2.8650380507584444e-05, + "loss": 0.3185, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03740883618593216, - "step": 664, - "valid_targets_mean": 5394.9, - "valid_targets_min": 1522 + "loss_rank_avg": 0.11155495047569275, + "step": 2085, + "valid_targets_mean": 3890.5, + "valid_targets_min": 1469 }, { - "epoch": 2.692658227848101, - "grad_norm": 0.28601586593852013, - "learning_rate": 2.087631137601793e-05, - "loss": 0.2502, + "epoch": 2.1154430379746834, + "grad_norm": 0.304896658143913, + "learning_rate": 2.8586616847441192e-05, + "loss": 0.3203, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03194505721330643, - "step": 665, - "valid_targets_mean": 4750.3, - "valid_targets_min": 1325 + "loss_rank_avg": 0.143421471118927, + "step": 2090, + "valid_targets_mean": 4747.4, + "valid_targets_min": 849 }, { - "epoch": 2.6967088607594936, - "grad_norm": 0.32148793052456576, - "learning_rate": 2.0819807937325104e-05, - "loss": 0.2639, + "epoch": 2.120506329113924, + "grad_norm": 0.33627609235395134, + "learning_rate": 2.852274600514662e-05, + "loss": 0.3236, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04088940471410751, - "step": 666, - "valid_targets_mean": 5782.1, - "valid_targets_min": 1447 + "loss_rank_avg": 0.17665691673755646, + "step": 2095, + "valid_targets_mean": 4558.9, + "valid_targets_min": 1540 }, { - "epoch": 2.700759493670886, - "grad_norm": 0.29417424569091954, - "learning_rate": 2.0763297943469847e-05, - "loss": 0.2619, + "epoch": 2.1255696202531644, + "grad_norm": 0.3025889533254811, + "learning_rate": 2.8458768777966333e-05, + "loss": 0.3151, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017188072204589844, - "step": 667, - "valid_targets_mean": 3231.1, - "valid_targets_min": 1094 + "loss_rank_avg": 0.16682836413383484, + "step": 2100, + "valid_targets_mean": 5740.6, + "valid_targets_min": 1809 }, { - "epoch": 2.7048101265822786, - "grad_norm": 0.31965059393490125, - "learning_rate": 2.070678184630458e-05, - "loss": 0.2403, + "epoch": 2.130632911392405, + "grad_norm": 0.3039388867958498, + "learning_rate": 2.8394685964493886e-05, + "loss": 0.3029, "loss_nan_ranks": 0, - "loss_rank_avg": 0.033367276191711426, - "step": 668, - "valid_targets_mean": 4940.9, - "valid_targets_min": 1223 + "loss_rank_avg": 0.0811905711889267, + "step": 2105, + "valid_targets_mean": 3185.6, + "valid_targets_min": 1428 }, { - "epoch": 2.708860759493671, - "grad_norm": 0.3027836295255975, - "learning_rate": 2.06502600977305e-05, - "loss": 0.246, + "epoch": 2.1356962025316455, + "grad_norm": 0.34645798201468236, + "learning_rate": 2.8330498364640803e-05, + "loss": 0.3169, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03082066960632801, - "step": 669, - "valid_targets_mean": 4925.0, - "valid_targets_min": 1102 + "loss_rank_avg": 0.20455661416053772, + "step": 2110, + "valid_targets_mean": 5101.9, + "valid_targets_min": 1051 }, { - "epoch": 2.712911392405063, - "grad_norm": 0.32643311209170656, - "learning_rate": 2.0593733149694008e-05, - "loss": 0.2603, + "epoch": 2.140759493670886, + "grad_norm": 0.2965620255799478, + "learning_rate": 2.8266206779626604e-05, + "loss": 0.2991, "loss_nan_ranks": 0, - "loss_rank_avg": 0.027547260746359825, - "step": 670, - "valid_targets_mean": 4204.4, - "valid_targets_min": 1152 + "loss_rank_avg": 0.15685412287712097, + "step": 2115, + "valid_targets_mean": 5286.0, + "valid_targets_min": 1289 }, { - "epoch": 2.7169620253164557, - "grad_norm": 0.3072630507340393, - "learning_rate": 2.0537201454183074e-05, - "loss": 0.2572, + "epoch": 2.1458227848101266, + "grad_norm": 0.3555931137634147, + "learning_rate": 2.8201812011968807e-05, + "loss": 0.3027, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02958519198000431, - "step": 671, - "valid_targets_mean": 4499.6, - "valid_targets_min": 975 + "loss_rank_avg": 0.16602477431297302, + "step": 2120, + "valid_targets_mean": 4031.6, + "valid_targets_min": 1147 }, { - "epoch": 2.721012658227848, - "grad_norm": 0.2872594254641355, - "learning_rate": 2.0480665463223626e-05, - "loss": 0.2532, + "epoch": 2.150886075949367, + "grad_norm": 0.31549642905504277, + "learning_rate": 2.8137314865472896e-05, + "loss": 0.3115, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03325200825929642, - "step": 672, - "valid_targets_mean": 5680.6, - "valid_targets_min": 1443 + "loss_rank_avg": 0.2147747278213501, + "step": 2125, + "valid_targets_mean": 5917.6, + "valid_targets_min": 1516 }, { - "epoch": 2.7250632911392403, - "grad_norm": 0.34577995147391477, - "learning_rate": 2.042412562887594e-05, - "loss": 0.2631, + "epoch": 2.1559493670886076, + "grad_norm": 0.3817360788487001, + "learning_rate": 2.8072716145222295e-05, + "loss": 0.3204, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02334584668278694, - "step": 673, - "valid_targets_mean": 3676.2, - "valid_targets_min": 1157 + "loss_rank_avg": 0.1429363489151001, + "step": 2130, + "valid_targets_mean": 4846.5, + "valid_targets_min": 1233 }, { - "epoch": 2.729113924050633, - "grad_norm": 0.28381380010863383, - "learning_rate": 2.0367582403231033e-05, - "loss": 0.2656, + "epoch": 2.161012658227848, + "grad_norm": 0.29099253723913165, + "learning_rate": 2.800801665756833e-05, + "loss": 0.3076, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03436291217803955, - "step": 674, - "valid_targets_mean": 5062.5, - "valid_targets_min": 1572 + "loss_rank_avg": 0.1667858213186264, + "step": 2135, + "valid_targets_mean": 5250.2, + "valid_targets_min": 1352 }, { - "epoch": 2.7331645569620253, - "grad_norm": 0.3107063992450581, - "learning_rate": 2.0311036238407023e-05, - "loss": 0.2571, + "epoch": 2.1660759493670887, + "grad_norm": 0.31033307443976493, + "learning_rate": 2.794321721012013e-05, + "loss": 0.3174, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030485117807984352, - "step": 675, - "valid_targets_mean": 4593.1, - "valid_targets_min": 1677 + "loss_rank_avg": 0.15342216193675995, + "step": 2140, + "valid_targets_mean": 4172.6, + "valid_targets_min": 959 }, { - "epoch": 2.737215189873418, - "grad_norm": 0.30380115780354106, - "learning_rate": 2.0254487586545534e-05, - "loss": 0.2629, + "epoch": 2.1711392405063292, + "grad_norm": 0.32002549909894445, + "learning_rate": 2.78783186117346e-05, + "loss": 0.3136, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03379993885755539, - "step": 676, - "valid_targets_mean": 4763.4, - "valid_targets_min": 1064 + "loss_rank_avg": 0.1935744285583496, + "step": 2145, + "valid_targets_mean": 5775.0, + "valid_targets_min": 1696 }, { - "epoch": 2.7412658227848103, - "grad_norm": 0.3116411566769037, - "learning_rate": 2.0197936899808088e-05, - "loss": 0.2451, + "epoch": 2.1762025316455698, + "grad_norm": 0.3337487659466844, + "learning_rate": 2.7813321672506268e-05, + "loss": 0.3176, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0327700711786747, - "step": 677, - "valid_targets_mean": 5130.9, - "valid_targets_min": 1643 + "loss_rank_avg": 0.19408701360225677, + "step": 2150, + "valid_targets_mean": 5285.5, + "valid_targets_min": 1837 }, { - "epoch": 2.7453164556962024, - "grad_norm": 0.2834334524400567, - "learning_rate": 2.0141384630372466e-05, - "loss": 0.2582, + "epoch": 2.1812658227848103, + "grad_norm": 0.30999599957531293, + "learning_rate": 2.77482272037572e-05, + "loss": 0.3085, "loss_nan_ranks": 0, - "loss_rank_avg": 0.022598637267947197, - "step": 678, - "valid_targets_mean": 3332.0, - "valid_targets_min": 1457 + "loss_rank_avg": 0.16555137932300568, + "step": 2155, + "valid_targets_mean": 4545.2, + "valid_targets_min": 1035 }, { - "epoch": 2.749367088607595, - "grad_norm": 0.2954144930897787, - "learning_rate": 2.0084831230429106e-05, - "loss": 0.2484, + "epoch": 2.186329113924051, + "grad_norm": 0.32893298945558314, + "learning_rate": 2.768303601802689e-05, + "loss": 0.3059, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02883639745414257, - "step": 679, - "valid_targets_mean": 4062.5, - "valid_targets_min": 842 + "loss_rank_avg": 0.15684665739536285, + "step": 2160, + "valid_targets_mean": 3914.8, + "valid_targets_min": 1135 }, { - "epoch": 2.7534177215189874, - "grad_norm": 0.28743408729324493, - "learning_rate": 2.0028277152177492e-05, - "loss": 0.2482, + "epoch": 2.191392405063291, + "grad_norm": 0.32177861173163685, + "learning_rate": 2.7617748929062084e-05, + "loss": 0.3113, "loss_nan_ranks": 0, - "loss_rank_avg": 0.032943643629550934, - "step": 680, - "valid_targets_mean": 5628.5, - "valid_targets_min": 897 + "loss_rank_avg": 0.13835042715072632, + "step": 2165, + "valid_targets_mean": 4624.2, + "valid_targets_min": 911 }, { - "epoch": 2.7574683544303795, - "grad_norm": 0.2661613276585499, - "learning_rate": 1.9971722847822518e-05, - "loss": 0.243, + "epoch": 2.1964556962025314, + "grad_norm": 0.2604316010544582, + "learning_rate": 2.7552366751806624e-05, + "loss": 0.296, "loss_nan_ranks": 0, - "loss_rank_avg": 0.042223528027534485, - "step": 681, - "valid_targets_mean": 5950.6, - "valid_targets_min": 1513 + "loss_rank_avg": 0.14997389912605286, + "step": 2170, + "valid_targets_mean": 6796.2, + "valid_targets_min": 1292 }, { - "epoch": 2.761518987341772, - "grad_norm": 0.32422655919166515, - "learning_rate": 1.99151687695709e-05, - "loss": 0.2567, + "epoch": 2.201518987341772, + "grad_norm": 0.30476092949327993, + "learning_rate": 2.7486890302391316e-05, + "loss": 0.3064, "loss_nan_ranks": 0, - "loss_rank_avg": 0.038189806044101715, - "step": 682, - "valid_targets_mean": 4266.8, - "valid_targets_min": 957 + "loss_rank_avg": 0.1621738076210022, + "step": 2175, + "valid_targets_mean": 5079.0, + "valid_targets_min": 1369 }, { - "epoch": 2.7655696202531646, - "grad_norm": 0.28388437468562894, - "learning_rate": 1.9858615369627537e-05, - "loss": 0.2598, + "epoch": 2.2065822784810125, + "grad_norm": 0.3322580301103298, + "learning_rate": 2.7421320398123702e-05, + "loss": 0.3199, "loss_nan_ranks": 0, - "loss_rank_avg": 0.029368355870246887, - "step": 683, - "valid_targets_mean": 4718.3, - "valid_targets_min": 915 + "loss_rank_avg": 0.1476869136095047, + "step": 2180, + "valid_targets_mean": 3784.2, + "valid_targets_min": 1640 }, { - "epoch": 2.769620253164557, - "grad_norm": 0.27844480657474535, - "learning_rate": 1.9802063100191916e-05, - "loss": 0.2679, + "epoch": 2.211645569620253, + "grad_norm": 0.2990722276821268, + "learning_rate": 2.735565785747787e-05, + "loss": 0.3077, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030729448422789574, - "step": 684, - "valid_targets_mean": 4384.0, - "valid_targets_min": 1178 + "loss_rank_avg": 0.13568082451820374, + "step": 2185, + "valid_targets_mean": 4330.4, + "valid_targets_min": 815 }, { - "epoch": 2.7736708860759496, - "grad_norm": 0.30984977356577154, - "learning_rate": 1.974551241345447e-05, - "loss": 0.2539, + "epoch": 2.2167088607594936, + "grad_norm": 0.30616718547526817, + "learning_rate": 2.728990350008423e-05, + "loss": 0.3215, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02934480458498001, - "step": 685, - "valid_targets_mean": 3798.8, - "valid_targets_min": 1493 + "loss_rank_avg": 0.17533037066459656, + "step": 2190, + "valid_targets_mean": 5132.6, + "valid_targets_min": 949 }, { - "epoch": 2.7777215189873417, - "grad_norm": 0.27448884582560484, - "learning_rate": 1.9688963761592984e-05, - "loss": 0.2547, + "epoch": 2.221772151898734, + "grad_norm": 0.28127500881615763, + "learning_rate": 2.722405814671931e-05, + "loss": 0.2974, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02799350395798683, - "step": 686, - "valid_targets_mean": 3781.4, - "valid_targets_min": 1855 + "loss_rank_avg": 0.134933739900589, + "step": 2195, + "valid_targets_mean": 5636.6, + "valid_targets_min": 732 }, { - "epoch": 2.781772151898734, - "grad_norm": 0.3077186286572531, - "learning_rate": 1.963241759676897e-05, - "loss": 0.2604, + "epoch": 2.2268354430379746, + "grad_norm": 0.27748602615715384, + "learning_rate": 2.715812261929548e-05, + "loss": 0.3208, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03678155690431595, - "step": 687, - "valid_targets_mean": 5446.1, - "valid_targets_min": 1235 + "loss_rank_avg": 0.1636897474527359, + "step": 2200, + "valid_targets_mean": 5998.6, + "valid_targets_min": 1351 }, { - "epoch": 2.7858227848101267, - "grad_norm": 0.273877614979815, - "learning_rate": 1.9575874371124066e-05, - "loss": 0.2405, + "epoch": 2.231898734177215, + "grad_norm": 0.3255757575300295, + "learning_rate": 2.7092097740850712e-05, + "loss": 0.3094, "loss_nan_ranks": 0, - "loss_rank_avg": 0.029743792489171028, - "step": 688, - "valid_targets_mean": 4165.7, - "valid_targets_min": 915 + "loss_rank_avg": 0.1682029664516449, + "step": 2205, + "valid_targets_mean": 4255.6, + "valid_targets_min": 1238 }, { - "epoch": 2.7898734177215188, - "grad_norm": 0.2939071744294542, - "learning_rate": 1.951933453677638e-05, - "loss": 0.2543, + "epoch": 2.2369620253164557, + "grad_norm": 0.30015032841488015, + "learning_rate": 2.7025984335538297e-05, + "loss": 0.3163, "loss_nan_ranks": 0, - "loss_rank_avg": 0.040034182369709015, - "step": 689, - "valid_targets_mean": 5826.3, - "valid_targets_min": 1081 + "loss_rank_avg": 0.1712615191936493, + "step": 2210, + "valid_targets_mean": 5242.7, + "valid_targets_min": 968 }, { - "epoch": 2.7939240506329113, - "grad_norm": 0.338200305666527, - "learning_rate": 1.9462798545816932e-05, - "loss": 0.2635, + "epoch": 2.2420253164556962, + "grad_norm": 0.35257346632541897, + "learning_rate": 2.6959783228616543e-05, + "loss": 0.3258, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020607519894838333, - "step": 690, - "valid_targets_mean": 3353.9, - "valid_targets_min": 1111 + "loss_rank_avg": 0.14988690614700317, + "step": 2215, + "valid_targets_mean": 3744.8, + "valid_targets_min": 1368 }, { - "epoch": 2.797974683544304, - "grad_norm": 0.2693609927482392, - "learning_rate": 1.9406266850305996e-05, - "loss": 0.2556, + "epoch": 2.247088607594937, + "grad_norm": 0.34270763531215714, + "learning_rate": 2.6893495246438512e-05, + "loss": 0.3099, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030511412769556046, - "step": 691, - "valid_targets_mean": 5535.2, - "valid_targets_min": 1391 + "loss_rank_avg": 0.13658785820007324, + "step": 2220, + "valid_targets_mean": 3438.7, + "valid_targets_min": 1173 }, { - "epoch": 2.8020253164556963, - "grad_norm": 0.32107879586411486, - "learning_rate": 1.9349739902269503e-05, - "loss": 0.2447, + "epoch": 2.2521518987341773, + "grad_norm": 0.3370660380071177, + "learning_rate": 2.682712121644168e-05, + "loss": 0.3137, "loss_nan_ranks": 0, - "loss_rank_avg": 0.025336556136608124, - "step": 692, - "valid_targets_mean": 4009.9, - "valid_targets_min": 1289 + "loss_rank_avg": 0.1949615478515625, + "step": 2225, + "valid_targets_mean": 4599.9, + "valid_targets_min": 1016 }, { - "epoch": 2.806075949367089, - "grad_norm": 0.2821704052610864, - "learning_rate": 1.9293218153695425e-05, - "loss": 0.2623, + "epoch": 2.257215189873418, + "grad_norm": 0.348355336462014, + "learning_rate": 2.6760661967137597e-05, + "loss": 0.3047, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03870566934347153, - "step": 693, - "valid_targets_mean": 6418.9, - "valid_targets_min": 913 + "loss_rank_avg": 0.16294927895069122, + "step": 2230, + "valid_targets_mean": 4107.5, + "valid_targets_min": 1022 }, { - "epoch": 2.810126582278481, - "grad_norm": 0.2947465266050635, - "learning_rate": 1.9236702056530153e-05, - "loss": 0.2515, + "epoch": 2.2622784810126584, + "grad_norm": 0.3249186301351007, + "learning_rate": 2.6694118328101592e-05, + "loss": 0.3144, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03403696417808533, - "step": 694, - "valid_targets_mean": 4783.3, - "valid_targets_min": 1247 + "loss_rank_avg": 0.09672749042510986, + "step": 2235, + "valid_targets_mean": 2920.3, + "valid_targets_min": 950 }, { - "epoch": 2.8141772151898734, - "grad_norm": 0.3224972914915537, - "learning_rate": 1.9180192062674907e-05, - "loss": 0.2639, + "epoch": 2.267341772151899, + "grad_norm": 0.30303571071751967, + "learning_rate": 2.6627491129962343e-05, + "loss": 0.2928, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04721870645880699, - "step": 695, - "valid_targets_mean": 6279.4, - "valid_targets_min": 1563 + "loss_rank_avg": 0.1442355215549469, + "step": 2240, + "valid_targets_mean": 4454.2, + "valid_targets_min": 1369 }, { - "epoch": 2.818227848101266, - "grad_norm": 0.2885491555150038, - "learning_rate": 1.9123688623982076e-05, - "loss": 0.2522, + "epoch": 2.2724050632911394, + "grad_norm": 0.31715305103924707, + "learning_rate": 2.6560781204391584e-05, + "loss": 0.3084, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018630696460604668, - "step": 696, - "valid_targets_mean": 3067.3, - "valid_targets_min": 957 + "loss_rank_avg": 0.10361987352371216, + "step": 2245, + "valid_targets_mean": 3143.1, + "valid_targets_min": 1447 }, { - "epoch": 2.822278481012658, - "grad_norm": 0.2705441196421664, - "learning_rate": 1.906719219225165e-05, - "loss": 0.2406, + "epoch": 2.27746835443038, + "grad_norm": 0.28484792889639327, + "learning_rate": 2.6493989384093674e-05, + "loss": 0.3142, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01688503846526146, - "step": 697, - "valid_targets_mean": 2701.2, - "valid_targets_min": 973 + "loss_rank_avg": 0.14160794019699097, + "step": 2250, + "valid_targets_mean": 4650.6, + "valid_targets_min": 954 }, { - "epoch": 2.8263291139240505, - "grad_norm": 0.3168281330219779, - "learning_rate": 1.9010703219227603e-05, - "loss": 0.2527, + "epoch": 2.28253164556962, + "grad_norm": 0.2878082890562957, + "learning_rate": 2.642711650279523e-05, + "loss": 0.3094, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03765212744474411, - "step": 698, - "valid_targets_mean": 4885.8, - "valid_targets_min": 1524 + "loss_rank_avg": 0.1957184076309204, + "step": 2255, + "valid_targets_mean": 5700.1, + "valid_targets_min": 1450 }, { - "epoch": 2.830379746835443, - "grad_norm": 0.2807198194935173, - "learning_rate": 1.8954222156594256e-05, - "loss": 0.265, + "epoch": 2.2875949367088606, + "grad_norm": 0.28921634890872305, + "learning_rate": 2.636016339523472e-05, + "loss": 0.3086, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03330278396606445, - "step": 699, - "valid_targets_mean": 4268.5, - "valid_targets_min": 1146 + "loss_rank_avg": 0.1497688591480255, + "step": 2260, + "valid_targets_mean": 5892.9, + "valid_targets_min": 1182 }, { - "epoch": 2.8344303797468355, - "grad_norm": 0.3256073602246422, - "learning_rate": 1.8897749455972673e-05, - "loss": 0.2527, + "epoch": 2.292658227848101, + "grad_norm": 0.374304372121314, + "learning_rate": 2.6293130897152005e-05, + "loss": 0.3046, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028569743037223816, - "step": 700, - "valid_targets_mean": 3776.6, - "valid_targets_min": 733 + "loss_rank_avg": 0.10483106970787048, + "step": 2265, + "valid_targets_mean": 2732.1, + "valid_targets_min": 909 }, { - "epoch": 2.838481012658228, - "grad_norm": 0.27646963358428234, - "learning_rate": 1.884128556891707e-05, - "loss": 0.2478, + "epoch": 2.2977215189873417, + "grad_norm": 0.360265224368032, + "learning_rate": 2.6226019845277954e-05, + "loss": 0.3026, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04450641945004463, - "step": 701, - "valid_targets_mean": 4887.8, - "valid_targets_min": 1283 + "loss_rank_avg": 0.15251410007476807, + "step": 2270, + "valid_targets_mean": 3089.3, + "valid_targets_min": 1121 }, { - "epoch": 2.84253164556962, - "grad_norm": 0.2741209798208164, - "learning_rate": 1.878483094691119e-05, - "loss": 0.2482, + "epoch": 2.302784810126582, + "grad_norm": 0.3076018448534471, + "learning_rate": 2.615883107732398e-05, + "loss": 0.2996, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0251961387693882, - "step": 702, - "valid_targets_mean": 3683.0, - "valid_targets_min": 1118 + "loss_rank_avg": 0.10678346455097198, + "step": 2275, + "valid_targets_mean": 3471.9, + "valid_targets_min": 1050 }, { - "epoch": 2.8465822784810126, - "grad_norm": 0.28110770884148384, - "learning_rate": 1.8728386041364664e-05, - "loss": 0.2544, + "epoch": 2.3078481012658227, + "grad_norm": 0.30671877576562406, + "learning_rate": 2.609156543197158e-05, + "loss": 0.3101, "loss_nan_ranks": 0, - "loss_rank_avg": 0.038397304713726044, - "step": 703, - "valid_targets_mean": 5510.7, - "valid_targets_min": 1309 + "loss_rank_avg": 0.18611571192741394, + "step": 2280, + "valid_targets_mean": 5213.1, + "valid_targets_min": 1379 }, { - "epoch": 2.850632911392405, - "grad_norm": 0.25860615600142395, - "learning_rate": 1.867195130360947e-05, - "loss": 0.2436, + "epoch": 2.3129113924050633, + "grad_norm": 0.42457395280342314, + "learning_rate": 2.6024223748861883e-05, + "loss": 0.3039, "loss_nan_ranks": 0, - "loss_rank_avg": 0.045691635459661484, - "step": 704, - "valid_targets_mean": 6492.9, - "valid_targets_min": 1402 + "loss_rank_avg": 0.17773228883743286, + "step": 2285, + "valid_targets_mean": 5313.4, + "valid_targets_min": 929 }, { - "epoch": 2.8546835443037972, - "grad_norm": 0.2910171774254887, - "learning_rate": 1.8615527184896263e-05, - "loss": 0.2527, + "epoch": 2.317974683544304, + "grad_norm": 0.343397113662205, + "learning_rate": 2.5956806868585136e-05, + "loss": 0.305, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03464601933956146, - "step": 705, - "valid_targets_mean": 4857.5, - "valid_targets_min": 961 + "loss_rank_avg": 0.1606595367193222, + "step": 2290, + "valid_targets_mean": 3827.6, + "valid_targets_min": 1248 }, { - "epoch": 2.8587341772151897, - "grad_norm": 0.2658774941052945, - "learning_rate": 1.8559114136390795e-05, - "loss": 0.248, + "epoch": 2.3230379746835443, + "grad_norm": 0.2808278753890521, + "learning_rate": 2.5889315632670247e-05, + "loss": 0.3132, "loss_nan_ranks": 0, - "loss_rank_avg": 0.039772506803274155, - "step": 706, - "valid_targets_mean": 5122.4, - "valid_targets_min": 1059 + "loss_rank_avg": 0.11440195143222809, + "step": 2295, + "valid_targets_mean": 3889.9, + "valid_targets_min": 1216 }, { - "epoch": 2.8627848101265823, - "grad_norm": 0.25838343326049434, - "learning_rate": 1.8502712609170298e-05, - "loss": 0.2539, + "epoch": 2.328101265822785, + "grad_norm": 0.3112053561319972, + "learning_rate": 2.582175088357426e-05, + "loss": 0.3108, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03241926431655884, - "step": 707, - "valid_targets_mean": 4657.2, - "valid_targets_min": 992 + "loss_rank_avg": 0.14229443669319153, + "step": 2300, + "valid_targets_mean": 3906.9, + "valid_targets_min": 1129 }, { - "epoch": 2.8668354430379748, - "grad_norm": 0.29961837015268405, - "learning_rate": 1.8446323054219876e-05, - "loss": 0.2603, + "epoch": 2.3331645569620254, + "grad_norm": 0.29738973838136984, + "learning_rate": 2.575411346467185e-05, + "loss": 0.304, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03099638596177101, - "step": 708, - "valid_targets_mean": 4535.9, - "valid_targets_min": 1544 + "loss_rank_avg": 0.12370048463344574, + "step": 2305, + "valid_targets_mean": 4034.3, + "valid_targets_min": 1620 }, { - "epoch": 2.8708860759493673, - "grad_norm": 0.2826577983451744, - "learning_rate": 1.838994592242891e-05, - "loss": 0.2494, + "epoch": 2.338227848101266, + "grad_norm": 0.3012147807699108, + "learning_rate": 2.568640422024478e-05, + "loss": 0.3076, "loss_nan_ranks": 0, - "loss_rank_avg": 0.022791540250182152, - "step": 709, - "valid_targets_mean": 3683.8, - "valid_targets_min": 1101 + "loss_rank_avg": 0.2278967797756195, + "step": 2310, + "valid_targets_mean": 5961.9, + "valid_targets_min": 1399 }, { - "epoch": 2.8749367088607594, - "grad_norm": 0.2789431626612818, - "learning_rate": 1.8333581664587453e-05, - "loss": 0.2399, + "epoch": 2.3432911392405065, + "grad_norm": 0.2922942701946769, + "learning_rate": 2.5618623995471394e-05, + "loss": 0.3133, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03397709131240845, - "step": 710, - "valid_targets_mean": 4410.2, - "valid_targets_min": 1095 + "loss_rank_avg": 0.16622743010520935, + "step": 2315, + "valid_targets_mean": 5404.4, + "valid_targets_min": 1235 }, { - "epoch": 2.878987341772152, - "grad_norm": 0.30434622336390565, - "learning_rate": 1.8277230731382593e-05, - "loss": 0.2535, + "epoch": 2.348354430379747, + "grad_norm": 0.36432427585365534, + "learning_rate": 2.5550773636416008e-05, + "loss": 0.3196, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028042972087860107, - "step": 711, - "valid_targets_mean": 4426.9, - "valid_targets_min": 1666 + "loss_rank_avg": 0.1854952722787857, + "step": 2320, + "valid_targets_mean": 3965.4, + "valid_targets_min": 1201 }, { - "epoch": 2.8830379746835444, - "grad_norm": 0.27371088369925034, - "learning_rate": 1.82208935733949e-05, - "loss": 0.2419, + "epoch": 2.353417721518987, + "grad_norm": 0.33468505049542446, + "learning_rate": 2.548285399001843e-05, + "loss": 0.3099, "loss_nan_ranks": 0, - "loss_rank_avg": 0.031526222825050354, - "step": 712, - "valid_targets_mean": 4659.2, - "valid_targets_min": 784 + "loss_rank_avg": 0.15984870493412018, + "step": 2325, + "valid_targets_mean": 3767.3, + "valid_targets_min": 900 }, { - "epoch": 2.8870886075949365, - "grad_norm": 0.2685540373810642, - "learning_rate": 1.8164570641094793e-05, - "loss": 0.2588, + "epoch": 2.3584810126582276, + "grad_norm": 0.30206170793614096, + "learning_rate": 2.5414865904083314e-05, + "loss": 0.3161, "loss_nan_ranks": 0, - "loss_rank_avg": 0.029739882797002792, - "step": 713, - "valid_targets_mean": 4144.9, - "valid_targets_min": 915 + "loss_rank_avg": 0.18496757745742798, + "step": 2330, + "valid_targets_mean": 5605.0, + "valid_targets_min": 1244 }, { - "epoch": 2.891139240506329, - "grad_norm": 0.2759997112656724, - "learning_rate": 1.810826238483892e-05, - "loss": 0.2436, + "epoch": 2.363544303797468, + "grad_norm": 0.3069574368019458, + "learning_rate": 2.534681022726962e-05, + "loss": 0.3143, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02822181209921837, - "step": 714, - "valid_targets_mean": 4396.9, - "valid_targets_min": 1350 + "loss_rank_avg": 0.20892611145973206, + "step": 2335, + "valid_targets_mean": 5443.6, + "valid_targets_min": 1904 }, { - "epoch": 2.8951898734177215, - "grad_norm": 0.2643249992462491, - "learning_rate": 1.8051969254866608e-05, - "loss": 0.2537, + "epoch": 2.3686075949367087, + "grad_norm": 0.324406187683953, + "learning_rate": 2.5278687809080017e-05, + "loss": 0.3157, "loss_nan_ranks": 0, - "loss_rank_avg": 0.033480800688266754, - "step": 715, - "valid_targets_mean": 4895.6, - "valid_targets_min": 1502 + "loss_rank_avg": 0.13418112695217133, + "step": 2340, + "valid_targets_mean": 3362.5, + "valid_targets_min": 1043 }, { - "epoch": 2.899240506329114, - "grad_norm": 0.2646348469873884, - "learning_rate": 1.7995691701296226e-05, - "loss": 0.2657, + "epoch": 2.3736708860759492, + "grad_norm": 0.32283224636579144, + "learning_rate": 2.5210499499850252e-05, + "loss": 0.3236, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030401896685361862, - "step": 716, - "valid_targets_mean": 4391.8, - "valid_targets_min": 1157 + "loss_rank_avg": 0.14372019469738007, + "step": 2345, + "valid_targets_mean": 3757.2, + "valid_targets_min": 1046 }, { - "epoch": 2.9032911392405065, - "grad_norm": 0.2597089268403161, - "learning_rate": 1.7939430174121593e-05, - "loss": 0.2472, + "epoch": 2.3787341772151898, + "grad_norm": 0.2954201993306155, + "learning_rate": 2.5142246150738566e-05, + "loss": 0.3132, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02844076231122017, - "step": 717, - "valid_targets_mean": 4140.0, - "valid_targets_min": 1646 + "loss_rank_avg": 0.16226419806480408, + "step": 2350, + "valid_targets_mean": 5083.2, + "valid_targets_min": 1909 }, { - "epoch": 2.9073417721518986, - "grad_norm": 0.26283592691048274, - "learning_rate": 1.788318512320838e-05, - "loss": 0.262, + "epoch": 2.3837974683544303, + "grad_norm": 0.278033728430402, + "learning_rate": 2.5073928613715053e-05, + "loss": 0.3122, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03905324637889862, - "step": 718, - "valid_targets_mean": 5344.9, - "valid_targets_min": 1717 + "loss_rank_avg": 0.13732075691223145, + "step": 2355, + "valid_targets_mean": 5177.6, + "valid_targets_min": 1378 }, { - "epoch": 2.911392405063291, - "grad_norm": 0.2716776578167852, - "learning_rate": 1.782695699829051e-05, - "loss": 0.239, + "epoch": 2.388860759493671, + "grad_norm": 0.3406821151852307, + "learning_rate": 2.5005547741551045e-05, + "loss": 0.3207, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021692726761102676, - "step": 719, - "valid_targets_mean": 2800.7, - "valid_targets_min": 1130 + "loss_rank_avg": 0.1702435463666916, + "step": 2360, + "valid_targets_mean": 3885.0, + "valid_targets_min": 1146 }, { - "epoch": 2.9154430379746836, - "grad_norm": 0.26509502419429243, - "learning_rate": 1.7770746248966587e-05, - "loss": 0.2532, + "epoch": 2.3939240506329114, + "grad_norm": 0.2924901782943758, + "learning_rate": 2.4937104387808434e-05, + "loss": 0.3117, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030680546537041664, - "step": 720, - "valid_targets_mean": 4364.1, - "valid_targets_min": 899 + "loss_rank_avg": 0.17096886038780212, + "step": 2365, + "valid_targets_mean": 5283.7, + "valid_targets_min": 1374 }, { - "epoch": 2.9194936708860757, - "grad_norm": 0.28772245040534483, - "learning_rate": 1.7714553324696243e-05, - "loss": 0.2698, + "epoch": 2.398987341772152, + "grad_norm": 0.30986779863862535, + "learning_rate": 2.4868599406829045e-05, + "loss": 0.308, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03688710182905197, - "step": 721, - "valid_targets_mean": 4688.4, - "valid_targets_min": 1281 + "loss_rank_avg": 0.17868509888648987, + "step": 2370, + "valid_targets_mean": 4610.5, + "valid_targets_min": 824 }, { - "epoch": 2.923544303797468, - "grad_norm": 0.2810229272917195, - "learning_rate": 1.7658378674796614e-05, - "loss": 0.2622, + "epoch": 2.4040506329113924, + "grad_norm": 0.2951664773380052, + "learning_rate": 2.4800033653723962e-05, + "loss": 0.3137, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02813960425555706, - "step": 722, - "valid_targets_mean": 4541.4, - "valid_targets_min": 1352 + "loss_rank_avg": 0.14138385653495789, + "step": 2375, + "valid_targets_mean": 4383.0, + "valid_targets_min": 1267 }, { - "epoch": 2.9275949367088607, - "grad_norm": 0.30002042412455127, - "learning_rate": 1.76022227484387e-05, - "loss": 0.2644, + "epoch": 2.409113924050633, + "grad_norm": 0.3206066119244924, + "learning_rate": 2.473140798436285e-05, + "loss": 0.3221, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03764427825808525, - "step": 723, - "valid_targets_mean": 5619.6, - "valid_targets_min": 1286 + "loss_rank_avg": 0.150266632437706, + "step": 2380, + "valid_targets_mean": 3835.7, + "valid_targets_min": 1476 }, { - "epoch": 2.9316455696202532, - "grad_norm": 0.25999917059624383, - "learning_rate": 1.7546085994643807e-05, - "loss": 0.2476, + "epoch": 2.4141772151898735, + "grad_norm": 0.30473370037176944, + "learning_rate": 2.466272325536329e-05, + "loss": 0.3015, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020120546221733093, - "step": 724, - "valid_targets_mean": 3131.7, - "valid_targets_min": 1088 + "loss_rank_avg": 0.19058401882648468, + "step": 2385, + "valid_targets_mean": 6384.8, + "valid_targets_min": 935 }, { - "epoch": 2.9356962025316458, - "grad_norm": 0.24301031977739251, - "learning_rate": 1.7489968862279902e-05, - "loss": 0.2401, + "epoch": 2.419240506329114, + "grad_norm": 0.31479938811797176, + "learning_rate": 2.4593980324080054e-05, + "loss": 0.3267, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030156666412949562, - "step": 725, - "valid_targets_mean": 4536.0, - "valid_targets_min": 889 + "loss_rank_avg": 0.11872898042201996, + "step": 2390, + "valid_targets_mean": 3557.1, + "valid_targets_min": 1099 }, { - "epoch": 2.939746835443038, - "grad_norm": 0.2834351965141156, - "learning_rate": 1.743387180005809e-05, - "loss": 0.2653, + "epoch": 2.4243037974683546, + "grad_norm": 0.3392666541666469, + "learning_rate": 2.4525180048594452e-05, + "loss": 0.3163, "loss_nan_ranks": 0, - "loss_rank_avg": 0.052154481410980225, - "step": 726, - "valid_targets_mean": 7368.1, - "valid_targets_min": 1318 + "loss_rank_avg": 0.15524475276470184, + "step": 2395, + "valid_targets_mean": 4042.5, + "valid_targets_min": 1334 }, { - "epoch": 2.9437974683544303, - "grad_norm": 0.27572670971938895, - "learning_rate": 1.737779525652899e-05, - "loss": 0.2646, + "epoch": 2.429367088607595, + "grad_norm": 0.3116160865837986, + "learning_rate": 2.445632328770354e-05, + "loss": 0.3079, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024445312097668648, - "step": 727, - "valid_targets_mean": 3959.4, - "valid_targets_min": 1066 + "loss_rank_avg": 0.14736303687095642, + "step": 2400, + "valid_targets_mean": 4778.8, + "valid_targets_min": 1194 }, { - "epoch": 2.947848101265823, - "grad_norm": 0.2704592646522639, - "learning_rate": 1.732173968007914e-05, - "loss": 0.2597, + "epoch": 2.4344303797468356, + "grad_norm": 0.3248761370284341, + "learning_rate": 2.438741090090951e-05, + "loss": 0.3148, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02838369458913803, - "step": 728, - "valid_targets_mean": 3935.4, - "valid_targets_min": 1243 + "loss_rank_avg": 0.1985064595937729, + "step": 2405, + "valid_targets_mean": 6140.9, + "valid_targets_min": 1429 }, { - "epoch": 2.951898734177215, - "grad_norm": 0.2685193260978328, - "learning_rate": 1.7265705518927437e-05, - "loss": 0.2485, + "epoch": 2.439493670886076, + "grad_norm": 0.2819598660386808, + "learning_rate": 2.431844374840886e-05, + "loss": 0.3066, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02375240996479988, - "step": 729, - "valid_targets_mean": 3453.5, - "valid_targets_min": 1300 + "loss_rank_avg": 0.13578057289123535, + "step": 2410, + "valid_targets_mean": 4853.0, + "valid_targets_min": 1602 }, { - "epoch": 2.9559493670886074, - "grad_norm": 0.2578427303145989, - "learning_rate": 1.7209693221121542e-05, - "loss": 0.254, + "epoch": 2.4445569620253167, + "grad_norm": 0.31885986982040326, + "learning_rate": 2.4249422691081722e-05, + "loss": 0.3057, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023774366825819016, - "step": 730, - "valid_targets_mean": 3456.4, - "valid_targets_min": 1458 + "loss_rank_avg": 0.15936186909675598, + "step": 2415, + "valid_targets_mean": 4181.2, + "valid_targets_min": 1349 }, { - "epoch": 2.96, - "grad_norm": 0.2546217929799125, - "learning_rate": 1.7153703234534302e-05, - "loss": 0.2468, + "epoch": 2.449620253164557, + "grad_norm": 0.30296726343845826, + "learning_rate": 2.4180348590481075e-05, + "loss": 0.3026, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02758769877254963, - "step": 731, - "valid_targets_mean": 4289.4, - "valid_targets_min": 976 + "loss_rank_avg": 0.12165558338165283, + "step": 2420, + "valid_targets_mean": 3419.1, + "valid_targets_min": 883 }, { - "epoch": 2.9640506329113925, - "grad_norm": 0.2654879948700078, - "learning_rate": 1.709773600686015e-05, - "loss": 0.2595, + "epoch": 2.4546835443037973, + "grad_norm": 0.309483869681844, + "learning_rate": 2.411122230882203e-05, + "loss": 0.3102, "loss_nan_ranks": 0, - "loss_rank_avg": 0.04343331605195999, - "step": 732, - "valid_targets_mean": 5717.3, - "valid_targets_min": 1722 + "loss_rank_avg": 0.1562597155570984, + "step": 2425, + "valid_targets_mean": 4920.2, + "valid_targets_min": 983 }, { - "epoch": 2.968101265822785, - "grad_norm": 0.25748033286051897, - "learning_rate": 1.7041791985611548e-05, - "loss": 0.2487, + "epoch": 2.459746835443038, + "grad_norm": 1.2885193054997446, + "learning_rate": 2.4042044708971027e-05, + "loss": 0.3045, "loss_nan_ranks": 0, - "loss_rank_avg": 0.050244733691215515, - "step": 733, - "valid_targets_mean": 6772.3, - "valid_targets_min": 1419 + "loss_rank_avg": 0.1394486129283905, + "step": 2430, + "valid_targets_mean": 4328.4, + "valid_targets_min": 1162 }, { - "epoch": 2.972151898734177, - "grad_norm": 0.2542798539074249, - "learning_rate": 1.69858716181154e-05, - "loss": 0.2452, + "epoch": 2.4648101265822784, + "grad_norm": 0.27206002510649857, + "learning_rate": 2.3972816654435103e-05, + "loss": 0.3161, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026083718985319138, - "step": 734, - "valid_targets_mean": 3278.6, - "valid_targets_min": 770 + "loss_rank_avg": 0.16065040230751038, + "step": 2435, + "valid_targets_mean": 6525.3, + "valid_targets_min": 1098 }, { - "epoch": 2.9762025316455696, - "grad_norm": 0.2572335702204434, - "learning_rate": 1.692997535150948e-05, - "loss": 0.2444, + "epoch": 2.469873417721519, + "grad_norm": 0.3427940884240252, + "learning_rate": 2.390353900935107e-05, + "loss": 0.3198, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03589995950460434, - "step": 735, - "valid_targets_mean": 5458.4, - "valid_targets_min": 1202 + "loss_rank_avg": 0.19091764092445374, + "step": 2440, + "valid_targets_mean": 4360.0, + "valid_targets_min": 1133 }, { - "epoch": 2.980253164556962, - "grad_norm": 0.254374544775352, - "learning_rate": 1.687410363273884e-05, - "loss": 0.2538, + "epoch": 2.4749367088607594, + "grad_norm": 0.42327373841330385, + "learning_rate": 2.3834212638474773e-05, + "loss": 0.306, "loss_nan_ranks": 0, - "loss_rank_avg": 0.029865849763154984, - "step": 736, - "valid_targets_mean": 4263.7, - "valid_targets_min": 1376 + "loss_rank_avg": 0.12319037318229675, + "step": 2445, + "valid_targets_mean": 3837.3, + "valid_targets_min": 873 }, { - "epoch": 2.984303797468354, - "grad_norm": 0.26497805309307315, - "learning_rate": 1.6818256908552257e-05, - "loss": 0.2593, + "epoch": 2.48, + "grad_norm": 0.49287136211086113, + "learning_rate": 2.376483840717026e-05, + "loss": 0.3236, "loss_nan_ranks": 0, - "loss_rank_avg": 0.038646139204502106, - "step": 737, - "valid_targets_mean": 5068.5, - "valid_targets_min": 1461 + "loss_rank_avg": 0.15457025170326233, + "step": 2450, + "valid_targets_mean": 3929.6, + "valid_targets_min": 1205 }, { - "epoch": 2.9883544303797467, - "grad_norm": 0.2569838835553243, - "learning_rate": 1.6762435625498646e-05, - "loss": 0.2453, + "epoch": 2.4850632911392405, + "grad_norm": 0.32265608836896076, + "learning_rate": 2.3695417181399004e-05, + "loss": 0.3138, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02871629409492016, - "step": 738, - "valid_targets_mean": 4765.9, - "valid_targets_min": 1531 + "loss_rank_avg": 0.13117042183876038, + "step": 2455, + "valid_targets_mean": 3683.8, + "valid_targets_min": 1015 }, { - "epoch": 2.992405063291139, - "grad_norm": 0.2530215798640709, - "learning_rate": 1.67066402299235e-05, - "loss": 0.2662, + "epoch": 2.490126582278481, + "grad_norm": 0.28497626230687617, + "learning_rate": 2.362594982770909e-05, + "loss": 0.3071, "loss_nan_ranks": 0, - "loss_rank_avg": 0.036961380392313004, - "step": 739, - "valid_targets_mean": 5032.2, - "valid_targets_min": 1194 + "loss_rank_avg": 0.13846123218536377, + "step": 2460, + "valid_targets_mean": 4725.8, + "valid_targets_min": 1651 }, { - "epoch": 2.9964556962025317, - "grad_norm": 0.26245999391543, - "learning_rate": 1.6650871167965313e-05, - "loss": 0.2544, + "epoch": 2.4951898734177216, + "grad_norm": 0.30556606202877784, + "learning_rate": 2.3556437213224378e-05, + "loss": 0.3149, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02777264080941677, - "step": 740, - "valid_targets_mean": 3679.3, - "valid_targets_min": 1109 + "loss_rank_avg": 0.1903924196958542, + "step": 2465, + "valid_targets_mean": 5195.9, + "valid_targets_min": 1317 }, { - "epoch": 3.0, - "grad_norm": 0.2793670359330504, - "learning_rate": 1.6595128885552028e-05, - "loss": 0.2518, + "epoch": 2.500253164556962, + "grad_norm": 0.2957801020557521, + "learning_rate": 2.348688020563371e-05, + "loss": 0.3202, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03139267489314079, - "step": 741, - "valid_targets_mean": 3354.4, - "valid_targets_min": 977 + "loss_rank_avg": 0.15225407481193542, + "step": 2470, + "valid_targets_mean": 4614.5, + "valid_targets_min": 892 }, { - "epoch": 3.0040506329113925, - "grad_norm": 0.4518355029730338, - "learning_rate": 1.653941382839745e-05, - "loss": 0.1875, + "epoch": 2.5053164556962026, + "grad_norm": 0.29068264222113155, + "learning_rate": 2.3417279673180048e-05, + "loss": 0.312, "loss_nan_ranks": 0, - "loss_rank_avg": 0.029589787125587463, - "step": 742, - "valid_targets_mean": 5513.1, - "valid_targets_min": 1195 + "loss_rank_avg": 0.15005727112293243, + "step": 2475, + "valid_targets_mean": 5253.2, + "valid_targets_min": 1364 }, { - "epoch": 3.008101265822785, - "grad_norm": 0.3131036035935897, - "learning_rate": 1.6483726441997673e-05, - "loss": 0.1707, + "epoch": 2.510379746835443, + "grad_norm": 0.34824655792906656, + "learning_rate": 2.3347636484649662e-05, + "loss": 0.3107, "loss_nan_ranks": 0, - "loss_rank_avg": 0.022383853793144226, - "step": 743, - "valid_targets_mean": 5414.9, + "loss_rank_avg": 0.11774547398090363, + "step": 2480, + "valid_targets_mean": 3077.8, "valid_targets_min": 1062 }, { - "epoch": 3.012151898734177, - "grad_norm": 0.43578496818594536, - "learning_rate": 1.642806717162757e-05, - "loss": 0.1798, + "epoch": 2.5154430379746833, + "grad_norm": 0.3255931573908884, + "learning_rate": 2.3277951509361273e-05, + "loss": 0.3317, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018505379557609558, - "step": 744, - "valid_targets_mean": 3892.1, - "valid_targets_min": 1224 + "loss_rank_avg": 0.20479710400104523, + "step": 2485, + "valid_targets_mean": 5133.7, + "valid_targets_min": 1091 }, { - "epoch": 3.0162025316455696, - "grad_norm": 0.5303532489283547, - "learning_rate": 1.637243646233718e-05, - "loss": 0.1797, + "epoch": 2.520506329113924, + "grad_norm": 0.30263280385351665, + "learning_rate": 2.3208225617155206e-05, + "loss": 0.3121, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02654384821653366, - "step": 745, - "valid_targets_mean": 4359.8, - "valid_targets_min": 1391 + "loss_rank_avg": 0.14676184952259064, + "step": 2490, + "valid_targets_mean": 4788.9, + "valid_targets_min": 1004 }, { - "epoch": 3.020253164556962, - "grad_norm": 0.36041463705304083, - "learning_rate": 1.6316834758948174e-05, - "loss": 0.1723, + "epoch": 2.5255696202531643, + "grad_norm": 0.34344447055299104, + "learning_rate": 2.3138459678382524e-05, + "loss": 0.3156, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01781952753663063, - "step": 746, - "valid_targets_mean": 4052.8, - "valid_targets_min": 1279 + "loss_rank_avg": 0.18363483250141144, + "step": 2495, + "valid_targets_mean": 4170.4, + "valid_targets_min": 1007 }, { - "epoch": 3.0243037974683546, - "grad_norm": 0.3558315031597273, - "learning_rate": 1.6261262506050282e-05, - "loss": 0.1742, + "epoch": 2.530632911392405, + "grad_norm": 0.2844384480121892, + "learning_rate": 2.3068654563894183e-05, + "loss": 0.3172, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01695730909705162, - "step": 747, - "valid_targets_mean": 3121.8, - "valid_targets_min": 788 + "loss_rank_avg": 0.12592104077339172, + "step": 2500, + "valid_targets_mean": 4493.2, + "valid_targets_min": 1334 }, { - "epoch": 3.0283544303797467, - "grad_norm": 0.38372608971688293, - "learning_rate": 1.620572014799777e-05, - "loss": 0.1711, + "epoch": 2.5356962025316454, + "grad_norm": 0.32631729959150463, + "learning_rate": 2.2998811145030117e-05, + "loss": 0.3101, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026282865554094315, - "step": 748, - "valid_targets_mean": 5316.9, - "valid_targets_min": 1490 + "loss_rank_avg": 0.15357106924057007, + "step": 2505, + "valid_targets_mean": 4361.8, + "valid_targets_min": 1071 }, { - "epoch": 3.0324050632911392, - "grad_norm": 0.3293468845617277, - "learning_rate": 1.6150208128905857e-05, - "loss": 0.1635, + "epoch": 2.540759493670886, + "grad_norm": 0.2650853449197667, + "learning_rate": 2.2928930293608435e-05, + "loss": 0.307, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02892507240176201, - "step": 749, - "valid_targets_mean": 5845.9, - "valid_targets_min": 1405 + "loss_rank_avg": 0.13759788870811462, + "step": 2510, + "valid_targets_mean": 5130.8, + "valid_targets_min": 1080 }, { - "epoch": 3.0364556962025318, - "grad_norm": 0.31716354074304054, - "learning_rate": 1.6094726892647147e-05, - "loss": 0.1717, + "epoch": 2.5458227848101265, + "grad_norm": 0.3267807848788021, + "learning_rate": 2.2859012881914464e-05, + "loss": 0.3037, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024220801889896393, - "step": 750, - "valid_targets_mean": 5615.8, - "valid_targets_min": 1425 + "loss_rank_avg": 0.16794613003730774, + "step": 2515, + "valid_targets_mean": 4673.6, + "valid_targets_min": 1083 }, { - "epoch": 3.0405063291139243, - "grad_norm": 0.3664828568121349, - "learning_rate": 1.6039276882848135e-05, - "loss": 0.1762, + "epoch": 2.550886075949367, + "grad_norm": 0.278879282582265, + "learning_rate": 2.278905978268992e-05, + "loss": 0.2928, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013677317649126053, - "step": 751, - "valid_targets_mean": 3208.0, - "valid_targets_min": 970 + "loss_rank_avg": 0.11442182958126068, + "step": 2520, + "valid_targets_mean": 4874.6, + "valid_targets_min": 1101 }, { - "epoch": 3.0445569620253163, - "grad_norm": 0.33903502187956835, - "learning_rate": 1.5983858542885635e-05, - "loss": 0.1735, + "epoch": 2.5559493670886075, + "grad_norm": 0.32085560175743244, + "learning_rate": 2.271907186912196e-05, + "loss": 0.3119, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018305784091353416, - "step": 752, - "valid_targets_mean": 3494.2, - "valid_targets_min": 1546 + "loss_rank_avg": 0.12187394499778748, + "step": 2525, + "valid_targets_mean": 3503.1, + "valid_targets_min": 882 }, { - "epoch": 3.048607594936709, - "grad_norm": 0.3498757139361586, - "learning_rate": 1.5928472315883203e-05, - "loss": 0.1754, + "epoch": 2.561012658227848, + "grad_norm": 0.3284259732307929, + "learning_rate": 2.2649050014832326e-05, + "loss": 0.3112, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014736996963620186, - "step": 753, - "valid_targets_mean": 3807.8, - "valid_targets_min": 1288 + "loss_rank_avg": 0.16201844811439514, + "step": 2530, + "valid_targets_mean": 4727.4, + "valid_targets_min": 1505 }, { - "epoch": 3.0526582278481014, - "grad_norm": 0.34193040921745776, - "learning_rate": 1.5873118644707633e-05, - "loss": 0.1822, + "epoch": 2.5660759493670886, + "grad_norm": 0.2991569094124765, + "learning_rate": 2.2578995093866424e-05, + "loss": 0.3126, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02990785986185074, - "step": 754, - "valid_targets_mean": 5325.8, - "valid_targets_min": 1245 + "loss_rank_avg": 0.1367436945438385, + "step": 2535, + "valid_targets_mean": 5286.4, + "valid_targets_min": 1301 }, { - "epoch": 3.056708860759494, - "grad_norm": 0.33397160073921883, - "learning_rate": 1.5817797971965413e-05, - "loss": 0.1821, + "epoch": 2.571139240506329, + "grad_norm": 0.2923839601296598, + "learning_rate": 2.2508907980682406e-05, + "loss": 0.3066, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014044428244233131, - "step": 755, - "valid_targets_mean": 3148.8, - "valid_targets_min": 1382 + "loss_rank_avg": 0.15937817096710205, + "step": 2540, + "valid_targets_mean": 5033.3, + "valid_targets_min": 1409 }, { - "epoch": 3.060759493670886, - "grad_norm": 0.3323577994417775, - "learning_rate": 1.576251073999917e-05, - "loss": 0.1682, + "epoch": 2.5762025316455697, + "grad_norm": 0.29834149017437644, + "learning_rate": 2.2438789550140272e-05, + "loss": 0.3208, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01589152216911316, - "step": 756, - "valid_targets_mean": 3347.2, - "valid_targets_min": 1044 + "loss_rank_avg": 0.1662355363368988, + "step": 2545, + "valid_targets_mean": 4474.9, + "valid_targets_min": 1458 }, { - "epoch": 3.0648101265822785, - "grad_norm": 0.33502996544962327, - "learning_rate": 1.5707257390884126e-05, - "loss": 0.1638, + "epoch": 2.58126582278481, + "grad_norm": 0.31248285204558157, + "learning_rate": 2.2368640677490935e-05, + "loss": 0.3111, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017870556563138962, - "step": 757, - "valid_targets_mean": 3450.2, - "valid_targets_min": 745 + "loss_rank_avg": 0.12987586855888367, + "step": 2550, + "valid_targets_mean": 4253.7, + "valid_targets_min": 1334 }, { - "epoch": 3.068860759493671, - "grad_norm": 0.30464635685328645, - "learning_rate": 1.5652038366424595e-05, - "loss": 0.1663, + "epoch": 2.5863291139240507, + "grad_norm": 0.2913592791130016, + "learning_rate": 2.2298462238365293e-05, + "loss": 0.3037, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024972693994641304, - "step": 758, - "valid_targets_mean": 5675.8, - "valid_targets_min": 1103 + "loss_rank_avg": 0.20642492175102234, + "step": 2555, + "valid_targets_mean": 6304.3, + "valid_targets_min": 1349 }, { - "epoch": 3.0729113924050635, - "grad_norm": 0.317761137059346, - "learning_rate": 1.5596854108150424e-05, - "loss": 0.1715, + "epoch": 2.5913924050632913, + "grad_norm": 0.35349413261981677, + "learning_rate": 2.222825510876332e-05, + "loss": 0.3202, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01955476403236389, - "step": 759, - "valid_targets_mean": 4004.8, - "valid_targets_min": 1108 + "loss_rank_avg": 0.17145583033561707, + "step": 2560, + "valid_targets_mean": 4713.6, + "valid_targets_min": 1409 }, { - "epoch": 3.0769620253164556, - "grad_norm": 0.29358636528638604, - "learning_rate": 1.5541705057313476e-05, - "loss": 0.1712, + "epoch": 2.596455696202532, + "grad_norm": 0.3451688511848506, + "learning_rate": 2.2158020165043114e-05, + "loss": 0.3105, "loss_nan_ranks": 0, - "loss_rank_avg": 0.019088895991444588, - "step": 760, - "valid_targets_mean": 4483.4, - "valid_targets_min": 1176 + "loss_rank_avg": 0.16764461994171143, + "step": 2565, + "valid_targets_mean": 3924.4, + "valid_targets_min": 1285 }, { - "epoch": 3.081012658227848, - "grad_norm": 0.3073330115701503, - "learning_rate": 1.5486591654884086e-05, - "loss": 0.1718, + "epoch": 2.6015189873417723, + "grad_norm": 0.2797909726976478, + "learning_rate": 2.2087758283909954e-05, + "loss": 0.3196, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015020224265754223, - "step": 761, - "valid_targets_mean": 3669.2, - "valid_targets_min": 1121 + "loss_rank_avg": 0.1547449380159378, + "step": 2570, + "valid_targets_mean": 5481.8, + "valid_targets_min": 1376 }, { - "epoch": 3.0850632911392406, - "grad_norm": 0.299608173186423, - "learning_rate": 1.543151434154755e-05, - "loss": 0.1778, + "epoch": 2.606582278481013, + "grad_norm": 0.3101010476898593, + "learning_rate": 2.201747034240537e-05, + "loss": 0.3135, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02530589886009693, - "step": 762, - "valid_targets_mean": 5061.1, - "valid_targets_min": 1440 + "loss_rank_avg": 0.1627534031867981, + "step": 2575, + "valid_targets_mean": 4466.9, + "valid_targets_min": 987 }, { - "epoch": 3.089113924050633, - "grad_norm": 0.27825960482122103, - "learning_rate": 1.5376473557700615e-05, - "loss": 0.1802, + "epoch": 2.6116455696202534, + "grad_norm": 1.1618173704395123, + "learning_rate": 2.1947157217896188e-05, + "loss": 0.3095, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015576534904539585, - "step": 763, - "valid_targets_mean": 3578.2, - "valid_targets_min": 1242 + "loss_rank_avg": 0.165956050157547, + "step": 2580, + "valid_targets_mean": 4300.5, + "valid_targets_min": 778 }, { - "epoch": 3.093164556962025, - "grad_norm": 0.3316565116707584, - "learning_rate": 1.5321469743447893e-05, - "loss": 0.1749, + "epoch": 2.616708860759494, + "grad_norm": 0.31858634458407287, + "learning_rate": 2.1876819788063586e-05, + "loss": 0.3115, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024011485278606415, - "step": 764, - "valid_targets_mean": 4485.5, - "valid_targets_min": 1289 + "loss_rank_avg": 0.15230509638786316, + "step": 2585, + "valid_targets_mean": 3920.5, + "valid_targets_min": 1479 }, { - "epoch": 3.0972151898734177, - "grad_norm": 0.29137811908746475, - "learning_rate": 1.5266503338598434e-05, - "loss": 0.1722, + "epoch": 2.621772151898734, + "grad_norm": 0.30557996470586596, + "learning_rate": 2.1806458930892127e-05, + "loss": 0.3005, "loss_nan_ranks": 0, - "loss_rank_avg": 0.025936177000403404, - "step": 765, - "valid_targets_mean": 4847.2, - "valid_targets_min": 1336 + "loss_rank_avg": 0.18883772194385529, + "step": 2590, + "valid_targets_mean": 6008.9, + "valid_targets_min": 1408 }, { - "epoch": 3.1012658227848102, - "grad_norm": 0.29389891839150223, - "learning_rate": 1.5211574782662149e-05, - "loss": 0.1728, + "epoch": 2.6268354430379746, + "grad_norm": 0.2883450262202285, + "learning_rate": 2.173607552465881e-05, + "loss": 0.3166, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021893944591283798, - "step": 766, - "valid_targets_mean": 5340.1, - "valid_targets_min": 1699 + "loss_rank_avg": 0.17338699102401733, + "step": 2595, + "valid_targets_mean": 5570.2, + "valid_targets_min": 1526 }, { - "epoch": 3.1053164556962027, - "grad_norm": 0.29807222422096724, - "learning_rate": 1.5156684514846307e-05, - "loss": 0.1695, + "epoch": 2.631898734177215, + "grad_norm": 0.33234113394369846, + "learning_rate": 2.1665670447922084e-05, + "loss": 0.3129, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02523096464574337, - "step": 767, - "valid_targets_mean": 4752.1, - "valid_targets_min": 875 + "loss_rank_avg": 0.12816452980041504, + "step": 2600, + "valid_targets_mean": 4282.2, + "valid_targets_min": 812 }, { - "epoch": 3.109367088607595, - "grad_norm": 0.30795542223986466, - "learning_rate": 1.5101832974052012e-05, - "loss": 0.1809, + "epoch": 2.6369620253164556, + "grad_norm": 0.31643919888427663, + "learning_rate": 2.1595244579510933e-05, + "loss": 0.3073, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026390738785266876, - "step": 768, - "valid_targets_mean": 4690.3, - "valid_targets_min": 885 + "loss_rank_avg": 0.17621192336082458, + "step": 2605, + "valid_targets_mean": 5060.8, + "valid_targets_min": 1369 }, { - "epoch": 3.1134177215189873, - "grad_norm": 0.31729949382152767, - "learning_rate": 1.5047020598870737e-05, - "loss": 0.18, + "epoch": 2.642025316455696, + "grad_norm": 0.30039427757871706, + "learning_rate": 2.1524798798513835e-05, + "loss": 0.3023, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02073775604367256, - "step": 769, - "valid_targets_mean": 4297.0, - "valid_targets_min": 1332 + "loss_rank_avg": 0.16294711828231812, + "step": 2610, + "valid_targets_mean": 4472.4, + "valid_targets_min": 1304 }, { - "epoch": 3.11746835443038, - "grad_norm": 0.3112664533988072, - "learning_rate": 1.4992247827580778e-05, - "loss": 0.1779, + "epoch": 2.6470886075949367, + "grad_norm": 0.3231328181820057, + "learning_rate": 2.1454333984267852e-05, + "loss": 0.322, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02091725915670395, - "step": 770, - "valid_targets_mean": 3788.1, - "valid_targets_min": 778 + "loss_rank_avg": 0.16857695579528809, + "step": 2615, + "valid_targets_mean": 5128.5, + "valid_targets_min": 1215 }, { - "epoch": 3.1215189873417724, - "grad_norm": 0.30766965175005956, - "learning_rate": 1.493751509814374e-05, - "loss": 0.1772, + "epoch": 2.6521518987341772, + "grad_norm": 0.331376941255876, + "learning_rate": 2.1383851016347614e-05, + "loss": 0.3105, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016842763870954514, - "step": 771, - "valid_targets_mean": 3957.2, - "valid_targets_min": 1020 + "loss_rank_avg": 0.1293996274471283, + "step": 2620, + "valid_targets_mean": 3696.2, + "valid_targets_min": 781 }, { - "epoch": 3.1255696202531644, - "grad_norm": 0.3033325549159383, - "learning_rate": 1.4882822848201073e-05, - "loss": 0.1714, + "epoch": 2.6572151898734178, + "grad_norm": 0.2898986778677013, + "learning_rate": 2.1313350774554366e-05, + "loss": 0.308, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03626658022403717, - "step": 772, - "valid_targets_mean": 7330.2, - "valid_targets_min": 1320 + "loss_rank_avg": 0.14066889882087708, + "step": 2625, + "valid_targets_mean": 4409.5, + "valid_targets_min": 1181 }, { - "epoch": 3.129620253164557, - "grad_norm": 0.3087107442257974, - "learning_rate": 1.4828171515070553e-05, - "loss": 0.1776, + "epoch": 2.6622784810126583, + "grad_norm": 0.3792545064079998, + "learning_rate": 2.1242834138904962e-05, + "loss": 0.3034, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02217162773013115, - "step": 773, - "valid_targets_mean": 4305.2, - "valid_targets_min": 1379 + "loss_rank_avg": 0.14408881962299347, + "step": 2630, + "valid_targets_mean": 3892.1, + "valid_targets_min": 1313 }, { - "epoch": 3.1336708860759495, - "grad_norm": 0.2828472592017574, - "learning_rate": 1.4773561535742793e-05, - "loss": 0.1567, + "epoch": 2.667341772151899, + "grad_norm": 0.34582044479522395, + "learning_rate": 2.1172301989620898e-05, + "loss": 0.3087, "loss_nan_ranks": 0, - "loss_rank_avg": 0.025130566209554672, - "step": 774, - "valid_targets_mean": 5140.2, - "valid_targets_min": 1524 + "loss_rank_avg": 0.15445685386657715, + "step": 2635, + "valid_targets_mean": 4015.2, + "valid_targets_min": 743 }, { - "epoch": 3.137721518987342, - "grad_norm": 0.2879324146158943, - "learning_rate": 1.4718993346877715e-05, - "loss": 0.1734, + "epoch": 2.6724050632911394, + "grad_norm": 0.31734348855671907, + "learning_rate": 2.110175520711731e-05, + "loss": 0.3182, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01935396157205105, - "step": 775, - "valid_targets_mean": 4015.8, - "valid_targets_min": 1750 + "loss_rank_avg": 0.14247769117355347, + "step": 2640, + "valid_targets_mean": 4152.1, + "valid_targets_min": 1532 }, { - "epoch": 3.141772151898734, - "grad_norm": 0.2861796742039433, - "learning_rate": 1.466446738480111e-05, - "loss": 0.1625, + "epoch": 2.67746835443038, + "grad_norm": 0.3108375470698165, + "learning_rate": 2.1031194671992013e-05, + "loss": 0.3145, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023186706006526947, - "step": 776, - "valid_targets_mean": 5214.1, - "valid_targets_min": 1322 + "loss_rank_avg": 0.13973818719387054, + "step": 2645, + "valid_targets_mean": 4321.8, + "valid_targets_min": 1096 }, { - "epoch": 3.1458227848101266, - "grad_norm": 0.2797425568919168, - "learning_rate": 1.460998408550112e-05, - "loss": 0.1775, + "epoch": 2.68253164556962, + "grad_norm": 0.3378134291776038, + "learning_rate": 2.0960621265014462e-05, + "loss": 0.3105, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02018989436328411, - "step": 777, - "valid_targets_mean": 4186.8, - "valid_targets_min": 1263 + "loss_rank_avg": 0.17127710580825806, + "step": 2650, + "valid_targets_mean": 4009.1, + "valid_targets_min": 1241 }, { - "epoch": 3.149873417721519, - "grad_norm": 0.28560455048945466, - "learning_rate": 1.4555543884624751e-05, - "loss": 0.1679, + "epoch": 2.6875949367088605, + "grad_norm": 0.29939660412855834, + "learning_rate": 2.0890035867114808e-05, + "loss": 0.3131, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014386219903826714, - "step": 778, - "valid_targets_mean": 3198.4, - "valid_targets_min": 979 + "loss_rank_avg": 0.15695559978485107, + "step": 2655, + "valid_targets_mean": 5238.8, + "valid_targets_min": 1128 }, { - "epoch": 3.1539240506329116, - "grad_norm": 0.29510716090586336, - "learning_rate": 1.4501147217474402e-05, - "loss": 0.1673, + "epoch": 2.692658227848101, + "grad_norm": 0.3230482978021318, + "learning_rate": 2.081943935937286e-05, + "loss": 0.3075, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018298868089914322, - "step": 779, - "valid_targets_mean": 3162.6, - "valid_targets_min": 1179 + "loss_rank_avg": 0.1587713658809662, + "step": 2660, + "valid_targets_mean": 4748.3, + "valid_targets_min": 1323 }, { - "epoch": 3.1579746835443037, - "grad_norm": 0.29166498949590175, - "learning_rate": 1.444679451900437e-05, - "loss": 0.1723, + "epoch": 2.6977215189873416, + "grad_norm": 0.29402248645547446, + "learning_rate": 2.0748832623007117e-05, + "loss": 0.3217, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03829861432313919, - "step": 780, - "valid_targets_mean": 8741.9, - "valid_targets_min": 1108 + "loss_rank_avg": 0.10896088182926178, + "step": 2665, + "valid_targets_mean": 3750.2, + "valid_targets_min": 1426 }, { - "epoch": 3.162025316455696, - "grad_norm": 0.27182783348812456, - "learning_rate": 1.4392486223817397e-05, - "loss": 0.173, + "epoch": 2.702784810126582, + "grad_norm": 0.27936531158953143, + "learning_rate": 2.067821653936375e-05, + "loss": 0.3005, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024232111871242523, - "step": 781, - "valid_targets_mean": 5093.5, - "valid_targets_min": 1537 + "loss_rank_avg": 0.1711559295654297, + "step": 2670, + "valid_targets_mean": 6852.6, + "valid_targets_min": 1209 }, { - "epoch": 3.1660759493670887, - "grad_norm": 0.29577602878611126, - "learning_rate": 1.4338222766161143e-05, - "loss": 0.1805, + "epoch": 2.7078481012658226, + "grad_norm": 0.28924656402783777, + "learning_rate": 2.060759198990561e-05, + "loss": 0.3086, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01894107460975647, - "step": 782, - "valid_targets_mean": 3929.9, - "valid_targets_min": 1014 + "loss_rank_avg": 0.14176371693611145, + "step": 2675, + "valid_targets_mean": 5084.8, + "valid_targets_min": 1430 }, { - "epoch": 3.170126582278481, - "grad_norm": 0.2899090365502566, - "learning_rate": 1.4284004579924777e-05, - "loss": 0.1637, + "epoch": 2.712911392405063, + "grad_norm": 0.3123266627731589, + "learning_rate": 2.053695985620123e-05, + "loss": 0.3074, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021671906113624573, - "step": 783, - "valid_targets_mean": 4770.2, - "valid_targets_min": 1293 + "loss_rank_avg": 0.15624725818634033, + "step": 2680, + "valid_targets_mean": 4276.9, + "valid_targets_min": 1150 }, { - "epoch": 3.1741772151898733, - "grad_norm": 0.27733370216002134, - "learning_rate": 1.4229832098635479e-05, - "loss": 0.1721, + "epoch": 2.7179746835443037, + "grad_norm": 0.30428837341523135, + "learning_rate": 2.0466321019913787e-05, + "loss": 0.3083, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024629831314086914, - "step": 784, - "valid_targets_mean": 6038.6, - "valid_targets_min": 1443 + "loss_rank_avg": 0.12444409728050232, + "step": 2685, + "valid_targets_mean": 5104.2, + "valid_targets_min": 1036 }, { - "epoch": 3.178227848101266, - "grad_norm": 0.3000817308258268, - "learning_rate": 1.4175705755454963e-05, - "loss": 0.1679, + "epoch": 2.7230379746835442, + "grad_norm": 0.2899541063761625, + "learning_rate": 2.0395676362790157e-05, + "loss": 0.3139, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0261150561273098, - "step": 785, - "valid_targets_mean": 4464.9, - "valid_targets_min": 1302 + "loss_rank_avg": 0.18925632536411285, + "step": 2690, + "valid_targets_mean": 6929.8, + "valid_targets_min": 1156 }, { - "epoch": 3.1822784810126583, - "grad_norm": 0.2785628374210658, - "learning_rate": 1.412162598317602e-05, - "loss": 0.1725, + "epoch": 2.728101265822785, + "grad_norm": 0.3000387772557557, + "learning_rate": 2.032502676664985e-05, + "loss": 0.3256, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020167309790849686, - "step": 786, - "valid_targets_mean": 4205.3, - "valid_targets_min": 1347 + "loss_rank_avg": 0.15346036851406097, + "step": 2695, + "valid_targets_mean": 4285.7, + "valid_targets_min": 1379 }, { - "epoch": 3.186329113924051, - "grad_norm": 0.2766193388956131, - "learning_rate": 1.406759321421907e-05, - "loss": 0.1722, + "epoch": 2.7331645569620253, + "grad_norm": 0.3021057118413619, + "learning_rate": 2.025437311337404e-05, + "loss": 0.3162, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012921443209052086, - "step": 787, - "valid_targets_mean": 3072.4, - "valid_targets_min": 1502 + "loss_rank_avg": 0.14610332250595093, + "step": 2700, + "valid_targets_mean": 4591.1, + "valid_targets_min": 1675 }, { - "epoch": 3.190379746835443, - "grad_norm": 0.2835705291271444, - "learning_rate": 1.401360788062871e-05, - "loss": 0.1718, + "epoch": 2.738227848101266, + "grad_norm": 0.32691877516784146, + "learning_rate": 2.0183716284894533e-05, + "loss": 0.3225, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012393220327794552, - "step": 788, - "valid_targets_mean": 3018.7, - "valid_targets_min": 879 + "loss_rank_avg": 0.14551448822021484, + "step": 2705, + "valid_targets_mean": 3989.2, + "valid_targets_min": 919 }, { - "epoch": 3.1944303797468354, - "grad_norm": 0.29540883185162925, - "learning_rate": 1.3959670414070204e-05, - "loss": 0.1649, + "epoch": 2.7432911392405064, + "grad_norm": 0.3657473594680315, + "learning_rate": 2.011305716318278e-05, + "loss": 0.3074, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026593632996082306, - "step": 789, - "valid_targets_mean": 4636.0, - "valid_targets_min": 1379 + "loss_rank_avg": 0.22241473197937012, + "step": 2710, + "valid_targets_mean": 6030.1, + "valid_targets_min": 1316 }, { - "epoch": 3.198481012658228, - "grad_norm": 0.2911916198878864, - "learning_rate": 1.3905781245826108e-05, - "loss": 0.1701, + "epoch": 2.748354430379747, + "grad_norm": 0.3038961961226564, + "learning_rate": 2.004239663023885e-05, + "loss": 0.3028, "loss_nan_ranks": 0, - "loss_rank_avg": 0.027367331087589264, - "step": 790, - "valid_targets_mean": 4947.6, - "valid_targets_min": 1267 + "loss_rank_avg": 0.16947516798973083, + "step": 2715, + "valid_targets_mean": 5248.6, + "valid_targets_min": 1555 }, { - "epoch": 3.2025316455696204, - "grad_norm": 0.29329356188410366, - "learning_rate": 1.3851940806792778e-05, - "loss": 0.1624, + "epoch": 2.7534177215189874, + "grad_norm": 0.3050248442776484, + "learning_rate": 1.997173556808043e-05, + "loss": 0.294, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017680518329143524, - "step": 791, - "valid_targets_mean": 3998.1, - "valid_targets_min": 870 + "loss_rank_avg": 0.1630948781967163, + "step": 2720, + "valid_targets_mean": 5884.7, + "valid_targets_min": 895 }, { - "epoch": 3.2065822784810125, - "grad_norm": 0.3083830348494984, - "learning_rate": 1.379814952747693e-05, - "loss": 0.1696, + "epoch": 2.758481012658228, + "grad_norm": 0.35402206211471576, + "learning_rate": 1.9901074858731825e-05, + "loss": 0.2996, "loss_nan_ranks": 0, - "loss_rank_avg": 0.025335486978292465, - "step": 792, - "valid_targets_mean": 5041.7, - "valid_targets_min": 1133 + "loss_rank_avg": 0.1823733150959015, + "step": 2725, + "valid_targets_mean": 4029.6, + "valid_targets_min": 1404 }, { - "epoch": 3.210632911392405, - "grad_norm": 0.2798046695321331, - "learning_rate": 1.3744407837992193e-05, - "loss": 0.1762, + "epoch": 2.7635443037974685, + "grad_norm": 0.2902629337004639, + "learning_rate": 1.98304153842129e-05, + "loss": 0.313, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020378734916448593, - "step": 793, - "valid_targets_mean": 4000.8, - "valid_targets_min": 1305 + "loss_rank_avg": 0.1647607535123825, + "step": 2730, + "valid_targets_mean": 5231.3, + "valid_targets_min": 1100 }, { - "epoch": 3.2146835443037975, - "grad_norm": 0.2781061493945947, - "learning_rate": 1.3690716168055692e-05, - "loss": 0.1723, + "epoch": 2.768607594936709, + "grad_norm": 0.2844550099011252, + "learning_rate": 1.9759758026528162e-05, + "loss": 0.3181, "loss_nan_ranks": 0, - "loss_rank_avg": 0.029875535517930984, - "step": 794, - "valid_targets_mean": 6269.0, - "valid_targets_min": 1199 + "loss_rank_avg": 0.20809857547283173, + "step": 2735, + "valid_targets_mean": 6735.6, + "valid_targets_min": 1644 }, { - "epoch": 3.21873417721519, - "grad_norm": 0.28425247889799193, - "learning_rate": 1.363707494698459e-05, - "loss": 0.1747, + "epoch": 2.7736708860759496, + "grad_norm": 0.3348579330081765, + "learning_rate": 1.9689103667655634e-05, + "loss": 0.3088, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01849631406366825, - "step": 795, - "valid_targets_mean": 4173.1, - "valid_targets_min": 1121 + "loss_rank_avg": 0.1527232825756073, + "step": 2740, + "valid_targets_mean": 3877.4, + "valid_targets_min": 1491 }, { - "epoch": 3.222784810126582, - "grad_norm": 0.29167790078536165, - "learning_rate": 1.358348460369265e-05, - "loss": 0.1648, + "epoch": 2.77873417721519, + "grad_norm": 0.33857120124730805, + "learning_rate": 1.9618453189535958e-05, + "loss": 0.3168, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016716711223125458, - "step": 796, - "valid_targets_mean": 3498.2, - "valid_targets_min": 1028 + "loss_rank_avg": 0.20601877570152283, + "step": 2745, + "valid_targets_mean": 4548.2, + "valid_targets_min": 1232 }, { - "epoch": 3.2268354430379746, - "grad_norm": 0.2706793655937343, - "learning_rate": 1.352994556668684e-05, - "loss": 0.176, + "epoch": 2.7837974683544306, + "grad_norm": 0.3346701582946249, + "learning_rate": 1.9547807474061303e-05, + "loss": 0.2983, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02109239064157009, - "step": 797, - "valid_targets_mean": 4928.1, - "valid_targets_min": 1692 + "loss_rank_avg": 0.13511428236961365, + "step": 2750, + "valid_targets_mean": 3345.6, + "valid_targets_min": 1298 }, { - "epoch": 3.230886075949367, - "grad_norm": 0.2858053934845192, - "learning_rate": 1.3476458264063865e-05, - "loss": 0.1646, + "epoch": 2.7888607594936707, + "grad_norm": 0.3375591584241796, + "learning_rate": 1.947716740306439e-05, + "loss": 0.3089, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012462573125958443, - "step": 798, - "valid_targets_mean": 3088.8, - "valid_targets_min": 875 + "loss_rank_avg": 0.16758695244789124, + "step": 2755, + "valid_targets_mean": 3964.9, + "valid_targets_min": 1047 }, { - "epoch": 3.2349367088607597, - "grad_norm": 0.30039221272535604, - "learning_rate": 1.342302312350678e-05, - "loss": 0.17, + "epoch": 2.7939240506329113, + "grad_norm": 0.340384343325027, + "learning_rate": 1.9406533858307503e-05, + "loss": 0.3178, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016949564218521118, - "step": 799, - "valid_targets_mean": 3712.4, - "valid_targets_min": 1407 + "loss_rank_avg": 0.1304406374692917, + "step": 2760, + "valid_targets_mean": 3351.9, + "valid_targets_min": 1109 }, { - "epoch": 3.2389873417721518, - "grad_norm": 0.2787693166001655, - "learning_rate": 1.3369640572281537e-05, - "loss": 0.1693, + "epoch": 2.798987341772152, + "grad_norm": 0.29188887212643894, + "learning_rate": 1.9335907721471438e-05, + "loss": 0.3042, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020679127424955368, - "step": 800, - "valid_targets_mean": 4078.4, - "valid_targets_min": 1072 + "loss_rank_avg": 0.15948455035686493, + "step": 2765, + "valid_targets_mean": 4508.9, + "valid_targets_min": 970 }, { - "epoch": 3.2430379746835443, - "grad_norm": 0.29520428907315616, - "learning_rate": 1.3316311037233596e-05, - "loss": 0.1719, + "epoch": 2.8040506329113923, + "grad_norm": 0.2914363526376243, + "learning_rate": 1.9265289874144554e-05, + "loss": 0.3112, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018096426501870155, - "step": 801, - "valid_targets_mean": 3506.6, - "valid_targets_min": 1152 + "loss_rank_avg": 0.18582302331924438, + "step": 2770, + "valid_targets_mean": 6648.4, + "valid_targets_min": 2048 }, { - "epoch": 3.247088607594937, - "grad_norm": 0.2667786897026877, - "learning_rate": 1.326303494478451e-05, - "loss": 0.1734, + "epoch": 2.809113924050633, + "grad_norm": 0.3447263204953251, + "learning_rate": 1.9194681197811703e-05, + "loss": 0.3154, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023975923657417297, - "step": 802, - "valid_targets_mean": 5160.4, - "valid_targets_min": 1551 + "loss_rank_avg": 0.11537902057170868, + "step": 2775, + "valid_targets_mean": 3061.1, + "valid_targets_min": 1261 }, { - "epoch": 3.2511392405063293, - "grad_norm": 0.2770929210536329, - "learning_rate": 1.3209812720928472e-05, - "loss": 0.1756, + "epoch": 2.8141772151898734, + "grad_norm": 0.28824906654455074, + "learning_rate": 1.912408257384327e-05, + "loss": 0.3173, "loss_nan_ranks": 0, - "loss_rank_avg": 0.027329906821250916, - "step": 803, - "valid_targets_mean": 5768.4, - "valid_targets_min": 1146 + "loss_rank_avg": 0.18753370642662048, + "step": 2780, + "valid_targets_mean": 6277.4, + "valid_targets_min": 1561 }, { - "epoch": 3.2551898734177214, - "grad_norm": 0.29416739016414467, - "learning_rate": 1.315664479122898e-05, - "loss": 0.1738, + "epoch": 2.819240506329114, + "grad_norm": 0.31551057795248955, + "learning_rate": 1.9053494883484177e-05, + "loss": 0.3012, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023470068350434303, - "step": 804, - "valid_targets_mean": 3871.2, - "valid_targets_min": 1226 + "loss_rank_avg": 0.1113462746143341, + "step": 2785, + "valid_targets_mean": 3603.0, + "valid_targets_min": 966 }, { - "epoch": 3.259240506329114, - "grad_norm": 0.2620201865177205, - "learning_rate": 1.3103531580815378e-05, - "loss": 0.1752, + "epoch": 2.8243037974683545, + "grad_norm": 0.3130169957194376, + "learning_rate": 1.898291900784284e-05, + "loss": 0.3045, "loss_nan_ranks": 0, - "loss_rank_avg": 0.022001316770911217, - "step": 805, - "valid_targets_mean": 5060.7, - "valid_targets_min": 1445 + "loss_rank_avg": 0.17591753602027893, + "step": 2790, + "valid_targets_mean": 4849.0, + "valid_targets_min": 1241 }, { - "epoch": 3.2632911392405064, - "grad_norm": 0.28078996386465055, - "learning_rate": 1.3050473514379488e-05, - "loss": 0.1706, + "epoch": 2.829367088607595, + "grad_norm": 0.36153291519489483, + "learning_rate": 1.8912355827880237e-05, + "loss": 0.3166, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02083248645067215, - "step": 806, - "valid_targets_mean": 4420.8, - "valid_targets_min": 1243 + "loss_rank_avg": 0.13046428561210632, + "step": 2795, + "valid_targets_mean": 3138.6, + "valid_targets_min": 1174 }, { - "epoch": 3.267341772151899, - "grad_norm": 0.27940811510151586, - "learning_rate": 1.2997471016172188e-05, - "loss": 0.1715, + "epoch": 2.8344303797468355, + "grad_norm": 0.35818567026601295, + "learning_rate": 1.884180622439884e-05, + "loss": 0.3105, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023604262620210648, - "step": 807, - "valid_targets_mean": 5010.2, - "valid_targets_min": 1028 + "loss_rank_avg": 0.15154902637004852, + "step": 2800, + "valid_targets_mean": 3774.6, + "valid_targets_min": 731 }, { - "epoch": 3.271392405063291, - "grad_norm": 0.2792666068611182, - "learning_rate": 1.2944524510000042e-05, - "loss": 0.1678, + "epoch": 2.839493670886076, + "grad_norm": 0.33153674409005546, + "learning_rate": 1.8771271078031648e-05, + "loss": 0.3064, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02203407883644104, - "step": 808, - "valid_targets_mean": 4796.6, - "valid_targets_min": 1529 + "loss_rank_avg": 0.17227354645729065, + "step": 2805, + "valid_targets_mean": 5559.2, + "valid_targets_min": 981 }, { - "epoch": 3.2754430379746835, - "grad_norm": 0.2770883931596026, - "learning_rate": 1.2891634419221908e-05, - "loss": 0.1798, + "epoch": 2.8445569620253166, + "grad_norm": 0.30646195273462673, + "learning_rate": 1.8700751269231243e-05, + "loss": 0.3028, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024457737803459167, - "step": 809, - "valid_targets_mean": 5265.9, - "valid_targets_min": 1340 + "loss_rank_avg": 0.17971369624137878, + "step": 2810, + "valid_targets_mean": 5315.5, + "valid_targets_min": 1235 }, { - "epoch": 3.279493670886076, - "grad_norm": 0.28475756468903735, - "learning_rate": 1.2838801166745545e-05, - "loss": 0.1717, + "epoch": 2.8496202531645567, + "grad_norm": 0.3147470597121468, + "learning_rate": 1.86302476782587e-05, + "loss": 0.3005, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0277986042201519, - "step": 810, - "valid_targets_mean": 4750.3, - "valid_targets_min": 1003 + "loss_rank_avg": 0.1408463716506958, + "step": 2815, + "valid_targets_mean": 3962.4, + "valid_targets_min": 820 }, { - "epoch": 3.2835443037974685, - "grad_norm": 0.28703035190856646, - "learning_rate": 1.2786025175024216e-05, - "loss": 0.172, + "epoch": 2.8546835443037972, + "grad_norm": 0.35549269402291417, + "learning_rate": 1.855976118517271e-05, + "loss": 0.3075, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028684845194220543, - "step": 811, - "valid_targets_mean": 5778.2, - "valid_targets_min": 1094 + "loss_rank_avg": 0.19130544364452362, + "step": 2820, + "valid_targets_mean": 4855.5, + "valid_targets_min": 959 }, { - "epoch": 3.2875949367088606, - "grad_norm": 0.2575044676747565, - "learning_rate": 1.2733306866053357e-05, - "loss": 0.1786, + "epoch": 2.8597468354430378, + "grad_norm": 0.2823602428424915, + "learning_rate": 1.8489292669818494e-05, + "loss": 0.3066, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016220424324274063, - "step": 812, - "valid_targets_mean": 3783.6, - "valid_targets_min": 939 + "loss_rank_avg": 0.1796441674232483, + "step": 2825, + "valid_targets_mean": 5933.9, + "valid_targets_min": 1652 }, { - "epoch": 3.291645569620253, - "grad_norm": 0.2668553861153468, - "learning_rate": 1.2680646661367163e-05, - "loss": 0.1779, + "epoch": 2.8648101265822783, + "grad_norm": 0.3086469012489834, + "learning_rate": 1.8418843011816894e-05, + "loss": 0.3113, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0175221785902977, - "step": 813, - "valid_targets_mean": 4105.9, - "valid_targets_min": 1251 + "loss_rank_avg": 0.14876966178417206, + "step": 2830, + "valid_targets_mean": 4459.1, + "valid_targets_min": 1431 }, { - "epoch": 3.2956962025316456, - "grad_norm": 0.2898200450052223, - "learning_rate": 1.2628044982035219e-05, - "loss": 0.1715, + "epoch": 2.869873417721519, + "grad_norm": 0.33219619271405165, + "learning_rate": 1.8348413090553356e-05, + "loss": 0.3147, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021266207098960876, - "step": 814, - "valid_targets_mean": 4000.8, - "valid_targets_min": 1101 + "loss_rank_avg": 0.1837439239025116, + "step": 2835, + "valid_targets_mean": 5042.5, + "valid_targets_min": 1015 }, { - "epoch": 3.299746835443038, - "grad_norm": 0.2777976709411515, - "learning_rate": 1.2575502248659155e-05, - "loss": 0.1737, + "epoch": 2.8749367088607594, + "grad_norm": 0.28160060670414677, + "learning_rate": 1.8278003785166967e-05, + "loss": 0.2942, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01444375328719616, - "step": 815, - "valid_targets_mean": 3121.9, - "valid_targets_min": 1417 + "loss_rank_avg": 0.13023114204406738, + "step": 2840, + "valid_targets_mean": 4408.2, + "valid_targets_min": 1093 }, { - "epoch": 3.3037974683544302, - "grad_norm": 0.2556688244111565, - "learning_rate": 1.2523018881369269e-05, - "loss": 0.1747, + "epoch": 2.88, + "grad_norm": 0.2701611603230975, + "learning_rate": 1.8207615974539476e-05, + "loss": 0.3064, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02820611745119095, - "step": 816, - "valid_targets_mean": 6372.2, - "valid_targets_min": 1123 + "loss_rank_avg": 0.12874723970890045, + "step": 2845, + "valid_targets_mean": 4858.8, + "valid_targets_min": 863 }, { - "epoch": 3.3078481012658227, - "grad_norm": 0.2828983328250475, - "learning_rate": 1.2470595299821176e-05, - "loss": 0.1729, + "epoch": 2.8850632911392404, + "grad_norm": 0.33079935847646824, + "learning_rate": 1.8137250537284325e-05, + "loss": 0.3093, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02478950098156929, - "step": 817, - "valid_targets_mean": 5102.2, - "valid_targets_min": 515 + "loss_rank_avg": 0.17800703644752502, + "step": 2850, + "valid_targets_mean": 4857.1, + "valid_targets_min": 1713 }, { - "epoch": 3.3118987341772153, - "grad_norm": 0.2663885169819304, - "learning_rate": 1.2418231923192416e-05, - "loss": 0.1725, + "epoch": 2.890126582278481, + "grad_norm": 0.23595757958591787, + "learning_rate": 1.8066908351735666e-05, + "loss": 0.3019, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03402366489171982, - "step": 818, - "valid_targets_mean": 7563.4, - "valid_targets_min": 1038 + "loss_rank_avg": 0.1269092708826065, + "step": 2855, + "valid_targets_mean": 5766.2, + "valid_targets_min": 1183 }, { - "epoch": 3.3159493670886078, - "grad_norm": 0.28119186515935485, - "learning_rate": 1.2365929170179165e-05, - "loss": 0.1658, + "epoch": 2.8951898734177215, + "grad_norm": 0.33538165041242207, + "learning_rate": 1.7996590295937448e-05, + "loss": 0.3038, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018654121086001396, - "step": 819, - "valid_targets_mean": 4249.9, - "valid_targets_min": 1297 + "loss_rank_avg": 0.1899298131465912, + "step": 2860, + "valid_targets_mean": 5260.9, + "valid_targets_min": 1500 }, { - "epoch": 3.32, - "grad_norm": 0.2650192587281139, - "learning_rate": 1.2313687458992848e-05, - "loss": 0.1682, + "epoch": 2.900253164556962, + "grad_norm": 0.3124645131733068, + "learning_rate": 1.7926297247632383e-05, + "loss": 0.3207, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014561069197952747, - "step": 820, - "valid_targets_mean": 3717.8, - "valid_targets_min": 1111 + "loss_rank_avg": 0.1435328722000122, + "step": 2865, + "valid_targets_mean": 3785.1, + "valid_targets_min": 1048 }, { - "epoch": 3.3240506329113924, - "grad_norm": 0.2677650578515307, - "learning_rate": 1.2261507207356785e-05, - "loss": 0.1667, + "epoch": 2.9053164556962026, + "grad_norm": 0.3305614452127962, + "learning_rate": 1.7856030084251045e-05, + "loss": 0.3098, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018807832151651382, - "step": 821, - "valid_targets_mean": 4245.8, - "valid_targets_min": 1257 + "loss_rank_avg": 0.1552894562482834, + "step": 2870, + "valid_targets_mean": 4255.3, + "valid_targets_min": 844 }, { - "epoch": 3.328101265822785, - "grad_norm": 0.28086766974582916, - "learning_rate": 1.220938883250288e-05, - "loss": 0.1779, + "epoch": 2.910379746835443, + "grad_norm": 0.34874837954634086, + "learning_rate": 1.7785789682900908e-05, + "loss": 0.3082, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02263430505990982, - "step": 822, - "valid_targets_mean": 4172.3, - "valid_targets_min": 1631 + "loss_rank_avg": 0.14497388899326324, + "step": 2875, + "valid_targets_mean": 3286.0, + "valid_targets_min": 1279 }, { - "epoch": 3.3321518987341774, - "grad_norm": 0.26787693516554345, - "learning_rate": 1.215733275116827e-05, - "loss": 0.1644, + "epoch": 2.9154430379746836, + "grad_norm": 0.3321474887097409, + "learning_rate": 1.771557692035537e-05, + "loss": 0.3062, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020411396399140358, - "step": 823, - "valid_targets_mean": 4443.4, - "valid_targets_min": 1111 + "loss_rank_avg": 0.14671984314918518, + "step": 2880, + "valid_targets_mean": 4362.1, + "valid_targets_min": 897 }, { - "epoch": 3.3362025316455695, - "grad_norm": 0.291485421895171, - "learning_rate": 1.2105339379592005e-05, - "loss": 0.1754, + "epoch": 2.920506329113924, + "grad_norm": 0.36664765144869893, + "learning_rate": 1.7645392673042853e-05, + "loss": 0.3298, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023871038109064102, - "step": 824, - "valid_targets_mean": 4766.5, - "valid_targets_min": 1127 + "loss_rank_avg": 0.20450638234615326, + "step": 2885, + "valid_targets_mean": 3744.9, + "valid_targets_min": 536 }, { - "epoch": 3.340253164556962, - "grad_norm": 0.2620819618225538, - "learning_rate": 1.2053409133511681e-05, - "loss": 0.1691, + "epoch": 2.9255696202531647, + "grad_norm": 0.30800064070091415, + "learning_rate": 1.757523781703581e-05, + "loss": 0.3135, "loss_nan_ranks": 0, - "loss_rank_avg": 0.019396033138036728, - "step": 825, - "valid_targets_mean": 4995.4, - "valid_targets_min": 1031 + "loss_rank_avg": 0.13620543479919434, + "step": 2890, + "valid_targets_mean": 4010.2, + "valid_targets_min": 739 }, { - "epoch": 3.3443037974683545, - "grad_norm": 0.26617063956170284, - "learning_rate": 1.2001542428160173e-05, - "loss": 0.1668, + "epoch": 2.9306329113924052, + "grad_norm": 0.39874064817665694, + "learning_rate": 1.7505113228039848e-05, + "loss": 0.3146, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01592746004462242, - "step": 826, - "valid_targets_mean": 3031.6, - "valid_targets_min": 1360 + "loss_rank_avg": 0.17097340524196625, + "step": 2895, + "valid_targets_mean": 4104.1, + "valid_targets_min": 1037 }, { - "epoch": 3.348354430379747, - "grad_norm": 0.2791441439661935, - "learning_rate": 1.1949739678262278e-05, - "loss": 0.159, + "epoch": 2.9356962025316458, + "grad_norm": 0.30804629166037306, + "learning_rate": 1.7435019781382737e-05, + "loss": 0.297, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01481454074382782, - "step": 827, - "valid_targets_mean": 3479.9, - "valid_targets_min": 1489 + "loss_rank_avg": 0.14079098403453827, + "step": 2900, + "valid_targets_mean": 4534.0, + "valid_targets_min": 887 }, { - "epoch": 3.352405063291139, - "grad_norm": 0.2788019727068902, - "learning_rate": 1.189800129803139e-05, - "loss": 0.1652, + "epoch": 2.9407594936708863, + "grad_norm": 0.306220364172818, + "learning_rate": 1.7364958352003548e-05, + "loss": 0.3174, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014004124328494072, - "step": 828, - "valid_targets_mean": 3138.6, - "valid_targets_min": 1117 + "loss_rank_avg": 0.17645467817783356, + "step": 2905, + "valid_targets_mean": 4994.2, + "valid_targets_min": 1148 }, { - "epoch": 3.3564556962025316, - "grad_norm": 0.27843872235351913, - "learning_rate": 1.1846327701166228e-05, - "loss": 0.1662, + "epoch": 2.945822784810127, + "grad_norm": 0.3179633647838338, + "learning_rate": 1.7294929814441666e-05, + "loss": 0.3231, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018882954493165016, - "step": 829, - "valid_targets_mean": 3859.1, - "valid_targets_min": 1149 + "loss_rank_avg": 0.16111071407794952, + "step": 2910, + "valid_targets_mean": 4938.6, + "valid_targets_min": 1387 }, { - "epoch": 3.360506329113924, - "grad_norm": 0.27454904391431767, - "learning_rate": 1.1794719300847493e-05, - "loss": 0.1706, + "epoch": 2.9508860759493674, + "grad_norm": 0.31641771807614866, + "learning_rate": 1.7224935042825912e-05, + "loss": 0.3152, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014589566737413406, - "step": 830, - "valid_targets_mean": 3264.4, - "valid_targets_min": 1556 + "loss_rank_avg": 0.16131725907325745, + "step": 2915, + "valid_targets_mean": 4423.9, + "valid_targets_min": 810 }, { - "epoch": 3.3645569620253166, - "grad_norm": 0.28428328898750455, - "learning_rate": 1.1743176509734582e-05, - "loss": 0.1635, + "epoch": 2.9559493670886074, + "grad_norm": 0.2900729323232742, + "learning_rate": 1.7154974910863646e-05, + "loss": 0.3077, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021249011158943176, - "step": 831, - "valid_targets_mean": 4815.2, - "valid_targets_min": 1592 + "loss_rank_avg": 0.10794432461261749, + "step": 2920, + "valid_targets_mean": 3454.4, + "valid_targets_min": 1456 }, { - "epoch": 3.3686075949367087, - "grad_norm": 0.2925951397601006, - "learning_rate": 1.1691699739962275e-05, - "loss": 0.1711, + "epoch": 2.961012658227848, + "grad_norm": 0.3432477720018123, + "learning_rate": 1.7085050291829813e-05, + "loss": 0.3036, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016693107783794403, - "step": 832, - "valid_targets_mean": 3980.7, - "valid_targets_min": 1435 + "loss_rank_avg": 0.15215110778808594, + "step": 2925, + "valid_targets_mean": 4236.8, + "valid_targets_min": 1238 }, { - "epoch": 3.372658227848101, - "grad_norm": 0.2907892696205683, - "learning_rate": 1.1640289403137456e-05, - "loss": 0.1617, + "epoch": 2.9660759493670885, + "grad_norm": 0.31507659068073507, + "learning_rate": 1.7015162058556088e-05, + "loss": 0.3208, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02736719697713852, - "step": 833, - "valid_targets_mean": 5829.9, - "valid_targets_min": 913 + "loss_rank_avg": 0.16133321821689606, + "step": 2930, + "valid_targets_mean": 4263.9, + "valid_targets_min": 795 }, { - "epoch": 3.3767088607594937, - "grad_norm": 0.2887674410665316, - "learning_rate": 1.1588945910335802e-05, - "loss": 0.1661, + "epoch": 2.971139240506329, + "grad_norm": 0.29285158348676216, + "learning_rate": 1.6945311083419947e-05, + "loss": 0.2976, "loss_nan_ranks": 0, - "loss_rank_avg": 0.029096435755491257, - "step": 834, - "valid_targets_mean": 5260.5, - "valid_targets_min": 1214 + "loss_rank_avg": 0.1742202341556549, + "step": 2935, + "valid_targets_mean": 5417.0, + "valid_targets_min": 1360 }, { - "epoch": 3.3807594936708862, - "grad_norm": 0.2724871828550331, - "learning_rate": 1.1537669672098517e-05, - "loss": 0.168, + "epoch": 2.9762025316455696, + "grad_norm": 0.2918602342182722, + "learning_rate": 1.68754982383338e-05, + "loss": 0.3007, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02586859092116356, - "step": 835, - "valid_targets_mean": 5010.1, - "valid_targets_min": 1034 + "loss_rank_avg": 0.1684986650943756, + "step": 2940, + "valid_targets_mean": 5456.4, + "valid_targets_min": 1200 }, { - "epoch": 3.3848101265822783, - "grad_norm": 0.28626628867865767, - "learning_rate": 1.1486461098429036e-05, - "loss": 0.1749, + "epoch": 2.98126582278481, + "grad_norm": 0.30561630865463585, + "learning_rate": 1.6805724394734107e-05, + "loss": 0.3008, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015445518307387829, - "step": 836, - "valid_targets_mean": 4448.1, - "valid_targets_min": 819 + "loss_rank_avg": 0.10106837749481201, + "step": 2945, + "valid_targets_mean": 3017.5, + "valid_targets_min": 1279 }, { - "epoch": 3.388860759493671, - "grad_norm": 0.26727985163226237, - "learning_rate": 1.1435320598789747e-05, - "loss": 0.181, + "epoch": 2.9863291139240506, + "grad_norm": 0.29442534234659645, + "learning_rate": 1.6735990423570477e-05, + "loss": 0.3234, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020043276250362396, - "step": 837, - "valid_targets_mean": 4645.7, - "valid_targets_min": 1255 + "loss_rank_avg": 0.13497650623321533, + "step": 2950, + "valid_targets_mean": 3993.2, + "valid_targets_min": 1015 }, { - "epoch": 3.3929113924050633, - "grad_norm": 0.2726651228835583, - "learning_rate": 1.1384248582098736e-05, - "loss": 0.1696, + "epoch": 2.991392405063291, + "grad_norm": 0.2700507094407274, + "learning_rate": 1.666629719529483e-05, + "loss": 0.3185, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016197284683585167, - "step": 838, - "valid_targets_mean": 3677.2, - "valid_targets_min": 1308 + "loss_rank_avg": 0.15681613981723785, + "step": 2955, + "valid_targets_mean": 4630.2, + "valid_targets_min": 1176 }, { - "epoch": 3.396962025316456, - "grad_norm": 0.27571953261817456, - "learning_rate": 1.1333245456726467e-05, - "loss": 0.1722, + "epoch": 2.9964556962025317, + "grad_norm": 0.3447693878105072, + "learning_rate": 1.6596645579850505e-05, + "loss": 0.3107, "loss_nan_ranks": 0, - "loss_rank_avg": 0.029869263991713524, - "step": 839, - "valid_targets_mean": 5654.1, - "valid_targets_min": 1572 + "loss_rank_avg": 0.1600763201713562, + "step": 2960, + "valid_targets_mean": 3677.3, + "valid_targets_min": 1107 }, { - "epoch": 3.401012658227848, - "grad_norm": 0.27600908329749335, - "learning_rate": 1.128231163049258e-05, - "loss": 0.1731, + "epoch": 3.001012658227848, + "grad_norm": 0.29622484390463744, + "learning_rate": 1.6527036446661396e-05, + "loss": 0.3123, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016275284811854362, - "step": 840, - "valid_targets_mean": 4025.7, - "valid_targets_min": 1054 + "loss_rank_avg": 0.14927029609680176, + "step": 2965, + "valid_targets_mean": 4862.4, + "valid_targets_min": 1322 }, { - "epoch": 3.4050632911392404, - "grad_norm": 0.25713871920251963, - "learning_rate": 1.123144751066259e-05, - "loss": 0.1746, + "epoch": 3.0060759493670886, + "grad_norm": 0.29785828676100873, + "learning_rate": 1.645747066462115e-05, + "loss": 0.3021, "loss_nan_ranks": 0, - "loss_rank_avg": 0.027817074209451675, - "step": 841, - "valid_targets_mean": 5710.4, - "valid_targets_min": 1580 + "loss_rank_avg": 0.19526249170303345, + "step": 2970, + "valid_targets_mean": 5885.6, + "valid_targets_min": 1447 }, { - "epoch": 3.409113924050633, - "grad_norm": 0.2825201455489712, - "learning_rate": 1.1180653503944657e-05, - "loss": 0.1723, + "epoch": 3.011139240506329, + "grad_norm": 0.35112230405979555, + "learning_rate": 1.638794910208225e-05, + "loss": 0.2898, "loss_nan_ranks": 0, - "loss_rank_avg": 0.027706153690814972, - "step": 842, - "valid_targets_mean": 5161.7, - "valid_targets_min": 1480 + "loss_rank_avg": 0.10469535738229752, + "step": 2975, + "valid_targets_mean": 2940.2, + "valid_targets_min": 1159 }, { - "epoch": 3.4131645569620255, - "grad_norm": 0.2858818538810382, - "learning_rate": 1.1129930016486287e-05, - "loss": 0.1712, + "epoch": 3.0162025316455696, + "grad_norm": 0.3772315133305102, + "learning_rate": 1.631847262684523e-05, + "loss": 0.3108, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023515108972787857, - "step": 843, - "valid_targets_mean": 4839.2, - "valid_targets_min": 1786 + "loss_rank_avg": 0.16602087020874023, + "step": 2980, + "valid_targets_mean": 4357.8, + "valid_targets_min": 1389 }, { - "epoch": 3.4172151898734175, - "grad_norm": 0.2638128074444224, - "learning_rate": 1.107927745387114e-05, - "loss": 0.1707, + "epoch": 3.02126582278481, + "grad_norm": 0.30911220110322923, + "learning_rate": 1.624904210614782e-05, + "loss": 0.3033, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01564435288310051, - "step": 844, - "valid_targets_mean": 3269.4, - "valid_targets_min": 920 + "loss_rank_avg": 0.1612570881843567, + "step": 2985, + "valid_targets_mean": 4494.7, + "valid_targets_min": 1244 }, { - "epoch": 3.42126582278481, - "grad_norm": 0.2695433501417177, - "learning_rate": 1.1028696221115766e-05, - "loss": 0.1731, + "epoch": 3.0263291139240507, + "grad_norm": 0.3517033326754291, + "learning_rate": 1.6179658406654113e-05, + "loss": 0.2958, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03138864040374756, - "step": 845, - "valid_targets_mean": 6159.3, - "valid_targets_min": 1067 + "loss_rank_avg": 0.17958399653434753, + "step": 2990, + "valid_targets_mean": 4044.6, + "valid_targets_min": 1324 }, { - "epoch": 3.4253164556962026, - "grad_norm": 0.266064573836415, - "learning_rate": 1.0978186722666341e-05, - "loss": 0.1696, + "epoch": 3.031392405063291, + "grad_norm": 0.28226563989162823, + "learning_rate": 1.6110322394443773e-05, + "loss": 0.2853, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026126345619559288, - "step": 846, - "valid_targets_mean": 5643.2, - "valid_targets_min": 1514 + "loss_rank_avg": 0.11023816466331482, + "step": 2995, + "valid_targets_mean": 4105.8, + "valid_targets_min": 1267 }, { - "epoch": 3.429367088607595, - "grad_norm": 0.2758817540755521, - "learning_rate": 1.0927749362395475e-05, - "loss": 0.1726, + "epoch": 3.0364556962025318, + "grad_norm": 0.3395674314450887, + "learning_rate": 1.60410349350012e-05, + "loss": 0.3034, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015297709964215755, - "step": 847, - "valid_targets_mean": 3850.0, - "valid_targets_min": 1189 + "loss_rank_avg": 0.17015889286994934, + "step": 3000, + "valid_targets_mean": 5613.8, + "valid_targets_min": 1423 }, { - "epoch": 3.433417721518987, - "grad_norm": 0.26901779395475367, - "learning_rate": 1.0877384543598954e-05, - "loss": 0.1774, + "epoch": 3.0415189873417723, + "grad_norm": 0.3328019254619737, + "learning_rate": 1.597179689320474e-05, + "loss": 0.3068, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021081728860735893, - "step": 848, - "valid_targets_mean": 4777.1, - "valid_targets_min": 1144 + "loss_rank_avg": 0.16885250806808472, + "step": 3005, + "valid_targets_mean": 4626.2, + "valid_targets_min": 1034 }, { - "epoch": 3.4374683544303797, - "grad_norm": 0.27724722669699176, - "learning_rate": 1.0827092668992529e-05, - "loss": 0.178, + "epoch": 3.046582278481013, + "grad_norm": 0.30137258092800007, + "learning_rate": 1.5902609133315888e-05, + "loss": 0.3026, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02179073728621006, - "step": 849, - "valid_targets_mean": 5052.2, - "valid_targets_min": 1071 + "loss_rank_avg": 0.14555740356445312, + "step": 3010, + "valid_targets_mean": 5118.1, + "valid_targets_min": 1378 }, { - "epoch": 3.441518987341772, - "grad_norm": 0.2641162290763693, - "learning_rate": 1.077687414070869e-05, - "loss": 0.1687, + "epoch": 3.0516455696202534, + "grad_norm": 0.32251822208809916, + "learning_rate": 1.5833472518968478e-05, + "loss": 0.3049, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02662193402647972, - "step": 850, - "valid_targets_mean": 5878.2, - "valid_targets_min": 1193 + "loss_rank_avg": 0.13226641714572906, + "step": 3015, + "valid_targets_mean": 4081.9, + "valid_targets_min": 1428 }, { - "epoch": 3.4455696202531647, - "grad_norm": 0.25669905999278697, - "learning_rate": 1.0726729360293451e-05, - "loss": 0.1698, + "epoch": 3.056708860759494, + "grad_norm": 0.3211101552341726, + "learning_rate": 1.5764387913157952e-05, + "loss": 0.3185, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021715868264436722, - "step": 851, - "valid_targets_mean": 5160.4, - "valid_targets_min": 1104 + "loss_rank_avg": 0.0971713736653328, + "step": 3020, + "valid_targets_mean": 3146.8, + "valid_targets_min": 1380 }, { - "epoch": 3.449620253164557, - "grad_norm": 0.25970259826495146, - "learning_rate": 1.0676658728703147e-05, - "loss": 0.1628, + "epoch": 3.061772151898734, + "grad_norm": 0.3360390373855527, + "learning_rate": 1.5695356178230527e-05, + "loss": 0.3062, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018533840775489807, - "step": 852, - "valid_targets_mean": 3411.4, - "valid_targets_min": 1243 + "loss_rank_avg": 0.12469463050365448, + "step": 3025, + "valid_targets_mean": 3152.8, + "valid_targets_min": 1004 }, { - "epoch": 3.4536708860759493, - "grad_norm": 0.25457353336282595, - "learning_rate": 1.0626662646301198e-05, - "loss": 0.1716, + "epoch": 3.0668354430379745, + "grad_norm": 0.29385960506365055, + "learning_rate": 1.5626378175872486e-05, + "loss": 0.2921, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014588287100195885, - "step": 853, - "valid_targets_mean": 4137.9, - "valid_targets_min": 1310 + "loss_rank_avg": 0.16584698855876923, + "step": 3030, + "valid_targets_mean": 5578.9, + "valid_targets_min": 1238 }, { - "epoch": 3.457721518987342, - "grad_norm": 0.24824852868665856, - "learning_rate": 1.0576741512854958e-05, - "loss": 0.1697, + "epoch": 3.071898734177215, + "grad_norm": 0.32789137808560437, + "learning_rate": 1.5557454767099382e-05, + "loss": 0.2935, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017550557851791382, - "step": 854, - "valid_targets_mean": 4064.2, - "valid_targets_min": 1137 + "loss_rank_avg": 0.17390216886997223, + "step": 3035, + "valid_targets_mean": 4741.9, + "valid_targets_min": 1219 }, { - "epoch": 3.4617721518987343, - "grad_norm": 0.26572522596932885, - "learning_rate": 1.052689572753247e-05, - "loss": 0.1725, + "epoch": 3.0769620253164556, + "grad_norm": 0.3234941533877886, + "learning_rate": 1.5488586812245302e-05, + "loss": 0.3027, "loss_nan_ranks": 0, - "loss_rank_avg": 0.028516778722405434, - "step": 855, - "valid_targets_mean": 5678.3, - "valid_targets_min": 1583 + "loss_rank_avg": 0.17046749591827393, + "step": 3040, + "valid_targets_mean": 4481.4, + "valid_targets_min": 1174 }, { - "epoch": 3.4658227848101264, - "grad_norm": 0.26457860513561665, - "learning_rate": 1.0477125688899328e-05, - "loss": 0.1692, + "epoch": 3.082025316455696, + "grad_norm": 0.28075430375987626, + "learning_rate": 1.541977517095215e-05, + "loss": 0.3003, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020979564636945724, - "step": 856, - "valid_targets_mean": 4732.6, - "valid_targets_min": 886 + "loss_rank_avg": 0.14547553658485413, + "step": 3045, + "valid_targets_mean": 5410.0, + "valid_targets_min": 1228 }, { - "epoch": 3.469873417721519, - "grad_norm": 0.25261090215661935, - "learning_rate": 1.0427431794915406e-05, - "loss": 0.1619, + "epoch": 3.0870886075949366, + "grad_norm": 0.28635813947594835, + "learning_rate": 1.5351020702158875e-05, + "loss": 0.3099, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01421372964978218, - "step": 857, - "valid_targets_mean": 2682.5, - "valid_targets_min": 1229 + "loss_rank_avg": 0.13441359996795654, + "step": 3050, + "valid_targets_mean": 5051.8, + "valid_targets_min": 960 }, { - "epoch": 3.4739240506329114, - "grad_norm": 0.25156130882787403, - "learning_rate": 1.0377814442931769e-05, - "loss": 0.1707, + "epoch": 3.092151898734177, + "grad_norm": 0.34800039286889395, + "learning_rate": 1.52823242640908e-05, + "loss": 0.3096, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0210129264742136, - "step": 858, - "valid_targets_mean": 4471.9, - "valid_targets_min": 1063 + "loss_rank_avg": 0.15158754587173462, + "step": 3055, + "valid_targets_mean": 3697.4, + "valid_targets_min": 1086 }, { - "epoch": 3.477974683544304, - "grad_norm": 0.274113112378801, - "learning_rate": 1.0328274029687439e-05, - "loss": 0.1753, + "epoch": 3.0972151898734177, + "grad_norm": 0.32310977392742785, + "learning_rate": 1.5213686714248852e-05, + "loss": 0.3082, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021651387214660645, - "step": 859, - "valid_targets_mean": 4218.9, - "valid_targets_min": 772 + "loss_rank_avg": 0.16765525937080383, + "step": 3060, + "valid_targets_mean": 4845.2, + "valid_targets_min": 1334 }, { - "epoch": 3.482025316455696, - "grad_norm": 0.26377135539628055, - "learning_rate": 1.0278810951306253e-05, - "loss": 0.1658, + "epoch": 3.1022784810126582, + "grad_norm": 0.296724917732042, + "learning_rate": 1.5145108909398896e-05, + "loss": 0.3042, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02407141774892807, - "step": 860, - "valid_targets_mean": 5740.3, - "valid_targets_min": 1286 + "loss_rank_avg": 0.14354108273983002, + "step": 3065, + "valid_targets_mean": 4783.9, + "valid_targets_min": 1333 }, { - "epoch": 3.4860759493670885, - "grad_norm": 0.25760012748068395, - "learning_rate": 1.0229425603293647e-05, - "loss": 0.1673, + "epoch": 3.1073417721518988, + "grad_norm": 0.42288485229336514, + "learning_rate": 1.5076591705561056e-05, + "loss": 0.3054, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016751490533351898, - "step": 861, - "valid_targets_mean": 3865.2, - "valid_targets_min": 1616 + "loss_rank_avg": 0.16323769092559814, + "step": 3070, + "valid_targets_mean": 3961.5, + "valid_targets_min": 1451 }, { - "epoch": 3.490126582278481, - "grad_norm": 0.2536224368663142, - "learning_rate": 1.0180118380533558e-05, - "loss": 0.1765, + "epoch": 3.1124050632911393, + "grad_norm": 0.29888536735808585, + "learning_rate": 1.5008135957998968e-05, + "loss": 0.3097, "loss_nan_ranks": 0, - "loss_rank_avg": 0.019702903926372528, - "step": 862, - "valid_targets_mean": 4204.1, - "valid_targets_min": 1170 + "loss_rank_avg": 0.16281160712242126, + "step": 3075, + "valid_targets_mean": 5095.7, + "valid_targets_min": 989 }, { - "epoch": 3.4941772151898736, - "grad_norm": 0.2858251933318063, - "learning_rate": 1.0130889677285225e-05, - "loss": 0.1702, + "epoch": 3.11746835443038, + "grad_norm": 0.32246738234879885, + "learning_rate": 1.4939742521209178e-05, + "loss": 0.3136, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030851341784000397, - "step": 863, - "valid_targets_mean": 4783.8, - "valid_targets_min": 1175 + "loss_rank_avg": 0.142366424202919, + "step": 3080, + "valid_targets_mean": 3786.1, + "valid_targets_min": 776 }, { - "epoch": 3.4982278481012656, - "grad_norm": 0.2588318742109123, - "learning_rate": 1.008173988718005e-05, - "loss": 0.1736, + "epoch": 3.1225316455696204, + "grad_norm": 0.2885458143447489, + "learning_rate": 1.4871412248910416e-05, + "loss": 0.3112, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01750990003347397, - "step": 864, - "valid_targets_mean": 3935.4, - "valid_targets_min": 921 + "loss_rank_avg": 0.1399984061717987, + "step": 3085, + "valid_targets_mean": 5652.7, + "valid_targets_min": 1058 }, { - "epoch": 3.502278481012658, - "grad_norm": 0.2761022183429568, - "learning_rate": 1.0032669403218439e-05, - "loss": 0.1706, + "epoch": 3.127594936708861, + "grad_norm": 0.3436698843544968, + "learning_rate": 1.480314599403296e-05, + "loss": 0.3051, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023192841559648514, - "step": 865, - "valid_targets_mean": 4564.4, - "valid_targets_min": 1572 + "loss_rank_avg": 0.12947720289230347, + "step": 3090, + "valid_targets_mean": 3113.7, + "valid_targets_min": 1129 }, { - "epoch": 3.5063291139240507, - "grad_norm": 0.2769011221878136, - "learning_rate": 9.983678617766677e-06, - "loss": 0.1741, + "epoch": 3.1326582278481014, + "grad_norm": 0.3034361762747695, + "learning_rate": 1.4734944608708022e-05, + "loss": 0.2819, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014613568782806396, - "step": 866, - "valid_targets_mean": 3809.8, - "valid_targets_min": 1867 + "loss_rank_avg": 0.1270906925201416, + "step": 3095, + "valid_targets_mean": 4283.6, + "valid_targets_min": 1371 }, { - "epoch": 3.510379746835443, - "grad_norm": 0.27145474600848807, - "learning_rate": 9.934767922553788e-06, - "loss": 0.1683, + "epoch": 3.137721518987342, + "grad_norm": 0.2910061490791564, + "learning_rate": 1.4666808944257044e-05, + "loss": 0.3011, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016787836328148842, - "step": 867, - "valid_targets_mean": 3861.8, - "valid_targets_min": 980 + "loss_rank_avg": 0.1065504252910614, + "step": 3100, + "valid_targets_mean": 4013.8, + "valid_targets_min": 1748 }, { - "epoch": 3.5144303797468357, - "grad_norm": 0.28588768149583893, - "learning_rate": 9.885937708668371e-06, - "loss": 0.1724, + "epoch": 3.1427848101265825, + "grad_norm": 0.29375562777524983, + "learning_rate": 1.459873985118115e-05, + "loss": 0.2913, "loss_nan_ranks": 0, - "loss_rank_avg": 0.030413717031478882, - "step": 868, - "valid_targets_mean": 5795.8, - "valid_targets_min": 1129 + "loss_rank_avg": 0.20948265492916107, + "step": 3105, + "valid_targets_mean": 6254.6, + "valid_targets_min": 855 }, { - "epoch": 3.5184810126582278, - "grad_norm": 0.27730137605077443, - "learning_rate": 9.837188366555524e-06, - "loss": 0.1703, + "epoch": 3.1478481012658226, + "grad_norm": 0.32610939195307226, + "learning_rate": 1.4530738179150445e-05, + "loss": 0.3091, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023630110546946526, - "step": 869, - "valid_targets_mean": 4854.9, - "valid_targets_min": 1130 + "loss_rank_avg": 0.16477391123771667, + "step": 3110, + "valid_targets_mean": 4672.6, + "valid_targets_min": 1423 }, { - "epoch": 3.5225316455696203, - "grad_norm": 0.26416487489880813, - "learning_rate": 9.788520286013691e-06, - "loss": 0.1707, + "epoch": 3.152911392405063, + "grad_norm": 0.34376610077811187, + "learning_rate": 1.44628047769935e-05, + "loss": 0.2952, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018917936831712723, - "step": 870, - "valid_targets_mean": 3930.0, - "valid_targets_min": 1135 + "loss_rank_avg": 0.12125621736049652, + "step": 3115, + "valid_targets_mean": 3024.9, + "valid_targets_min": 1154 }, { - "epoch": 3.526582278481013, - "grad_norm": 0.2705474279313182, - "learning_rate": 9.739933856191537e-06, - "loss": 0.1753, + "epoch": 3.1579746835443037, + "grad_norm": 0.24961470242170858, + "learning_rate": 1.4394940492686666e-05, + "loss": 0.3028, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01810012012720108, - "step": 871, - "valid_targets_mean": 3565.3, - "valid_targets_min": 1006 + "loss_rank_avg": 0.18956081569194794, + "step": 3120, + "valid_targets_mean": 8850.2, + "valid_targets_min": 1106 }, { - "epoch": 3.530632911392405, - "grad_norm": 0.2634513861085076, - "learning_rate": 9.691429465584866e-06, - "loss": 0.1716, + "epoch": 3.163037974683544, + "grad_norm": 0.3213177133533683, + "learning_rate": 1.4327146173343561e-05, + "loss": 0.3098, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024609217420220375, - "step": 872, - "valid_targets_mean": 4994.6, - "valid_targets_min": 1070 + "loss_rank_avg": 0.16393819451332092, + "step": 3125, + "valid_targets_mean": 4351.1, + "valid_targets_min": 1322 }, { - "epoch": 3.5346835443037974, - "grad_norm": 0.2701987987515959, - "learning_rate": 9.643007502033492e-06, - "loss": 0.1644, + "epoch": 3.1681012658227847, + "grad_norm": 0.3207864682050949, + "learning_rate": 1.4259422665204458e-05, + "loss": 0.3097, "loss_nan_ranks": 0, - "loss_rank_avg": 0.019655343145132065, - "step": 873, - "valid_targets_mean": 4291.4, - "valid_targets_min": 1074 + "loss_rank_avg": 0.19772255420684814, + "step": 3130, + "valid_targets_mean": 5234.6, + "valid_targets_min": 1247 }, { - "epoch": 3.53873417721519, - "grad_norm": 0.2845364092602511, - "learning_rate": 9.594668352718152e-06, - "loss": 0.1774, + "epoch": 3.1731645569620253, + "grad_norm": 0.3306680712851493, + "learning_rate": 1.4191770813625732e-05, + "loss": 0.3019, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014933746308088303, - "step": 874, - "valid_targets_mean": 3257.0, - "valid_targets_min": 820 + "loss_rank_avg": 0.1288958340883255, + "step": 3135, + "valid_targets_mean": 3816.3, + "valid_targets_min": 1339 }, { - "epoch": 3.5427848101265824, - "grad_norm": 0.27663014137321357, - "learning_rate": 9.546412404157383e-06, - "loss": 0.1707, + "epoch": 3.178227848101266, + "grad_norm": 0.33226796696180216, + "learning_rate": 1.4124191463069305e-05, + "loss": 0.2918, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01676180027425289, - "step": 875, - "valid_targets_mean": 3740.1, - "valid_targets_min": 910 + "loss_rank_avg": 0.1820456087589264, + "step": 3140, + "valid_targets_mean": 4462.9, + "valid_targets_min": 1300 }, { - "epoch": 3.546835443037975, - "grad_norm": 0.2869278908470468, - "learning_rate": 9.498240042204471e-06, - "loss": 0.1826, + "epoch": 3.1832911392405063, + "grad_norm": 0.25066525871506057, + "learning_rate": 1.4056685457092122e-05, + "loss": 0.2981, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017435681074857712, - "step": 876, - "valid_targets_mean": 3564.0, - "valid_targets_min": 942 + "loss_rank_avg": 0.108408123254776, + "step": 3145, + "valid_targets_mean": 4348.1, + "valid_targets_min": 954 }, { - "epoch": 3.550886075949367, - "grad_norm": 0.2525536913472693, - "learning_rate": 9.450151652044339e-06, - "loss": 0.1682, + "epoch": 3.188354430379747, + "grad_norm": 0.2799683914898738, + "learning_rate": 1.3989253638335577e-05, + "loss": 0.3048, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02018481120467186, - "step": 877, - "valid_targets_mean": 5247.9, - "valid_targets_min": 1322 + "loss_rank_avg": 0.140926331281662, + "step": 3150, + "valid_targets_mean": 5334.3, + "valid_targets_min": 686 }, { - "epoch": 3.5549367088607595, - "grad_norm": 0.26135470090742097, - "learning_rate": 9.402147618190472e-06, - "loss": 0.1752, + "epoch": 3.1934177215189874, + "grad_norm": 0.364904271269176, + "learning_rate": 1.3921896848515064e-05, + "loss": 0.3009, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024542193859815598, - "step": 878, - "valid_targets_mean": 4611.9, - "valid_targets_min": 1269 + "loss_rank_avg": 0.1611071527004242, + "step": 3155, + "valid_targets_mean": 3846.9, + "valid_targets_min": 1050 }, { - "epoch": 3.558987341772152, - "grad_norm": 0.26586642166295227, - "learning_rate": 9.354228324481841e-06, - "loss": 0.1678, + "epoch": 3.198481012658228, + "grad_norm": 0.31469570152651777, + "learning_rate": 1.385461592840939e-05, + "loss": 0.2914, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026726705953478813, - "step": 879, - "valid_targets_mean": 5407.9, - "valid_targets_min": 1536 + "loss_rank_avg": 0.16711470484733582, + "step": 3160, + "valid_targets_mean": 4945.6, + "valid_targets_min": 1265 }, { - "epoch": 3.563037974683544, - "grad_norm": 0.27983180794676255, - "learning_rate": 9.306394154079852e-06, - "loss": 0.1763, + "epoch": 3.2035443037974685, + "grad_norm": 0.40871004437684494, + "learning_rate": 1.3787411717850358e-05, + "loss": 0.2874, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015666600316762924, - "step": 880, - "valid_targets_mean": 3015.9, - "valid_targets_min": 1245 + "loss_rank_avg": 0.14797160029411316, + "step": 3165, + "valid_targets_mean": 3367.9, + "valid_targets_min": 1380 }, { - "epoch": 3.5670886075949366, - "grad_norm": 0.2575519741288266, - "learning_rate": 9.258645489465254e-06, - "loss": 0.17, + "epoch": 3.208607594936709, + "grad_norm": 0.3063676754341602, + "learning_rate": 1.3720285055712222e-05, + "loss": 0.3, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020282434299588203, - "step": 881, - "valid_targets_mean": 3948.4, - "valid_targets_min": 1163 + "loss_rank_avg": 0.12202514708042145, + "step": 3170, + "valid_targets_mean": 3846.6, + "valid_targets_min": 1062 }, { - "epoch": 3.571139240506329, - "grad_norm": 0.2653082828382865, - "learning_rate": 9.21098271243509e-06, - "loss": 0.178, + "epoch": 3.2136708860759495, + "grad_norm": 0.33920699542879207, + "learning_rate": 1.3653236779901236e-05, + "loss": 0.3022, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01743317022919655, - "step": 882, - "valid_targets_mean": 4008.4, - "valid_targets_min": 1868 + "loss_rank_avg": 0.08881708979606628, + "step": 3175, + "valid_targets_mean": 2589.6, + "valid_targets_min": 1310 }, { - "epoch": 3.5751898734177217, - "grad_norm": 0.27186636332643244, - "learning_rate": 9.163406204099659e-06, - "loss": 0.1708, + "epoch": 3.21873417721519, + "grad_norm": 0.3118478710909973, + "learning_rate": 1.3586267727345232e-05, + "loss": 0.3031, "loss_nan_ranks": 0, - "loss_rank_avg": 0.019136447459459305, - "step": 883, - "valid_targets_mean": 4040.6, - "valid_targets_min": 897 + "loss_rank_avg": 0.13673335313796997, + "step": 3180, + "valid_targets_mean": 4171.1, + "valid_targets_min": 1119 }, { - "epoch": 3.579240506329114, - "grad_norm": 0.257092667145981, - "learning_rate": 9.115916344879452e-06, - "loss": 0.158, + "epoch": 3.22379746835443, + "grad_norm": 0.3657648017370462, + "learning_rate": 1.35193787339831e-05, + "loss": 0.2944, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014980191364884377, - "step": 884, - "valid_targets_mean": 3076.8, - "valid_targets_min": 1085 + "loss_rank_avg": 0.13656803965568542, + "step": 3185, + "valid_targets_mean": 4186.0, + "valid_targets_min": 1616 }, { - "epoch": 3.5832911392405062, - "grad_norm": 0.27330926886995605, - "learning_rate": 9.068513514502126e-06, - "loss": 0.1807, + "epoch": 3.2288607594936707, + "grad_norm": 0.29575714520451474, + "learning_rate": 1.3452570634754434e-05, + "loss": 0.2919, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03542338311672211, - "step": 885, - "valid_targets_mean": 4332.9, - "valid_targets_min": 1017 + "loss_rank_avg": 0.14770013093948364, + "step": 3190, + "valid_targets_mean": 5272.9, + "valid_targets_min": 1046 }, { - "epoch": 3.5873417721518988, - "grad_norm": 0.2668971182685187, - "learning_rate": 9.021198091999439e-06, - "loss": 0.1725, + "epoch": 3.233924050632911, + "grad_norm": 0.29496009702650783, + "learning_rate": 1.3385844263589033e-05, + "loss": 0.2984, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015454728156328201, - "step": 886, - "valid_targets_mean": 3425.2, - "valid_targets_min": 1040 + "loss_rank_avg": 0.17479810118675232, + "step": 3195, + "valid_targets_mean": 5597.1, + "valid_targets_min": 1196 }, { - "epoch": 3.5913924050632913, - "grad_norm": 0.27206850542620964, - "learning_rate": 8.973970455704251e-06, - "loss": 0.1787, + "epoch": 3.2389873417721518, + "grad_norm": 0.35581177858551544, + "learning_rate": 1.3319200453396548e-05, + "loss": 0.294, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014722486957907677, - "step": 887, - "valid_targets_mean": 3363.7, - "valid_targets_min": 893 + "loss_rank_avg": 0.15500982105731964, + "step": 3200, + "valid_targets_mean": 4076.4, + "valid_targets_min": 1070 }, { - "epoch": 3.5954430379746833, - "grad_norm": 0.2575898216184259, - "learning_rate": 8.926830983247495e-06, - "loss": 0.1633, + "epoch": 3.2440506329113923, + "grad_norm": 0.292951167708079, + "learning_rate": 1.3252640036056068e-05, + "loss": 0.3027, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009300259873270988, - "step": 888, - "valid_targets_mean": 2336.5, - "valid_targets_min": 1117 + "loss_rank_avg": 0.1567637026309967, + "step": 3205, + "valid_targets_mean": 5367.4, + "valid_targets_min": 1424 }, { - "epoch": 3.599493670886076, - "grad_norm": 0.28148287174224407, - "learning_rate": 8.879780051555116e-06, - "loss": 0.1656, + "epoch": 3.249113924050633, + "grad_norm": 0.3182836504556952, + "learning_rate": 1.318616384240572e-05, + "loss": 0.3043, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01779111847281456, - "step": 889, - "valid_targets_mean": 3630.5, - "valid_targets_min": 1085 + "loss_rank_avg": 0.1862029880285263, + "step": 3210, + "valid_targets_mean": 5235.2, + "valid_targets_min": 1150 }, { - "epoch": 3.6035443037974684, - "grad_norm": 0.26848570602365607, - "learning_rate": 8.83281803684512e-06, - "loss": 0.1776, + "epoch": 3.2541772151898734, + "grad_norm": 0.5962356962936214, + "learning_rate": 1.3119772702232325e-05, + "loss": 0.3019, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012078113853931427, - "step": 890, - "valid_targets_mean": 2879.1, - "valid_targets_min": 1218 + "loss_rank_avg": 0.15173543989658356, + "step": 3215, + "valid_targets_mean": 3896.9, + "valid_targets_min": 1293 }, { - "epoch": 3.607594936708861, - "grad_norm": 0.26625379992305637, - "learning_rate": 8.785945314624532e-06, - "loss": 0.1697, + "epoch": 3.259240506329114, + "grad_norm": 0.30446119131111943, + "learning_rate": 1.3053467444261021e-05, + "loss": 0.3035, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01545766368508339, - "step": 891, - "valid_targets_mean": 3677.1, - "valid_targets_min": 1415 + "loss_rank_avg": 0.16218072175979614, + "step": 3220, + "valid_targets_mean": 5058.7, + "valid_targets_min": 1443 }, { - "epoch": 3.6116455696202534, - "grad_norm": 0.2796316691103621, - "learning_rate": 8.739162259686385e-06, - "loss": 0.1756, + "epoch": 3.2643037974683544, + "grad_norm": 0.30100968288795926, + "learning_rate": 1.2987248896144915e-05, + "loss": 0.3025, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023361319676041603, - "step": 892, - "valid_targets_mean": 4513.4, - "valid_targets_min": 1128 + "loss_rank_avg": 0.1766858994960785, + "step": 3225, + "valid_targets_mean": 5638.7, + "valid_targets_min": 1494 }, { - "epoch": 3.6156962025316455, - "grad_norm": 0.2732692790433263, - "learning_rate": 8.692469246106734e-06, - "loss": 0.1774, + "epoch": 3.269367088607595, + "grad_norm": 0.3043172862585619, + "learning_rate": 1.2921117884454784e-05, + "loss": 0.3111, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021602444350719452, - "step": 893, - "valid_targets_mean": 4360.2, - "valid_targets_min": 996 + "loss_rank_avg": 0.13346105813980103, + "step": 3230, + "valid_targets_mean": 4191.2, + "valid_targets_min": 886 }, { - "epoch": 3.619746835443038, - "grad_norm": 0.254548372810956, - "learning_rate": 8.645866647241677e-06, - "loss": 0.1716, + "epoch": 3.2744303797468355, + "grad_norm": 0.30601688850305675, + "learning_rate": 1.2855075234668708e-05, + "loss": 0.2969, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014873235486447811, - "step": 894, - "valid_targets_mean": 4151.7, - "valid_targets_min": 1002 + "loss_rank_avg": 0.17442500591278076, + "step": 3235, + "valid_targets_mean": 5185.5, + "valid_targets_min": 1463 }, { - "epoch": 3.6237974683544305, - "grad_norm": 0.25854002807195725, - "learning_rate": 8.599354835724348e-06, - "loss": 0.1719, + "epoch": 3.279493670886076, + "grad_norm": 0.32710194023512384, + "learning_rate": 1.278912177116182e-05, + "loss": 0.2987, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01778283342719078, - "step": 895, - "valid_targets_mean": 3965.2, - "valid_targets_min": 1304 + "loss_rank_avg": 0.18517401814460754, + "step": 3240, + "valid_targets_mean": 4748.3, + "valid_targets_min": 1001 }, { - "epoch": 3.6278481012658226, - "grad_norm": 0.25770645960418515, - "learning_rate": 8.552934183461958e-06, - "loss": 0.1647, + "epoch": 3.2845569620253166, + "grad_norm": 0.2832311914633299, + "learning_rate": 1.2723258317195965e-05, + "loss": 0.3118, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016672223806381226, - "step": 896, - "valid_targets_mean": 4062.9, - "valid_targets_min": 949 + "loss_rank_avg": 0.1336306631565094, + "step": 3245, + "valid_targets_mean": 4846.2, + "valid_targets_min": 1184 }, { - "epoch": 3.631898734177215, - "grad_norm": 0.2760455013295443, - "learning_rate": 8.506605061632805e-06, - "loss": 0.1717, + "epoch": 3.289620253164557, + "grad_norm": 0.2859981419544709, + "learning_rate": 1.2657485694909451e-05, + "loss": 0.3031, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03020300716161728, - "step": 897, - "valid_targets_mean": 5710.8, - "valid_targets_min": 1398 + "loss_rank_avg": 0.12190467119216919, + "step": 3250, + "valid_targets_mean": 4372.2, + "valid_targets_min": 1012 }, { - "epoch": 3.6359493670886076, - "grad_norm": 0.2601624470475137, - "learning_rate": 8.460367840683308e-06, - "loss": 0.1638, + "epoch": 3.2946835443037976, + "grad_norm": 0.3133461443068576, + "learning_rate": 1.2591804725306802e-05, + "loss": 0.3056, "loss_nan_ranks": 0, - "loss_rank_avg": 0.027632739394903183, - "step": 898, - "valid_targets_mean": 6040.9, - "valid_targets_min": 1168 + "loss_rank_avg": 0.09984530508518219, + "step": 3255, + "valid_targets_mean": 3387.1, + "valid_targets_min": 1020 }, { - "epoch": 3.64, - "grad_norm": 0.25673009327136503, - "learning_rate": 8.414222890325065e-06, - "loss": 0.1712, + "epoch": 3.299746835443038, + "grad_norm": 0.3066068642940143, + "learning_rate": 1.252621622824846e-05, + "loss": 0.3049, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02003858983516693, - "step": 899, - "valid_targets_mean": 4638.9, - "valid_targets_min": 1044 + "loss_rank_avg": 0.10505147278308868, + "step": 3260, + "valid_targets_mean": 3119.9, + "valid_targets_min": 1415 }, { - "epoch": 3.6440506329113926, - "grad_norm": 0.267767166225715, - "learning_rate": 8.368170579531855e-06, - "loss": 0.1731, + "epoch": 3.3048101265822787, + "grad_norm": 0.30638690017522713, + "learning_rate": 1.2460721022440617e-05, + "loss": 0.3023, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02521287277340889, - "step": 900, - "valid_targets_mean": 4739.3, - "valid_targets_min": 982 + "loss_rank_avg": 0.1329905241727829, + "step": 3265, + "valid_targets_mean": 3749.8, + "valid_targets_min": 1247 }, { - "epoch": 3.6481012658227847, - "grad_norm": 0.2682121961553667, - "learning_rate": 8.322211276536733e-06, - "loss": 0.1734, + "epoch": 3.309873417721519, + "grad_norm": 0.3893581538313894, + "learning_rate": 1.2395319925424922e-05, + "loss": 0.301, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023730196058750153, - "step": 901, - "valid_targets_mean": 4744.9, - "valid_targets_min": 1014 + "loss_rank_avg": 0.12106384336948395, + "step": 3270, + "valid_targets_mean": 3750.2, + "valid_targets_min": 1170 }, { - "epoch": 3.6521518987341772, - "grad_norm": 0.253369373219694, - "learning_rate": 8.27634534882907e-06, - "loss": 0.1686, + "epoch": 3.3149367088607593, + "grad_norm": 0.2915580190067732, + "learning_rate": 1.2330013753568345e-05, + "loss": 0.3046, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021989701315760612, - "step": 902, - "valid_targets_mean": 4789.8, - "valid_targets_min": 940 + "loss_rank_avg": 0.15587478876113892, + "step": 3275, + "valid_targets_mean": 5153.9, + "valid_targets_min": 884 }, { - "epoch": 3.6562025316455697, - "grad_norm": 0.27301076748001013, - "learning_rate": 8.230573163151585e-06, - "loss": 0.1746, + "epoch": 3.32, + "grad_norm": 0.32454051814127005, + "learning_rate": 1.2264803322052938e-05, + "loss": 0.2902, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02975517511367798, - "step": 903, - "valid_targets_mean": 4628.7, - "valid_targets_min": 929 + "loss_rank_avg": 0.14347532391548157, + "step": 3280, + "valid_targets_mean": 3715.8, + "valid_targets_min": 1109 }, { - "epoch": 3.660253164556962, - "grad_norm": 0.2497290882389554, - "learning_rate": 8.184895085497466e-06, - "loss": 0.1635, + "epoch": 3.3250632911392404, + "grad_norm": 0.3014718100183408, + "learning_rate": 1.2199689444865688e-05, + "loss": 0.2952, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01518967468291521, - "step": 904, - "valid_targets_mean": 3753.2, - "valid_targets_min": 992 + "loss_rank_avg": 0.11024832725524902, + "step": 3285, + "valid_targets_mean": 3356.6, + "valid_targets_min": 1346 }, { - "epoch": 3.6643037974683543, - "grad_norm": 0.2598974280536631, - "learning_rate": 8.139311481107401e-06, - "loss": 0.1602, + "epoch": 3.330126582278481, + "grad_norm": 0.3180605843827969, + "learning_rate": 1.2134672934788338e-05, + "loss": 0.2996, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02183011919260025, - "step": 905, - "valid_targets_mean": 4992.8, - "valid_targets_min": 1164 + "loss_rank_avg": 0.1348501592874527, + "step": 3290, + "valid_targets_mean": 4180.2, + "valid_targets_min": 1405 }, { - "epoch": 3.668354430379747, - "grad_norm": 0.2643137313815488, - "learning_rate": 8.093822714466686e-06, - "loss": 0.1768, + "epoch": 3.3351898734177214, + "grad_norm": 0.31070898369108957, + "learning_rate": 1.206975460338726e-05, + "loss": 0.2962, "loss_nan_ranks": 0, - "loss_rank_avg": 0.025400429964065552, - "step": 906, - "valid_targets_mean": 4411.9, - "valid_targets_min": 992 + "loss_rank_avg": 0.14836883544921875, + "step": 3295, + "valid_targets_mean": 4112.2, + "valid_targets_min": 1401 }, { - "epoch": 3.6724050632911394, - "grad_norm": 0.2653001465753879, - "learning_rate": 8.048429149302271e-06, - "loss": 0.1683, + "epoch": 3.340253164556962, + "grad_norm": 0.28528395978629406, + "learning_rate": 1.2004935261003292e-05, + "loss": 0.3012, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02079555206000805, - "step": 907, - "valid_targets_mean": 4470.9, - "valid_targets_min": 1324 + "loss_rank_avg": 0.11711958050727844, + "step": 3300, + "valid_targets_mean": 5100.1, + "valid_targets_min": 1029 }, { - "epoch": 3.676455696202532, - "grad_norm": 0.26895487702039644, - "learning_rate": 8.003131148579902e-06, - "loss": 0.1873, + "epoch": 3.3453164556962025, + "grad_norm": 0.3708665183674031, + "learning_rate": 1.194021571674168e-05, + "loss": 0.2891, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024797216057777405, - "step": 908, - "valid_targets_mean": 5130.2, - "valid_targets_min": 946 + "loss_rank_avg": 0.13884302973747253, + "step": 3305, + "valid_targets_mean": 4049.6, + "valid_targets_min": 974 }, { - "epoch": 3.680506329113924, - "grad_norm": 0.2666756575494181, - "learning_rate": 7.957929074501182e-06, - "loss": 0.167, + "epoch": 3.350379746835443, + "grad_norm": 0.3298858637588618, + "learning_rate": 1.1875596778461903e-05, + "loss": 0.2893, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02008213847875595, - "step": 909, - "valid_targets_mean": 4111.8, - "valid_targets_min": 932 + "loss_rank_avg": 0.1451665610074997, + "step": 3310, + "valid_targets_mean": 4987.6, + "valid_targets_min": 1624 }, { - "epoch": 3.6845569620253165, - "grad_norm": 0.2540007439992368, - "learning_rate": 7.912823288500695e-06, - "loss": 0.1675, + "epoch": 3.3554430379746836, + "grad_norm": 0.30292570807469854, + "learning_rate": 1.1811079252767663e-05, + "loss": 0.2858, "loss_nan_ranks": 0, - "loss_rank_avg": 0.025224491953849792, - "step": 910, - "valid_targets_mean": 5069.3, - "valid_targets_min": 1430 + "loss_rank_avg": 0.12376505136489868, + "step": 3315, + "valid_targets_mean": 3583.1, + "valid_targets_min": 1257 }, { - "epoch": 3.688607594936709, - "grad_norm": 0.26073487502324005, - "learning_rate": 7.867814151243102e-06, - "loss": 0.1705, + "epoch": 3.360506329113924, + "grad_norm": 0.32294869729997133, + "learning_rate": 1.174666394499677e-05, + "loss": 0.2914, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018188446760177612, - "step": 911, - "valid_targets_mean": 4627.6, - "valid_targets_min": 1025 + "loss_rank_avg": 0.11844268441200256, + "step": 3320, + "valid_targets_mean": 3262.4, + "valid_targets_min": 1554 }, { - "epoch": 3.692658227848101, - "grad_norm": 0.2728741992837641, - "learning_rate": 7.822902022620267e-06, - "loss": 0.1703, + "epoch": 3.3655696202531646, + "grad_norm": 0.3438389100269291, + "learning_rate": 1.1682351659211088e-05, + "loss": 0.2965, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023914583027362823, - "step": 912, - "valid_targets_mean": 4483.4, - "valid_targets_min": 1166 + "loss_rank_avg": 0.1457480937242508, + "step": 3325, + "valid_targets_mean": 3745.7, + "valid_targets_min": 1190 }, { - "epoch": 3.6967088607594936, - "grad_norm": 0.26399975045933377, - "learning_rate": 7.778087261748377e-06, - "loss": 0.1774, + "epoch": 3.370632911392405, + "grad_norm": 0.33249792748827744, + "learning_rate": 1.1618143198186544e-05, + "loss": 0.2956, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01853068359196186, - "step": 913, - "valid_targets_mean": 4006.7, - "valid_targets_min": 1153 + "loss_rank_avg": 0.15856462717056274, + "step": 3330, + "valid_targets_mean": 4305.5, + "valid_targets_min": 910 }, { - "epoch": 3.700759493670886, - "grad_norm": 0.2533150709773523, - "learning_rate": 7.733370226965056e-06, - "loss": 0.1746, + "epoch": 3.3756962025316457, + "grad_norm": 0.3173034049275177, + "learning_rate": 1.155403936340304e-05, + "loss": 0.2947, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01628236100077629, - "step": 914, - "valid_targets_mean": 4182.9, - "valid_targets_min": 1054 + "loss_rank_avg": 0.18826264142990112, + "step": 3335, + "valid_targets_mean": 5404.1, + "valid_targets_min": 1023 }, { - "epoch": 3.7048101265822786, - "grad_norm": 0.26525652186299153, - "learning_rate": 7.688751275826527e-06, - "loss": 0.1716, + "epoch": 3.3807594936708862, + "grad_norm": 0.3178276101310362, + "learning_rate": 1.149004095503452e-05, + "loss": 0.297, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020979510620236397, - "step": 915, - "valid_targets_mean": 4063.8, - "valid_targets_min": 1211 + "loss_rank_avg": 0.15395784378051758, + "step": 3340, + "valid_targets_mean": 5008.1, + "valid_targets_min": 1032 }, { - "epoch": 3.708860759493671, - "grad_norm": 0.25620575937396894, - "learning_rate": 7.644230765104735e-06, - "loss": 0.1615, + "epoch": 3.3858227848101268, + "grad_norm": 0.3025604254884791, + "learning_rate": 1.1426148771938915e-05, + "loss": 0.3065, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02342154271900654, - "step": 916, - "valid_targets_mean": 5562.9, - "valid_targets_min": 1385 + "loss_rank_avg": 0.1488351970911026, + "step": 3345, + "valid_targets_mean": 4703.1, + "valid_targets_min": 1506 }, { - "epoch": 3.712911392405063, - "grad_norm": 0.2725936348721527, - "learning_rate": 7.599809050784502e-06, - "loss": 0.1635, + "epoch": 3.390886075949367, + "grad_norm": 0.3037731465395097, + "learning_rate": 1.1362363611648228e-05, + "loss": 0.3069, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01897498033940792, - "step": 917, - "valid_targets_mean": 4265.4, - "valid_targets_min": 1208 + "loss_rank_avg": 0.11465154588222504, + "step": 3350, + "valid_targets_mean": 3727.4, + "valid_targets_min": 921 }, { - "epoch": 3.7169620253164557, - "grad_norm": 0.271718242795319, - "learning_rate": 7.555486488060661e-06, - "loss": 0.1695, + "epoch": 3.3959493670886074, + "grad_norm": 0.3441348411116797, + "learning_rate": 1.1298686270358542e-05, + "loss": 0.301, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02727007493376732, - "step": 918, - "valid_targets_mean": 4823.9, - "valid_targets_min": 1198 + "loss_rank_avg": 0.17264041304588318, + "step": 3355, + "valid_targets_mean": 3737.4, + "valid_targets_min": 1599 }, { - "epoch": 3.721012658227848, - "grad_norm": 0.25621091668575013, - "learning_rate": 7.511263431335249e-06, - "loss": 0.1715, + "epoch": 3.401012658227848, + "grad_norm": 0.2922925569041438, + "learning_rate": 1.1235117542920077e-05, + "loss": 0.2984, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017735548317432404, - "step": 919, - "valid_targets_mean": 4271.2, - "valid_targets_min": 1546 + "loss_rank_avg": 0.11297699809074402, + "step": 3360, + "valid_targets_mean": 4023.7, + "valid_targets_min": 1052 }, { - "epoch": 3.7250632911392403, - "grad_norm": 0.2530190312257486, - "learning_rate": 7.467140234214656e-06, - "loss": 0.1695, + "epoch": 3.4060759493670885, + "grad_norm": 0.3304527382358981, + "learning_rate": 1.1171658222827321e-05, + "loss": 0.308, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01537634152919054, - "step": 920, - "valid_targets_mean": 4448.5, - "valid_targets_min": 1213 + "loss_rank_avg": 0.21652498841285706, + "step": 3365, + "valid_targets_mean": 5499.5, + "valid_targets_min": 949 }, { - "epoch": 3.729113924050633, - "grad_norm": 0.2721300806889746, - "learning_rate": 7.423117249506777e-06, - "loss": 0.1735, + "epoch": 3.411139240506329, + "grad_norm": 0.34098724945316766, + "learning_rate": 1.110830910220905e-05, + "loss": 0.3065, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02418111450970173, - "step": 921, - "valid_targets_mean": 5360.5, - "valid_targets_min": 1055 + "loss_rank_avg": 0.17756357789039612, + "step": 3370, + "valid_targets_mean": 3859.3, + "valid_targets_min": 961 }, { - "epoch": 3.7331645569620253, - "grad_norm": 0.2780701753257114, - "learning_rate": 7.379194829218231e-06, - "loss": 0.1636, + "epoch": 3.4162025316455695, + "grad_norm": 0.29556792434414897, + "learning_rate": 1.1045070971818515e-05, + "loss": 0.2959, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01901012286543846, - "step": 922, - "valid_targets_mean": 3735.7, - "valid_targets_min": 897 + "loss_rank_avg": 0.130351722240448, + "step": 3375, + "valid_targets_mean": 3860.4, + "valid_targets_min": 1126 }, { - "epoch": 3.737215189873418, - "grad_norm": 0.25856564395672654, - "learning_rate": 7.335373324551518e-06, - "loss": 0.1756, + "epoch": 3.42126582278481, + "grad_norm": 0.2817393837564735, + "learning_rate": 1.0981944621023508e-05, + "loss": 0.2989, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01937105692923069, - "step": 923, - "valid_targets_mean": 4076.9, - "valid_targets_min": 945 + "loss_rank_avg": 0.172260582447052, + "step": 3380, + "valid_targets_mean": 6195.1, + "valid_targets_min": 1065 }, { - "epoch": 3.7412658227848103, - "grad_norm": 0.2629253903683393, - "learning_rate": 7.291653085902224e-06, - "loss": 0.1733, + "epoch": 3.4263291139240506, + "grad_norm": 0.3019678707113778, + "learning_rate": 1.0918930837796538e-05, + "loss": 0.2909, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012313663959503174, - "step": 924, - "valid_targets_mean": 3137.3, - "valid_targets_min": 1172 + "loss_rank_avg": 0.14187800884246826, + "step": 3385, + "valid_targets_mean": 4729.5, + "valid_targets_min": 1024 }, { - "epoch": 3.7453164556962024, - "grad_norm": 0.2787227707451909, - "learning_rate": 7.248034462856213e-06, - "loss": 0.1679, + "epoch": 3.431392405063291, + "grad_norm": 0.32014190449807, + "learning_rate": 1.0856030408705016e-05, + "loss": 0.3174, "loss_nan_ranks": 0, - "loss_rank_avg": 0.022976160049438477, - "step": 925, - "valid_targets_mean": 4435.9, - "valid_targets_min": 1428 + "loss_rank_avg": 0.15517304837703705, + "step": 3390, + "valid_targets_mean": 4351.6, + "valid_targets_min": 1381 }, { - "epoch": 3.749367088607595, - "grad_norm": 0.2688756768110731, - "learning_rate": 7.20451780418683e-06, - "loss": 0.1656, + "epoch": 3.4364556962025317, + "grad_norm": 0.28577315150998106, + "learning_rate": 1.0793244118901383e-05, + "loss": 0.3001, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02019648440182209, - "step": 926, - "valid_targets_mean": 3506.9, - "valid_targets_min": 861 + "loss_rank_avg": 0.16560965776443481, + "step": 3395, + "valid_targets_mean": 5708.4, + "valid_targets_min": 1323 }, { - "epoch": 3.7534177215189874, - "grad_norm": 0.24455271551147317, - "learning_rate": 7.161103457852128e-06, - "loss": 0.1653, + "epoch": 3.441518987341772, + "grad_norm": 0.2895330117490245, + "learning_rate": 1.0730572752113368e-05, + "loss": 0.3004, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021572839468717575, - "step": 927, - "valid_targets_mean": 5081.4, - "valid_targets_min": 1330 + "loss_rank_avg": 0.16116517782211304, + "step": 3400, + "valid_targets_mean": 6122.2, + "valid_targets_min": 1191 }, { - "epoch": 3.7574683544303795, - "grad_norm": 0.25971636467078635, - "learning_rate": 7.117791770992046e-06, - "loss": 0.169, + "epoch": 3.4465822784810127, + "grad_norm": 0.3276147158732667, + "learning_rate": 1.0668017090634164e-05, + "loss": 0.2912, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02314162254333496, - "step": 928, - "valid_targets_mean": 4839.4, - "valid_targets_min": 1052 + "loss_rank_avg": 0.1463163197040558, + "step": 3405, + "valid_targets_mean": 3868.4, + "valid_targets_min": 1560 }, { - "epoch": 3.761518987341772, - "grad_norm": 0.280975447445582, - "learning_rate": 7.074583089925688e-06, - "loss": 0.182, + "epoch": 3.4516455696202533, + "grad_norm": 0.27522935687650096, + "learning_rate": 1.0605577915312662e-05, + "loss": 0.2973, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01538354717195034, - "step": 929, - "valid_targets_mean": 3658.2, - "valid_targets_min": 1409 + "loss_rank_avg": 0.14057275652885437, + "step": 3410, + "valid_targets_mean": 5501.4, + "valid_targets_min": 1198 }, { - "epoch": 3.7655696202531646, - "grad_norm": 0.26892822036099945, - "learning_rate": 7.0314777601485155e-06, - "loss": 0.1707, + "epoch": 3.456708860759494, + "grad_norm": 0.2546793488548092, + "learning_rate": 1.0543256005543752e-05, + "loss": 0.2931, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016586199402809143, - "step": 930, - "valid_targets_mean": 3859.4, - "valid_targets_min": 1343 + "loss_rank_avg": 0.17178791761398315, + "step": 3415, + "valid_targets_mean": 6257.9, + "valid_targets_min": 1159 }, { - "epoch": 3.769620253164557, - "grad_norm": 0.25282373989841805, - "learning_rate": 6.988476126329604e-06, - "loss": 0.1729, + "epoch": 3.4617721518987343, + "grad_norm": 0.2887049078859965, + "learning_rate": 1.048105213925853e-05, + "loss": 0.3008, "loss_nan_ranks": 0, - "loss_rank_avg": 0.027788568288087845, - "step": 931, - "valid_targets_mean": 6460.5, - "valid_targets_min": 1253 + "loss_rank_avg": 0.15336082875728607, + "step": 3420, + "valid_targets_mean": 5676.3, + "valid_targets_min": 1581 }, { - "epoch": 3.7736708860759496, - "grad_norm": 0.2741741674704985, - "learning_rate": 6.945578532308857e-06, - "loss": 0.1757, + "epoch": 3.466835443037975, + "grad_norm": 0.3248981947583244, + "learning_rate": 1.0418967092914643e-05, + "loss": 0.296, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03157555311918259, - "step": 932, - "valid_targets_mean": 5928.9, - "valid_targets_min": 1063 + "loss_rank_avg": 0.12593376636505127, + "step": 3425, + "valid_targets_mean": 3572.3, + "valid_targets_min": 1200 }, { - "epoch": 3.7777215189873417, - "grad_norm": 0.25125778944512545, - "learning_rate": 6.902785321094301e-06, - "loss": 0.1628, + "epoch": 3.4718987341772154, + "grad_norm": 0.33097897236242024, + "learning_rate": 1.0357001641486556e-05, + "loss": 0.2969, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016016250476241112, - "step": 933, - "valid_targets_mean": 3399.4, - "valid_targets_min": 1410 + "loss_rank_avg": 0.16061154007911682, + "step": 3430, + "valid_targets_mean": 6276.2, + "valid_targets_min": 1499 }, { - "epoch": 3.781772151898734, - "grad_norm": 0.28015343257100844, - "learning_rate": 6.8600968348593115e-06, - "loss": 0.1786, + "epoch": 3.476962025316456, + "grad_norm": 0.29614677485829893, + "learning_rate": 1.0295156558455885e-05, + "loss": 0.3058, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012400506064295769, - "step": 934, - "valid_targets_mean": 2638.9, - "valid_targets_min": 899 + "loss_rank_avg": 0.1872052550315857, + "step": 3435, + "valid_targets_mean": 6640.8, + "valid_targets_min": 1160 }, { - "epoch": 3.7858227848101267, - "grad_norm": 0.24892331479213567, - "learning_rate": 6.817513414939892e-06, - "loss": 0.1511, + "epoch": 3.482025316455696, + "grad_norm": 0.27705637796463173, + "learning_rate": 1.0233432615801786e-05, + "loss": 0.2938, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013685108162462711, - "step": 935, - "valid_targets_mean": 3465.3, - "valid_targets_min": 1601 + "loss_rank_avg": 0.14229699969291687, + "step": 3440, + "valid_targets_mean": 5945.9, + "valid_targets_min": 1284 }, { - "epoch": 3.7898734177215188, - "grad_norm": 0.2646257029306078, - "learning_rate": 6.775035401831924e-06, - "loss": 0.1681, + "epoch": 3.4870886075949366, + "grad_norm": 0.26237225876364956, + "learning_rate": 1.0171830583991245e-05, + "loss": 0.3011, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012793563306331635, - "step": 936, - "valid_targets_mean": 3330.2, - "valid_targets_min": 1394 + "loss_rank_avg": 0.17002148926258087, + "step": 3445, + "valid_targets_mean": 6348.2, + "valid_targets_min": 1431 }, { - "epoch": 3.7939240506329113, - "grad_norm": 0.266412647881079, - "learning_rate": 6.732663135188473e-06, - "loss": 0.166, + "epoch": 3.492151898734177, + "grad_norm": 0.33818695188520737, + "learning_rate": 1.0110351231969532e-05, + "loss": 0.2999, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020142555236816406, - "step": 937, - "valid_targets_mean": 4423.8, - "valid_targets_min": 1298 + "loss_rank_avg": 0.19817857444286346, + "step": 3450, + "valid_targets_mean": 4845.8, + "valid_targets_min": 1227 }, { - "epoch": 3.797974683544304, - "grad_norm": 0.26291465669937136, - "learning_rate": 6.690396953817055e-06, - "loss": 0.1711, + "epoch": 3.4972151898734176, + "grad_norm": 0.27766136414994197, + "learning_rate": 1.0048995327150556e-05, + "loss": 0.2994, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013688749633729458, - "step": 938, - "valid_targets_mean": 3101.6, - "valid_targets_min": 1313 + "loss_rank_avg": 0.17715927958488464, + "step": 3455, + "valid_targets_mean": 6080.2, + "valid_targets_min": 1024 }, { - "epoch": 3.8020253164556963, - "grad_norm": 0.26546325021532996, - "learning_rate": 6.648237195676927e-06, - "loss": 0.171, + "epoch": 3.502278481012658, + "grad_norm": 0.3385690737736752, + "learning_rate": 9.987763635407293e-06, + "loss": 0.2976, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016537221148610115, - "step": 939, - "valid_targets_mean": 3412.6, - "valid_targets_min": 1393 + "loss_rank_avg": 0.18266132473945618, + "step": 3460, + "valid_targets_mean": 4562.4, + "valid_targets_min": 1570 }, { - "epoch": 3.806075949367089, - "grad_norm": 0.2567377674015171, - "learning_rate": 6.606184197876395e-06, - "loss": 0.1632, + "epoch": 3.5073417721518987, + "grad_norm": 0.35240157099060987, + "learning_rate": 9.926656921062254e-06, + "loss": 0.3082, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02425518073141575, - "step": 940, - "valid_targets_mean": 4925.3, - "valid_targets_min": 1084 + "loss_rank_avg": 0.18275323510169983, + "step": 3465, + "valid_targets_mean": 4161.0, + "valid_targets_min": 1086 }, { - "epoch": 3.810126582278481, - "grad_norm": 0.2616122882966271, - "learning_rate": 6.564238296670109e-06, - "loss": 0.1613, + "epoch": 3.512405063291139, + "grad_norm": 0.37082127798975295, + "learning_rate": 9.865675946877897e-06, + "loss": 0.2964, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0212604571133852, - "step": 941, - "valid_targets_mean": 4302.8, - "valid_targets_min": 1324 + "loss_rank_avg": 0.137416809797287, + "step": 3470, + "valid_targets_mean": 3108.2, + "valid_targets_min": 1222 }, { - "epoch": 3.8141772151898734, - "grad_norm": 0.259336042662043, - "learning_rate": 6.52239982745638e-06, - "loss": 0.1636, + "epoch": 3.5174683544303798, + "grad_norm": 0.32990352870182205, + "learning_rate": 9.804821474047166e-06, + "loss": 0.3071, "loss_nan_ranks": 0, - "loss_rank_avg": 0.025171024724841118, - "step": 942, - "valid_targets_mean": 5372.2, - "valid_targets_min": 1588 + "loss_rank_avg": 0.13373905420303345, + "step": 3475, + "valid_targets_mean": 3385.4, + "valid_targets_min": 1094 }, { - "epoch": 3.818227848101266, - "grad_norm": 0.26152603888356707, - "learning_rate": 6.480669124774484e-06, - "loss": 0.1749, + "epoch": 3.5225316455696203, + "grad_norm": 0.3159470821550788, + "learning_rate": 9.744094262183922e-06, + "loss": 0.2957, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02495972439646721, - "step": 943, - "valid_targets_mean": 4591.2, - "valid_targets_min": 958 + "loss_rank_avg": 0.1414027214050293, + "step": 3480, + "valid_targets_mean": 3928.0, + "valid_targets_min": 1133 }, { - "epoch": 3.822278481012658, - "grad_norm": 0.25718843187745966, - "learning_rate": 6.439046522302013e-06, - "loss": 0.1679, + "epoch": 3.527594936708861, + "grad_norm": 0.2903957000330617, + "learning_rate": 9.683495069313527e-06, + "loss": 0.3091, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017802279442548752, - "step": 944, - "valid_targets_mean": 4441.4, - "valid_targets_min": 1063 + "loss_rank_avg": 0.1404920518398285, + "step": 3485, + "valid_targets_mean": 4963.7, + "valid_targets_min": 1699 }, { - "epoch": 3.8263291139240505, - "grad_norm": 0.24864279505089587, - "learning_rate": 6.397532352852191e-06, - "loss": 0.1694, + "epoch": 3.5326582278481014, + "grad_norm": 0.28971614608089746, + "learning_rate": 9.623024651863317e-06, + "loss": 0.2979, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017683859914541245, - "step": 945, - "valid_targets_mean": 4570.5, - "valid_targets_min": 973 + "loss_rank_avg": 0.12896308302879333, + "step": 3490, + "valid_targets_mean": 4057.2, + "valid_targets_min": 1028 }, { - "epoch": 3.830379746835443, - "grad_norm": 0.24182498223350302, - "learning_rate": 6.356126948371202e-06, - "loss": 0.1662, + "epoch": 3.537721518987342, + "grad_norm": 0.32570265544900434, + "learning_rate": 9.56268376465323e-06, + "loss": 0.2962, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0314711257815361, - "step": 946, - "valid_targets_mean": 7331.1, - "valid_targets_min": 834 + "loss_rank_avg": 0.14747750759124756, + "step": 3495, + "valid_targets_mean": 3836.9, + "valid_targets_min": 858 }, { - "epoch": 3.8344303797468355, - "grad_norm": 0.26373174301768926, - "learning_rate": 6.314830639935559e-06, - "loss": 0.163, + "epoch": 3.5427848101265824, + "grad_norm": 0.3186509490983858, + "learning_rate": 9.502473160886309e-06, + "loss": 0.3055, "loss_nan_ranks": 0, - "loss_rank_avg": 0.019661404192447662, - "step": 947, - "valid_targets_mean": 3925.5, - "valid_targets_min": 929 + "loss_rank_avg": 0.13043725490570068, + "step": 3500, + "valid_targets_mean": 3738.1, + "valid_targets_min": 908 }, { - "epoch": 3.838481012658228, - "grad_norm": 0.25151186493467503, - "learning_rate": 6.273643757749448e-06, - "loss": 0.1596, + "epoch": 3.547848101265823, + "grad_norm": 0.2738590358023834, + "learning_rate": 9.442393592139378e-06, + "loss": 0.31, "loss_nan_ranks": 0, - "loss_rank_avg": 0.019310811534523964, - "step": 948, - "valid_targets_mean": 4121.4, - "valid_targets_min": 1296 + "loss_rank_avg": 0.14587613940238953, + "step": 3505, + "valid_targets_mean": 4865.2, + "valid_targets_min": 1410 }, { - "epoch": 3.84253164556962, - "grad_norm": 0.2525157270897604, - "learning_rate": 6.2325666311420856e-06, - "loss": 0.1795, + "epoch": 3.552911392405063, + "grad_norm": 0.3247037129452682, + "learning_rate": 9.382445808353582e-06, + "loss": 0.2935, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02261164039373398, - "step": 949, - "valid_targets_mean": 5320.1, - "valid_targets_min": 1075 + "loss_rank_avg": 0.1686377227306366, + "step": 3510, + "valid_targets_mean": 4436.3, + "valid_targets_min": 1010 }, { - "epoch": 3.8465822784810126, - "grad_norm": 0.2514022583146415, - "learning_rate": 6.1915995885650675e-06, - "loss": 0.1758, + "epoch": 3.5579746835443036, + "grad_norm": 0.3180620836755935, + "learning_rate": 9.322630557825099e-06, + "loss": 0.2905, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02208195999264717, - "step": 950, - "valid_targets_mean": 4716.1, - "valid_targets_min": 972 + "loss_rank_avg": 0.181858628988266, + "step": 3515, + "valid_targets_mean": 5233.6, + "valid_targets_min": 1452 }, { - "epoch": 3.850632911392405, - "grad_norm": 0.25474420781507917, - "learning_rate": 6.150742957589784e-06, - "loss": 0.1708, + "epoch": 3.563037974683544, + "grad_norm": 0.389889167724953, + "learning_rate": 9.262948587195733e-06, + "loss": 0.3152, "loss_nan_ranks": 0, - "loss_rank_avg": 0.026663821190595627, - "step": 951, - "valid_targets_mean": 5850.9, - "valid_targets_min": 1609 + "loss_rank_avg": 0.15717878937721252, + "step": 3520, + "valid_targets_mean": 3013.9, + "valid_targets_min": 1243 }, { - "epoch": 3.8546835443037972, - "grad_norm": 0.25778279169226703, - "learning_rate": 6.109997064904774e-06, - "loss": 0.1702, + "epoch": 3.5681012658227846, + "grad_norm": 0.29747778586225904, + "learning_rate": 9.203400641443659e-06, + "loss": 0.2996, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02077229507267475, - "step": 952, - "valid_targets_mean": 4348.4, - "valid_targets_min": 1185 + "loss_rank_avg": 0.1657101958990097, + "step": 3525, + "valid_targets_mean": 5011.9, + "valid_targets_min": 1171 }, { - "epoch": 3.8587341772151897, - "grad_norm": 0.26348476292614187, - "learning_rate": 6.069362236313092e-06, - "loss": 0.1659, + "epoch": 3.573164556962025, + "grad_norm": 0.32589494828857773, + "learning_rate": 9.143987463874067e-06, + "loss": 0.3156, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01722509041428566, - "step": 953, - "valid_targets_mean": 3411.2, - "valid_targets_min": 998 + "loss_rank_avg": 0.1313675493001938, + "step": 3530, + "valid_targets_mean": 3697.8, + "valid_targets_min": 1024 }, { - "epoch": 3.8627848101265823, - "grad_norm": 0.24558839076433475, - "learning_rate": 6.0288387967297565e-06, - "loss": 0.1672, + "epoch": 3.5782278481012657, + "grad_norm": 0.3014059728019354, + "learning_rate": 9.084709796109907e-06, + "loss": 0.2868, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021953541785478592, - "step": 954, - "valid_targets_mean": 4563.6, - "valid_targets_min": 1152 + "loss_rank_avg": 0.1120418906211853, + "step": 3535, + "valid_targets_mean": 3755.7, + "valid_targets_min": 1198 }, { - "epoch": 3.8668354430379748, - "grad_norm": 0.2562829506926769, - "learning_rate": 5.988427070179108e-06, - "loss": 0.1705, + "epoch": 3.5832911392405062, + "grad_norm": 0.2948674668221632, + "learning_rate": 9.025568378082656e-06, + "loss": 0.3131, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016076896339654922, - "step": 955, - "valid_targets_mean": 3906.6, - "valid_targets_min": 1348 + "loss_rank_avg": 0.2535429894924164, + "step": 3540, + "valid_targets_mean": 5356.3, + "valid_targets_min": 1015 }, { - "epoch": 3.8708860759493673, - "grad_norm": 0.26383340936593336, - "learning_rate": 5.948127379792234e-06, - "loss": 0.1723, + "epoch": 3.5883544303797468, + "grad_norm": 0.33956864916271323, + "learning_rate": 8.96656394802303e-06, + "loss": 0.3003, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020764939486980438, - "step": 956, - "valid_targets_mean": 4350.1, - "valid_targets_min": 1420 + "loss_rank_avg": 0.18608632683753967, + "step": 3545, + "valid_targets_mean": 4867.1, + "valid_targets_min": 1215 }, { - "epoch": 3.8749367088607594, - "grad_norm": 0.2712332484036904, - "learning_rate": 5.90794004780439e-06, - "loss": 0.1686, + "epoch": 3.5934177215189873, + "grad_norm": 0.2894977962731721, + "learning_rate": 8.907697242451825e-06, + "loss": 0.3074, "loss_nan_ranks": 0, - "loss_rank_avg": 0.022838331758975983, - "step": 957, - "valid_targets_mean": 4404.2, - "valid_targets_min": 1378 + "loss_rank_avg": 0.1488334834575653, + "step": 3550, + "valid_targets_mean": 4891.0, + "valid_targets_min": 1716 }, { - "epoch": 3.878987341772152, - "grad_norm": 0.2622111952679663, - "learning_rate": 5.867865395552405e-06, - "loss": 0.163, + "epoch": 3.598481012658228, + "grad_norm": 0.3343791939565901, + "learning_rate": 8.84896899617067e-06, + "loss": 0.2944, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02169284224510193, - "step": 958, - "valid_targets_mean": 3874.9, - "valid_targets_min": 1088 + "loss_rank_avg": 0.141134113073349, + "step": 3555, + "valid_targets_mean": 3341.7, + "valid_targets_min": 890 }, { - "epoch": 3.8830379746835444, - "grad_norm": 0.25352172374047266, - "learning_rate": 5.8279037434721364e-06, - "loss": 0.1682, + "epoch": 3.6035443037974684, + "grad_norm": 0.33452158631970796, + "learning_rate": 8.790379942252888e-06, + "loss": 0.3019, "loss_nan_ranks": 0, - "loss_rank_avg": 0.019588781520724297, - "step": 959, - "valid_targets_mean": 4924.4, - "valid_targets_min": 1371 + "loss_rank_avg": 0.10691896080970764, + "step": 3560, + "valid_targets_mean": 2877.1, + "valid_targets_min": 1216 }, { - "epoch": 3.8870886075949365, - "grad_norm": 0.2544134305559441, - "learning_rate": 5.788055411095892e-06, - "loss": 0.16, + "epoch": 3.608607594936709, + "grad_norm": 0.36142037308609365, + "learning_rate": 8.73193081203436e-06, + "loss": 0.2975, "loss_nan_ranks": 0, - "loss_rank_avg": 0.022651158273220062, - "step": 960, - "valid_targets_mean": 5013.6, - "valid_targets_min": 1538 + "loss_rank_avg": 0.11998053640127182, + "step": 3565, + "valid_targets_mean": 2745.8, + "valid_targets_min": 1146 }, { - "epoch": 3.891139240506329, - "grad_norm": 0.2598511789733668, - "learning_rate": 5.7483207170498625e-06, - "loss": 0.1767, + "epoch": 3.6136708860759494, + "grad_norm": 0.29136587900708333, + "learning_rate": 8.673622335104335e-06, + "loss": 0.3143, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018454328179359436, - "step": 961, - "valid_targets_mean": 3996.5, - "valid_targets_min": 1382 + "loss_rank_avg": 0.1345549076795578, + "step": 3570, + "valid_targets_mean": 4353.7, + "valid_targets_min": 1408 }, { - "epoch": 3.8951898734177215, - "grad_norm": 0.2777136565674028, - "learning_rate": 5.708699979051613e-06, - "loss": 0.1695, + "epoch": 3.61873417721519, + "grad_norm": 0.3065976409246481, + "learning_rate": 8.6154552392964e-06, + "loss": 0.3027, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01723894290626049, - "step": 962, - "valid_targets_mean": 3204.9, - "valid_targets_min": 979 + "loss_rank_avg": 0.10774962604045868, + "step": 3575, + "valid_targets_mean": 3904.3, + "valid_targets_min": 1176 }, { - "epoch": 3.899240506329114, - "grad_norm": 0.24515877257866994, - "learning_rate": 5.66919351390751e-06, - "loss": 0.1708, + "epoch": 3.6237974683544305, + "grad_norm": 0.31612983755964325, + "learning_rate": 8.557430250679329e-06, + "loss": 0.302, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018098827451467514, - "step": 963, - "valid_targets_mean": 5105.1, - "valid_targets_min": 1065 + "loss_rank_avg": 0.13469217717647552, + "step": 3580, + "valid_targets_mean": 3963.2, + "valid_targets_min": 1302 }, { - "epoch": 3.9032911392405065, - "grad_norm": 0.2609791537507237, - "learning_rate": 5.629801637510188e-06, - "loss": 0.1666, + "epoch": 3.628860759493671, + "grad_norm": 0.355565500659515, + "learning_rate": 8.499548093548069e-06, + "loss": 0.2915, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009696192108094692, - "step": 964, - "valid_targets_mean": 2726.9, - "valid_targets_min": 1328 + "loss_rank_avg": 0.1492483913898468, + "step": 3585, + "valid_targets_mean": 3815.0, + "valid_targets_min": 998 }, { - "epoch": 3.9073417721518986, - "grad_norm": 0.2801700145562046, - "learning_rate": 5.590524664836043e-06, - "loss": 0.1739, + "epoch": 3.6339240506329116, + "grad_norm": 0.3209640823808966, + "learning_rate": 8.441809490414664e-06, + "loss": 0.2943, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03406285494565964, - "step": 965, - "valid_targets_mean": 6347.1, - "valid_targets_min": 1495 + "loss_rank_avg": 0.10731847584247589, + "step": 3590, + "valid_targets_mean": 3390.2, + "valid_targets_min": 1205 }, { - "epoch": 3.911392405063291, - "grad_norm": 0.2543961381253017, - "learning_rate": 5.551362909942708e-06, - "loss": 0.1662, + "epoch": 3.638987341772152, + "grad_norm": 0.3111850353285487, + "learning_rate": 8.384215161999245e-06, + "loss": 0.2928, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014446750283241272, - "step": 966, - "valid_targets_mean": 3029.8, - "valid_targets_min": 868 + "loss_rank_avg": 0.12559540569782257, + "step": 3595, + "valid_targets_mean": 3864.8, + "valid_targets_min": 1279 }, { - "epoch": 3.9154430379746836, - "grad_norm": 0.2604047583655427, - "learning_rate": 5.51231668596653e-06, - "loss": 0.1817, + "epoch": 3.6440506329113926, + "grad_norm": 0.3021980554963678, + "learning_rate": 8.326765827221066e-06, + "loss": 0.3006, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020005282014608383, - "step": 967, - "valid_targets_mean": 4006.3, - "valid_targets_min": 1141 + "loss_rank_avg": 0.16643747687339783, + "step": 3600, + "valid_targets_mean": 4737.3, + "valid_targets_min": 980 }, { - "epoch": 3.9194936708860757, - "grad_norm": 0.2569034631060701, - "learning_rate": 5.47338630512007e-06, - "loss": 0.1712, + "epoch": 3.649113924050633, + "grad_norm": 0.3299126612802994, + "learning_rate": 8.269462203189467e-06, + "loss": 0.3031, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013550623320043087, - "step": 968, - "valid_targets_mean": 3199.0, - "valid_targets_min": 956 + "loss_rank_avg": 0.1495143473148346, + "step": 3605, + "valid_targets_mean": 4373.2, + "valid_targets_min": 1652 }, { - "epoch": 3.923544303797468, - "grad_norm": 0.26580168189369774, - "learning_rate": 5.434572078689617e-06, - "loss": 0.1656, + "epoch": 3.6541772151898733, + "grad_norm": 0.3046029741964782, + "learning_rate": 8.212305005194992e-06, + "loss": 0.2926, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015013760887086391, - "step": 969, - "valid_targets_mean": 3526.7, - "valid_targets_min": 1301 + "loss_rank_avg": 0.12610015273094177, + "step": 3610, + "valid_targets_mean": 4068.0, + "valid_targets_min": 990 }, { - "epoch": 3.9275949367088607, - "grad_norm": 0.2344212801278269, - "learning_rate": 5.395874317032694e-06, - "loss": 0.1664, + "epoch": 3.659240506329114, + "grad_norm": 0.2889811661916004, + "learning_rate": 8.155294946700402e-06, + "loss": 0.2938, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017263196408748627, - "step": 970, - "valid_targets_mean": 4849.8, - "valid_targets_min": 929 + "loss_rank_avg": 0.14253506064414978, + "step": 3615, + "valid_targets_mean": 4970.4, + "valid_targets_min": 1268 }, { - "epoch": 3.9316455696202532, - "grad_norm": 0.2575740382944395, - "learning_rate": 5.357293329575573e-06, - "loss": 0.1534, + "epoch": 3.6643037974683543, + "grad_norm": 0.3058585932521842, + "learning_rate": 8.098432739331803e-06, + "loss": 0.2822, "loss_nan_ranks": 0, - "loss_rank_avg": 0.024109669029712677, - "step": 971, - "valid_targets_mean": 5135.2, - "valid_targets_min": 1642 + "loss_rank_avg": 0.14140847325325012, + "step": 3620, + "valid_targets_mean": 4990.8, + "valid_targets_min": 1162 }, { - "epoch": 3.9356962025316458, - "grad_norm": 0.27920946943071534, - "learning_rate": 5.318829424810797e-06, - "loss": 0.1716, + "epoch": 3.669367088607595, + "grad_norm": 0.3078512961350209, + "learning_rate": 8.041719092869761e-06, + "loss": 0.3031, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017383450642228127, - "step": 972, - "valid_targets_mean": 4036.1, - "valid_targets_min": 1577 + "loss_rank_avg": 0.13041014969348907, + "step": 3625, + "valid_targets_mean": 4127.6, + "valid_targets_min": 1226 }, { - "epoch": 3.939746835443038, - "grad_norm": 0.2674779028295388, - "learning_rate": 5.280482910294728e-06, - "loss": 0.1731, + "epoch": 3.6744303797468354, + "grad_norm": 0.2557971332829355, + "learning_rate": 7.98515471524042e-06, + "loss": 0.3067, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014004931785166264, - "step": 973, - "valid_targets_mean": 3164.8, - "valid_targets_min": 865 + "loss_rank_avg": 0.13294486701488495, + "step": 3630, + "valid_targets_mean": 5826.9, + "valid_targets_min": 1621 }, { - "epoch": 3.9437974683544303, - "grad_norm": 0.2558046574136687, - "learning_rate": 5.242254092645078e-06, - "loss": 0.1657, + "epoch": 3.679493670886076, + "grad_norm": 0.3421960754823531, + "learning_rate": 7.928740312506704e-06, + "loss": 0.3055, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021920736879110336, - "step": 974, - "valid_targets_mean": 4416.2, - "valid_targets_min": 1186 + "loss_rank_avg": 0.11804340779781342, + "step": 3635, + "valid_targets_mean": 3220.9, + "valid_targets_min": 1529 }, { - "epoch": 3.947848101265823, - "grad_norm": 0.27037859086507426, - "learning_rate": 5.2041432775384445e-06, - "loss": 0.1728, + "epoch": 3.6845569620253165, + "grad_norm": 0.30370702242210657, + "learning_rate": 7.87247658885946e-06, + "loss": 0.2923, "loss_nan_ranks": 0, - "loss_rank_avg": 0.03166478872299194, - "step": 975, - "valid_targets_mean": 5821.2, - "valid_targets_min": 915 + "loss_rank_avg": 0.1813192367553711, + "step": 3640, + "valid_targets_mean": 5260.7, + "valid_targets_min": 1428 }, { - "epoch": 3.951898734177215, - "grad_norm": 0.2568070232064629, - "learning_rate": 5.166150769707892e-06, - "loss": 0.1618, + "epoch": 3.689620253164557, + "grad_norm": 0.3275936364034654, + "learning_rate": 7.816364246608688e-06, + "loss": 0.303, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01807749643921852, - "step": 976, - "valid_targets_mean": 4275.6, - "valid_targets_min": 1151 + "loss_rank_avg": 0.16349400579929352, + "step": 3645, + "valid_targets_mean": 4485.0, + "valid_targets_min": 891 }, { - "epoch": 3.9559493670886074, - "grad_norm": 0.2610467303453652, - "learning_rate": 5.128276872940503e-06, - "loss": 0.1585, + "epoch": 3.6946835443037975, + "grad_norm": 0.36264702848645247, + "learning_rate": 7.760403986174796e-06, + "loss": 0.3064, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02191128209233284, - "step": 977, - "valid_targets_mean": 4305.9, - "valid_targets_min": 1243 + "loss_rank_avg": 0.13801871240139008, + "step": 3650, + "valid_targets_mean": 3051.1, + "valid_targets_min": 1057 }, { - "epoch": 3.96, - "grad_norm": 0.24641444939717588, - "learning_rate": 5.090521890074953e-06, - "loss": 0.1715, + "epoch": 3.699746835443038, + "grad_norm": 0.2776099144093922, + "learning_rate": 7.70459650607981e-06, + "loss": 0.3086, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02309935726225376, - "step": 978, - "valid_targets_mean": 4733.2, - "valid_targets_min": 1227 + "loss_rank_avg": 0.1680225133895874, + "step": 3655, + "valid_targets_mean": 6631.8, + "valid_targets_min": 1456 }, { - "epoch": 3.9640506329113925, - "grad_norm": 0.25286992119518464, - "learning_rate": 5.0528861229990675e-06, - "loss": 0.1695, + "epoch": 3.7048101265822786, + "grad_norm": 0.34172337758905963, + "learning_rate": 7.648942502938705e-06, + "loss": 0.3048, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01703287474811077, - "step": 979, - "valid_targets_mean": 4090.1, - "valid_targets_min": 943 + "loss_rank_avg": 0.16763363778591156, + "step": 3660, + "valid_targets_mean": 4061.8, + "valid_targets_min": 1209 }, { - "epoch": 3.968101265822785, - "grad_norm": 0.2515046419810229, - "learning_rate": 5.015369872647442e-06, - "loss": 0.1697, + "epoch": 3.709873417721519, + "grad_norm": 0.31939393718066644, + "learning_rate": 7.593442671450666e-06, + "loss": 0.2965, "loss_nan_ranks": 0, - "loss_rank_avg": 0.023712750524282455, - "step": 980, - "valid_targets_mean": 4041.8, - "valid_targets_min": 1425 + "loss_rank_avg": 0.17321282625198364, + "step": 3665, + "valid_targets_mean": 4894.1, + "valid_targets_min": 1043 }, { - "epoch": 3.972151898734177, - "grad_norm": 0.24882079439249843, - "learning_rate": 4.9779734389990195e-06, - "loss": 0.1572, + "epoch": 3.714936708860759, + "grad_norm": 0.32132553979600637, + "learning_rate": 7.5380977043904365e-06, + "loss": 0.2952, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02589654177427292, - "step": 981, - "valid_targets_mean": 5828.3, - "valid_targets_min": 1577 + "loss_rank_avg": 0.16710880398750305, + "step": 3670, + "valid_targets_mean": 4925.1, + "valid_targets_min": 1393 }, { - "epoch": 3.9762025316455696, - "grad_norm": 0.25927214688896627, - "learning_rate": 4.940697121074675e-06, - "loss": 0.1655, + "epoch": 3.7199999999999998, + "grad_norm": 0.32445408393836617, + "learning_rate": 7.482908292599689e-06, + "loss": 0.3039, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02023175358772278, - "step": 982, - "valid_targets_mean": 4153.8, - "valid_targets_min": 1031 + "loss_rank_avg": 0.12341426312923431, + "step": 3675, + "valid_targets_mean": 3606.6, + "valid_targets_min": 1214 }, { - "epoch": 3.980253164556962, - "grad_norm": 0.2578774726920398, - "learning_rate": 4.903541216934864e-06, - "loss": 0.1658, + "epoch": 3.7250632911392403, + "grad_norm": 0.3064797642539383, + "learning_rate": 7.427875124978359e-06, + "loss": 0.2968, "loss_nan_ranks": 0, - "loss_rank_avg": 0.019639194011688232, - "step": 983, - "valid_targets_mean": 4131.6, - "valid_targets_min": 869 + "loss_rank_avg": 0.1298845112323761, + "step": 3680, + "valid_targets_mean": 4494.2, + "valid_targets_min": 1211 }, { - "epoch": 3.984303797468354, - "grad_norm": 0.25118875230741106, - "learning_rate": 4.8665060236772e-06, - "loss": 0.162, + "epoch": 3.730126582278481, + "grad_norm": 0.2891043975748266, + "learning_rate": 7.372998888476091e-06, + "loss": 0.307, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014539085328578949, - "step": 984, - "valid_targets_mean": 4011.5, - "valid_targets_min": 1210 + "loss_rank_avg": 0.14520198106765747, + "step": 3685, + "valid_targets_mean": 4642.1, + "valid_targets_min": 1350 }, { - "epoch": 3.9883544303797467, - "grad_norm": 0.2498271509027046, - "learning_rate": 4.829591837434102e-06, - "loss": 0.1723, + "epoch": 3.7351898734177214, + "grad_norm": 0.2986519000501888, + "learning_rate": 7.318280268083624e-06, + "loss": 0.3026, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018505923449993134, - "step": 985, - "valid_targets_mean": 4007.4, - "valid_targets_min": 1684 + "loss_rank_avg": 0.19568581879138947, + "step": 3690, + "valid_targets_mean": 5491.8, + "valid_targets_min": 1141 }, { - "epoch": 3.992405063291139, - "grad_norm": 0.2569733312313395, - "learning_rate": 4.792798953370424e-06, - "loss": 0.1796, + "epoch": 3.740253164556962, + "grad_norm": 0.32054148131031535, + "learning_rate": 7.263719946824264e-06, + "loss": 0.3001, "loss_nan_ranks": 0, - "loss_rank_avg": 0.027533505111932755, - "step": 986, - "valid_targets_mean": 5823.2, - "valid_targets_min": 1688 + "loss_rank_avg": 0.12194908410310745, + "step": 3695, + "valid_targets_mean": 3993.2, + "valid_targets_min": 1248 }, { - "epoch": 3.9964556962025317, - "grad_norm": 0.25646292717570135, - "learning_rate": 4.756127665681083e-06, - "loss": 0.1673, + "epoch": 3.7453164556962024, + "grad_norm": 0.29536657640379355, + "learning_rate": 7.209318605745368e-06, + "loss": 0.2952, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021060016006231308, - "step": 987, - "valid_targets_mean": 4374.2, - "valid_targets_min": 971 + "loss_rank_avg": 0.14819620549678802, + "step": 3700, + "valid_targets_mean": 4433.9, + "valid_targets_min": 1426 }, { - "epoch": 4.0, - "grad_norm": 0.2771544532900968, - "learning_rate": 4.719578267588725e-06, - "loss": 0.165, + "epoch": 3.750379746835443, + "grad_norm": 0.28146869288224746, + "learning_rate": 7.1550769239098e-06, + "loss": 0.2973, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021999431774020195, - "step": 988, - "valid_targets_mean": 4322.3, - "valid_targets_min": 1295 + "loss_rank_avg": 0.18099471926689148, + "step": 3705, + "valid_targets_mean": 7152.2, + "valid_targets_min": 1449 }, { - "epoch": 4.004050632911392, - "grad_norm": 0.4003732826881182, - "learning_rate": 4.6831510513413525e-06, - "loss": 0.1201, + "epoch": 3.7554430379746835, + "grad_norm": 0.31596675118829165, + "learning_rate": 7.100995578387519e-06, + "loss": 0.2938, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014306396245956421, - "step": 989, - "valid_targets_mean": 3664.5, - "valid_targets_min": 1216 + "loss_rank_avg": 0.13141889870166779, + "step": 3710, + "valid_targets_mean": 3884.0, + "valid_targets_min": 1459 }, { - "epoch": 4.008101265822785, - "grad_norm": 0.3655726493503762, - "learning_rate": 4.6468463082100176e-06, - "loss": 0.1199, + "epoch": 3.760506329113924, + "grad_norm": 0.31422332389376245, + "learning_rate": 7.047075244247061e-06, + "loss": 0.3058, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012987561523914337, - "step": 990, - "valid_targets_mean": 4540.7, - "valid_targets_min": 765 + "loss_rank_avg": 0.1718595027923584, + "step": 3715, + "valid_targets_mean": 4909.5, + "valid_targets_min": 1774 }, { - "epoch": 4.012151898734177, - "grad_norm": 0.30964833471583064, - "learning_rate": 4.610664328486485e-06, - "loss": 0.1104, + "epoch": 3.7655696202531646, + "grad_norm": 0.3058451914869696, + "learning_rate": 6.993316594547148e-06, + "loss": 0.3043, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0126019436866045, - "step": 991, - "valid_targets_mean": 4332.8, - "valid_targets_min": 838 + "loss_rank_avg": 0.12041142582893372, + "step": 3720, + "valid_targets_mean": 3857.4, + "valid_targets_min": 1341 }, { - "epoch": 4.01620253164557, - "grad_norm": 0.250996175520195, - "learning_rate": 4.5746054014809e-06, - "loss": 0.1135, + "epoch": 3.770632911392405, + "grad_norm": 0.3285101705687896, + "learning_rate": 6.939720300328303e-06, + "loss": 0.3109, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014090234413743019, - "step": 992, - "valid_targets_mean": 4519.7, - "valid_targets_min": 1248 + "loss_rank_avg": 0.15424466133117676, + "step": 3725, + "valid_targets_mean": 4029.8, + "valid_targets_min": 1388 }, { - "epoch": 4.020253164556962, - "grad_norm": 0.2892432889212748, - "learning_rate": 4.5386698155194766e-06, - "loss": 0.1146, + "epoch": 3.7756962025316456, + "grad_norm": 0.2852711508998474, + "learning_rate": 6.886287030604422e-06, + "loss": 0.2974, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013059249147772789, - "step": 993, - "valid_targets_mean": 4145.2, - "valid_targets_min": 1426 + "loss_rank_avg": 0.15062718093395233, + "step": 3730, + "valid_targets_mean": 5311.7, + "valid_targets_min": 1383 }, { - "epoch": 4.024303797468354, - "grad_norm": 0.34040346346515926, - "learning_rate": 4.502857857942204e-06, - "loss": 0.1099, + "epoch": 3.780759493670886, + "grad_norm": 0.32830150851392953, + "learning_rate": 6.83301745235448e-06, + "loss": 0.3073, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013071644119918346, - "step": 994, - "valid_targets_mean": 4365.7, - "valid_targets_min": 1116 + "loss_rank_avg": 0.1865895390510559, + "step": 3735, + "valid_targets_mean": 4781.8, + "valid_targets_min": 1034 }, { - "epoch": 4.028354430379747, - "grad_norm": 0.43935845584745803, - "learning_rate": 4.467169815100549e-06, - "loss": 0.1175, - "loss_nan_ranks": 0, - "loss_rank_avg": 0.01606411673128605, - "step": 995, - "valid_targets_mean": 5574.1, - "valid_targets_min": 1533 + "epoch": 3.7858227848101267, + "grad_norm": 0.33756549066729197, + "learning_rate": 6.779912230514161e-06, + "loss": 0.2807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12055318802595139, + "step": 3740, + "valid_targets_mean": 3463.3, + "valid_targets_min": 1599 }, { - "epoch": 4.032405063291139, - "grad_norm": 0.38111330979551494, - "learning_rate": 4.431605972355137e-06, - "loss": 0.1166, + "epoch": 3.790886075949367, + "grad_norm": 0.3152172059583756, + "learning_rate": 6.7269720279675755e-06, + "loss": 0.3027, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014245737344026566, - "step": 996, - "valid_targets_mean": 4346.1, - "valid_targets_min": 1263 + "loss_rank_avg": 0.17137764394283295, + "step": 3745, + "valid_targets_mean": 4888.2, + "valid_targets_min": 1432 }, { - "epoch": 4.036455696202531, - "grad_norm": 0.30287401074217873, - "learning_rate": 4.396166614073516e-06, - "loss": 0.1124, + "epoch": 3.7959493670886078, + "grad_norm": 0.3239318373383687, + "learning_rate": 6.674197505539006e-06, + "loss": 0.3045, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013161606155335903, - "step": 997, - "valid_targets_mean": 4571.4, - "valid_targets_min": 1194 + "loss_rank_avg": 0.14893168210983276, + "step": 3750, + "valid_targets_mean": 4158.4, + "valid_targets_min": 1285 }, { - "epoch": 4.040506329113924, - "grad_norm": 0.2798009529690627, - "learning_rate": 4.360852023627846e-06, - "loss": 0.1081, + "epoch": 3.8010126582278483, + "grad_norm": 0.2919220688770005, + "learning_rate": 6.621589321984611e-06, + "loss": 0.2953, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016295261681079865, - "step": 998, - "valid_targets_mean": 5411.4, - "valid_targets_min": 1057 + "loss_rank_avg": 0.14690950512886047, + "step": 3755, + "valid_targets_mean": 4748.6, + "valid_targets_min": 1607 }, { - "epoch": 4.044556962025316, - "grad_norm": 0.2922199601998903, - "learning_rate": 4.325662483392656e-06, - "loss": 0.1153, + "epoch": 3.806075949367089, + "grad_norm": 0.3025582373018823, + "learning_rate": 6.569148133984258e-06, + "loss": 0.2936, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010619858279824257, - "step": 999, - "valid_targets_mean": 3356.9, - "valid_targets_min": 1193 + "loss_rank_avg": 0.17515698075294495, + "step": 3760, + "valid_targets_mean": 5047.3, + "valid_targets_min": 1082 }, { - "epoch": 4.048607594936709, - "grad_norm": 0.2787040016448312, - "learning_rate": 4.29059827474257e-06, - "loss": 0.116, + "epoch": 3.8111392405063294, + "grad_norm": 0.2952019372668243, + "learning_rate": 6.516874596133269e-06, + "loss": 0.2869, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021953728049993515, - "step": 1000, - "valid_targets_mean": 6415.1, - "valid_targets_min": 1294 + "loss_rank_avg": 0.15086880326271057, + "step": 3765, + "valid_targets_mean": 5131.5, + "valid_targets_min": 1253 }, { - "epoch": 4.052658227848101, - "grad_norm": 0.28719331022465855, - "learning_rate": 4.255659678050063e-06, - "loss": 0.1144, + "epoch": 3.81620253164557, + "grad_norm": 0.29724809214492576, + "learning_rate": 6.464769360934306e-06, + "loss": 0.2927, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015153765678405762, - "step": 1001, - "valid_targets_mean": 4424.2, - "valid_targets_min": 1264 + "loss_rank_avg": 0.19773977994918823, + "step": 3770, + "valid_targets_mean": 6146.5, + "valid_targets_min": 1366 }, { - "epoch": 4.056708860759493, - "grad_norm": 0.2885840564817545, - "learning_rate": 4.22084697268323e-06, - "loss": 0.1147, + "epoch": 3.82126582278481, + "grad_norm": 0.33270725662814826, + "learning_rate": 6.412833078789178e-06, + "loss": 0.3024, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016420746222138405, - "step": 1002, - "valid_targets_mean": 4203.6, - "valid_targets_min": 1221 + "loss_rank_avg": 0.15699324011802673, + "step": 3775, + "valid_targets_mean": 4472.4, + "valid_targets_min": 1350 }, { - "epoch": 4.060759493670886, - "grad_norm": 0.2889212669563016, - "learning_rate": 4.186160437003535e-06, - "loss": 0.1136, + "epoch": 3.8263291139240505, + "grad_norm": 0.2963372278675901, + "learning_rate": 6.361066397990758e-06, + "loss": 0.2956, "loss_nan_ranks": 0, - "loss_rank_avg": 0.007882167585194111, - "step": 1003, - "valid_targets_mean": 2971.5, - "valid_targets_min": 1138 + "loss_rank_avg": 0.1388930231332779, + "step": 3780, + "valid_targets_mean": 4676.4, + "valid_targets_min": 971 }, { - "epoch": 4.0648101265822785, - "grad_norm": 0.2708558928023269, - "learning_rate": 4.151600348363582e-06, - "loss": 0.1154, + "epoch": 3.831392405063291, + "grad_norm": 0.2786191474125972, + "learning_rate": 6.309469964714863e-06, + "loss": 0.298, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011313255876302719, - "step": 1004, - "valid_targets_mean": 4321.1, - "valid_targets_min": 1264 + "loss_rank_avg": 0.14243824779987335, + "step": 3785, + "valid_targets_mean": 4964.9, + "valid_targets_min": 1391 }, { - "epoch": 4.0688607594936705, - "grad_norm": 0.2589101403089754, - "learning_rate": 4.11716698310493e-06, - "loss": 0.1067, + "epoch": 3.8364556962025316, + "grad_norm": 0.3057769621782611, + "learning_rate": 6.2580444230122175e-06, + "loss": 0.2824, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012739719823002815, - "step": 1005, - "valid_targets_mean": 4941.8, - "valid_targets_min": 1269 + "loss_rank_avg": 0.1198386549949646, + "step": 3790, + "valid_targets_mean": 3840.2, + "valid_targets_min": 745 }, { - "epoch": 4.0729113924050635, - "grad_norm": 0.2795822652210067, - "learning_rate": 4.082860616555848e-06, - "loss": 0.1143, + "epoch": 3.841518987341772, + "grad_norm": 0.31205008704708975, + "learning_rate": 6.2067904148003764e-06, + "loss": 0.3096, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011751806363463402, - "step": 1006, - "valid_targets_mean": 3975.0, - "valid_targets_min": 1238 + "loss_rank_avg": 0.17915937304496765, + "step": 3795, + "valid_targets_mean": 5550.8, + "valid_targets_min": 1392 }, { - "epoch": 4.076962025316456, - "grad_norm": 0.27901499487691106, - "learning_rate": 4.048681523029125e-06, - "loss": 0.1046, + "epoch": 3.8465822784810126, + "grad_norm": 0.3072712018089592, + "learning_rate": 6.155708579855759e-06, + "loss": 0.3163, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013513721525669098, - "step": 1007, - "valid_targets_mean": 4067.1, - "valid_targets_min": 1093 + "loss_rank_avg": 0.15422801673412323, + "step": 3800, + "valid_targets_mean": 4714.1, + "valid_targets_min": 970 }, { - "epoch": 4.0810126582278485, - "grad_norm": 0.28199971187759815, - "learning_rate": 4.014629975819882e-06, - "loss": 0.1167, + "epoch": 3.851645569620253, + "grad_norm": 0.3056625890719376, + "learning_rate": 6.104799555805607e-06, + "loss": 0.2963, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013382020406425, - "step": 1008, - "valid_targets_mean": 4960.6, - "valid_targets_min": 809 + "loss_rank_avg": 0.10852354019880295, + "step": 3805, + "valid_targets_mean": 3481.8, + "valid_targets_min": 1001 }, { - "epoch": 4.085063291139241, - "grad_norm": 0.2739291802210153, - "learning_rate": 3.9807062472033855e-06, - "loss": 0.1153, + "epoch": 3.8567088607594937, + "grad_norm": 0.33415927615785906, + "learning_rate": 6.054063978120093e-06, + "loss": 0.3035, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011161049827933311, - "step": 1009, - "valid_targets_mean": 3603.4, - "valid_targets_min": 1360 + "loss_rank_avg": 0.16793662309646606, + "step": 3810, + "valid_targets_mean": 3962.6, + "valid_targets_min": 1458 }, { - "epoch": 4.089113924050633, - "grad_norm": 0.2608212672568236, - "learning_rate": 3.94691060843287e-06, - "loss": 0.1155, + "epoch": 3.8617721518987342, + "grad_norm": 0.2859437483451691, + "learning_rate": 6.00350248010431e-06, + "loss": 0.2964, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02048046886920929, - "step": 1010, - "valid_targets_mean": 6277.9, - "valid_targets_min": 1750 + "loss_rank_avg": 0.11396883428096771, + "step": 3815, + "valid_targets_mean": 4421.5, + "valid_targets_min": 1527 }, { - "epoch": 4.093164556962026, - "grad_norm": 0.2529424848219501, - "learning_rate": 3.913243329737353e-06, - "loss": 0.1131, + "epoch": 3.8668354430379748, + "grad_norm": 0.32446773366076725, + "learning_rate": 5.95311569289045e-06, + "loss": 0.2976, "loss_nan_ranks": 0, - "loss_rank_avg": 0.00706963986158371, - "step": 1011, - "valid_targets_mean": 3304.1, - "valid_targets_min": 1428 + "loss_rank_avg": 0.14191418886184692, + "step": 3820, + "valid_targets_mean": 3904.6, + "valid_targets_min": 1346 }, { - "epoch": 4.097215189873418, - "grad_norm": 0.26109890093820065, - "learning_rate": 3.879704680319503e-06, - "loss": 0.1142, + "epoch": 3.8718987341772153, + "grad_norm": 0.33241644171629736, + "learning_rate": 5.902904245429852e-06, + "loss": 0.3066, "loss_nan_ranks": 0, - "loss_rank_avg": 0.007938157767057419, - "step": 1012, - "valid_targets_mean": 3203.9, - "valid_targets_min": 1186 + "loss_rank_avg": 0.15994411706924438, + "step": 3825, + "valid_targets_mean": 4205.2, + "valid_targets_min": 1211 }, { - "epoch": 4.10126582278481, - "grad_norm": 0.25760137757483825, - "learning_rate": 3.846294928353471e-06, - "loss": 0.1086, + "epoch": 3.876962025316456, + "grad_norm": 0.33295558620330956, + "learning_rate": 5.852868764485185e-06, + "loss": 0.2951, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01943141408264637, - "step": 1013, - "valid_targets_mean": 5626.9, - "valid_targets_min": 1068 + "loss_rank_avg": 0.11764267832040787, + "step": 3830, + "valid_targets_mean": 3148.9, + "valid_targets_min": 992 }, { - "epoch": 4.105316455696203, - "grad_norm": 0.2479626910788689, - "learning_rate": 3.8130143409827345e-06, - "loss": 0.1112, + "epoch": 3.882025316455696, + "grad_norm": 0.3094377142586556, + "learning_rate": 5.803009874622645e-06, + "loss": 0.3014, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013259506784379482, - "step": 1014, - "valid_targets_mean": 4291.4, - "valid_targets_min": 1322 + "loss_rank_avg": 0.13288931548595428, + "step": 3835, + "valid_targets_mean": 3723.9, + "valid_targets_min": 1009 }, { - "epoch": 4.109367088607595, - "grad_norm": 0.2569955392936143, - "learning_rate": 3.7798631843179887e-06, - "loss": 0.1071, + "epoch": 3.8870886075949365, + "grad_norm": 0.28966742570043214, + "learning_rate": 5.753328198204107e-06, + "loss": 0.2827, "loss_nan_ranks": 0, - "loss_rank_avg": 0.019261609762907028, - "step": 1015, - "valid_targets_mean": 4930.9, - "valid_targets_min": 828 + "loss_rank_avg": 0.14470314979553223, + "step": 3840, + "valid_targets_mean": 5011.6, + "valid_targets_min": 1536 }, { - "epoch": 4.113417721518988, - "grad_norm": 0.25253897584899043, - "learning_rate": 3.7468417234349973e-06, - "loss": 0.1171, + "epoch": 3.892151898734177, + "grad_norm": 0.3556764011843303, + "learning_rate": 5.7038243553794105e-06, + "loss": 0.3098, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018999673426151276, - "step": 1016, - "valid_targets_mean": 5932.4, - "valid_targets_min": 1791 + "loss_rank_avg": 0.15663528442382812, + "step": 3845, + "valid_targets_mean": 3704.3, + "valid_targets_min": 938 }, { - "epoch": 4.11746835443038, - "grad_norm": 0.2611560587823719, - "learning_rate": 3.7139502223724823e-06, - "loss": 0.1104, + "epoch": 3.8972151898734175, + "grad_norm": 0.4434641609697601, + "learning_rate": 5.654498964078578e-06, + "loss": 0.3071, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01204339787364006, - "step": 1017, - "valid_targets_mean": 3853.3, - "valid_targets_min": 1330 + "loss_rank_avg": 0.15775218605995178, + "step": 3850, + "valid_targets_mean": 5224.2, + "valid_targets_min": 1008 }, { - "epoch": 4.121518987341772, - "grad_norm": 0.2586414238324103, - "learning_rate": 3.6811889441300095e-06, - "loss": 0.1065, + "epoch": 3.902278481012658, + "grad_norm": 0.28354591909624743, + "learning_rate": 5.605352640004111e-06, + "loss": 0.2998, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015708964318037033, - "step": 1018, - "valid_targets_mean": 4885.8, - "valid_targets_min": 994 + "loss_rank_avg": 0.20830413699150085, + "step": 3855, + "valid_targets_mean": 6659.2, + "valid_targets_min": 1634 }, { - "epoch": 4.125569620253165, - "grad_norm": 0.25967544297221273, - "learning_rate": 3.648558150665886e-06, - "loss": 0.1186, + "epoch": 3.9073417721518986, + "grad_norm": 0.31278015955485905, + "learning_rate": 5.556385996623334e-06, + "loss": 0.3024, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01149682980030775, - "step": 1019, - "valid_targets_mean": 3851.9, - "valid_targets_min": 1121 + "loss_rank_avg": 0.1927700638771057, + "step": 3860, + "valid_targets_mean": 6460.9, + "valid_targets_min": 1493 }, { - "epoch": 4.129620253164557, - "grad_norm": 0.25572555469489316, - "learning_rate": 3.616058102895075e-06, - "loss": 0.1126, + "epoch": 3.912405063291139, + "grad_norm": 0.3183546633710576, + "learning_rate": 5.507599645160688e-06, + "loss": 0.2956, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016392933204770088, - "step": 1020, - "valid_targets_mean": 4871.8, - "valid_targets_min": 1100 + "loss_rank_avg": 0.14958074688911438, + "step": 3865, + "valid_targets_mean": 4336.2, + "valid_targets_min": 1153 }, { - "epoch": 4.133670886075949, - "grad_norm": 0.2441110532126236, - "learning_rate": 3.583689060687079e-06, - "loss": 0.1157, + "epoch": 3.9174683544303797, + "grad_norm": 0.2926146837393363, + "learning_rate": 5.45899419459015e-06, + "loss": 0.317, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012376191094517708, - "step": 1021, - "valid_targets_mean": 4524.1, - "valid_targets_min": 1028 + "loss_rank_avg": 0.11325857788324356, + "step": 3870, + "valid_targets_mean": 3313.8, + "valid_targets_min": 775 }, { - "epoch": 4.137721518987342, - "grad_norm": 0.24398646370891114, - "learning_rate": 3.5514512828639023e-06, - "loss": 0.1091, + "epoch": 3.92253164556962, + "grad_norm": 0.2947057963950262, + "learning_rate": 5.410570251627587e-06, + "loss": 0.2958, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018081197515130043, - "step": 1022, - "valid_targets_mean": 6087.4, - "valid_targets_min": 1117 + "loss_rank_avg": 0.1467699259519577, + "step": 3875, + "valid_targets_mean": 5764.1, + "valid_targets_min": 1283 }, { - "epoch": 4.141772151898734, - "grad_norm": 0.2546021016376095, - "learning_rate": 3.51934502719796e-06, - "loss": 0.1039, + "epoch": 3.9275949367088607, + "grad_norm": 0.29570913844534924, + "learning_rate": 5.362328420723208e-06, + "loss": 0.2956, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01941712014377117, - "step": 1023, - "valid_targets_mean": 5920.0, - "valid_targets_min": 1266 + "loss_rank_avg": 0.14309361577033997, + "step": 3880, + "valid_targets_mean": 4847.8, + "valid_targets_min": 927 }, { - "epoch": 4.145822784810127, - "grad_norm": 0.25101564084424466, - "learning_rate": 3.4873705504100185e-06, - "loss": 0.1031, + "epoch": 3.9326582278481013, + "grad_norm": 0.3026281684204848, + "learning_rate": 5.314269304054029e-06, + "loss": 0.29, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011633863672614098, - "step": 1024, - "valid_targets_mean": 3833.2, - "valid_targets_min": 1357 + "loss_rank_avg": 0.19492954015731812, + "step": 3885, + "valid_targets_mean": 5634.2, + "valid_targets_min": 1361 }, { - "epoch": 4.149873417721519, - "grad_norm": 0.24006292387114583, - "learning_rate": 3.4555281081671345e-06, - "loss": 0.1106, + "epoch": 3.937721518987342, + "grad_norm": 0.30964406158183067, + "learning_rate": 5.26639350151632e-06, + "loss": 0.3055, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01572991907596588, - "step": 1025, - "valid_targets_mean": 5546.1, - "valid_targets_min": 1024 + "loss_rank_avg": 0.13411834836006165, + "step": 3890, + "valid_targets_mean": 4035.8, + "valid_targets_min": 1199 }, { - "epoch": 4.153924050632911, - "grad_norm": 0.24629627209906968, - "learning_rate": 3.4238179550806326e-06, - "loss": 0.108, + "epoch": 3.9427848101265823, + "grad_norm": 0.3111439221332038, + "learning_rate": 5.218701610718162e-06, + "loss": 0.3044, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012446759268641472, - "step": 1026, - "valid_targets_mean": 4453.8, - "valid_targets_min": 975 + "loss_rank_avg": 0.1787598729133606, + "step": 3895, + "valid_targets_mean": 5102.0, + "valid_targets_min": 1779 }, { - "epoch": 4.157974683544304, - "grad_norm": 0.25615612600541154, - "learning_rate": 3.3922403447040497e-06, - "loss": 0.108, + "epoch": 3.947848101265823, + "grad_norm": 0.256548917813654, + "learning_rate": 5.171194226971947e-06, + "loss": 0.3164, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014301635324954987, - "step": 1027, - "valid_targets_mean": 4307.7, - "valid_targets_min": 1391 + "loss_rank_avg": 0.15520493686199188, + "step": 3900, + "valid_targets_mean": 5819.2, + "valid_targets_min": 913 }, { - "epoch": 4.162025316455696, - "grad_norm": 0.23812708306917799, - "learning_rate": 3.3607955295311247e-06, - "loss": 0.101, + "epoch": 3.9529113924050634, + "grad_norm": 0.35240271321787325, + "learning_rate": 5.12387194328696e-06, + "loss": 0.2864, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011930042877793312, - "step": 1028, - "valid_targets_mean": 4049.1, - "valid_targets_min": 1088 + "loss_rank_avg": 0.100287064909935, + "step": 3905, + "valid_targets_mean": 2803.6, + "valid_targets_min": 1229 }, { - "epoch": 4.166075949367088, - "grad_norm": 0.24105681140422938, - "learning_rate": 3.32948376099375e-06, - "loss": 0.1081, + "epoch": 3.957974683544304, + "grad_norm": 0.28051305016909855, + "learning_rate": 5.0767353503620055e-06, + "loss": 0.3012, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012245169840753078, - "step": 1029, - "valid_targets_mean": 4256.4, - "valid_targets_min": 902 + "loss_rank_avg": 0.11824625730514526, + "step": 3910, + "valid_targets_mean": 4403.1, + "valid_targets_min": 1454 }, { - "epoch": 4.170126582278481, - "grad_norm": 0.2603579396276263, - "learning_rate": 3.2983052894599976e-06, - "loss": 0.1119, + "epoch": 3.9630379746835445, + "grad_norm": 0.33648253442313436, + "learning_rate": 5.029785036577976e-06, + "loss": 0.3047, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009018047712743282, - "step": 1030, - "valid_targets_mean": 2847.9, - "valid_targets_min": 1455 + "loss_rank_avg": 0.12415116280317307, + "step": 3915, + "valid_targets_mean": 3484.4, + "valid_targets_min": 1290 }, { - "epoch": 4.174177215189873, - "grad_norm": 0.24094462126263408, - "learning_rate": 3.2672603642320945e-06, - "loss": 0.1131, + "epoch": 3.968101265822785, + "grad_norm": 0.3051655834868841, + "learning_rate": 4.983021587990577e-06, + "loss": 0.2999, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009460234083235264, - "step": 1031, - "valid_targets_mean": 3995.7, - "valid_targets_min": 1381 + "loss_rank_avg": 0.16038689017295837, + "step": 3920, + "valid_targets_mean": 4039.8, + "valid_targets_min": 1423 }, { - "epoch": 4.178227848101266, - "grad_norm": 0.2539272303216648, - "learning_rate": 3.236349233544438e-06, - "loss": 0.1101, + "epoch": 3.9731645569620255, + "grad_norm": 0.3149366075705648, + "learning_rate": 4.936445588322947e-06, + "loss": 0.2836, "loss_nan_ranks": 0, - "loss_rank_avg": 0.00956416130065918, - "step": 1032, - "valid_targets_mean": 3387.9, - "valid_targets_min": 1578 + "loss_rank_avg": 0.17041844129562378, + "step": 3925, + "valid_targets_mean": 4645.1, + "valid_targets_min": 1388 }, { - "epoch": 4.182278481012658, - "grad_norm": 0.24607206464657877, - "learning_rate": 3.205572144561604e-06, - "loss": 0.1109, + "epoch": 3.978227848101266, + "grad_norm": 0.3104948744129676, + "learning_rate": 4.890057618958406e-06, + "loss": 0.3008, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009433125145733356, - "step": 1033, - "valid_targets_mean": 3179.5, - "valid_targets_min": 1212 + "loss_rank_avg": 0.13706716895103455, + "step": 3930, + "valid_targets_mean": 4148.2, + "valid_targets_min": 918 }, { - "epoch": 4.18632911392405, - "grad_norm": 0.2369043979584509, - "learning_rate": 3.174929343376374e-06, - "loss": 0.1104, + "epoch": 3.9832911392405066, + "grad_norm": 0.4596385567964535, + "learning_rate": 4.843858258933207e-06, + "loss": 0.306, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01051153615117073, - "step": 1034, - "valid_targets_mean": 3981.8, - "valid_targets_min": 1298 + "loss_rank_avg": 0.12977036833763123, + "step": 3935, + "valid_targets_mean": 4589.1, + "valid_targets_min": 1135 }, { - "epoch": 4.190379746835443, - "grad_norm": 0.24736908966042873, - "learning_rate": 3.1444210750077777e-06, - "loss": 0.1088, + "epoch": 3.9883544303797467, + "grad_norm": 0.35205194291692793, + "learning_rate": 4.797848084929271e-06, + "loss": 0.2963, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012088462710380554, - "step": 1035, - "valid_targets_mean": 4654.1, - "valid_targets_min": 1300 + "loss_rank_avg": 0.16180771589279175, + "step": 3940, + "valid_targets_mean": 4005.4, + "valid_targets_min": 1682 }, { - "epoch": 4.194430379746835, - "grad_norm": 0.24641644567177923, - "learning_rate": 3.1140475833991046e-06, - "loss": 0.1111, + "epoch": 3.993417721518987, + "grad_norm": 0.3115344869207087, + "learning_rate": 4.7520276712670344e-06, + "loss": 0.3138, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018200930207967758, - "step": 1036, - "valid_targets_mean": 5480.0, - "valid_targets_min": 1518 + "loss_rank_avg": 0.1245289221405983, + "step": 3945, + "valid_targets_mean": 3869.5, + "valid_targets_min": 1814 }, { - "epoch": 4.1984810126582275, - "grad_norm": 0.2473167962312305, - "learning_rate": 3.0838091114159895e-06, - "loss": 0.1113, + "epoch": 3.9984810126582278, + "grad_norm": 0.3447482808883552, + "learning_rate": 4.706397589898237e-06, + "loss": 0.2992, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014702276326715946, - "step": 1037, - "valid_targets_mean": 4763.0, - "valid_targets_min": 914 + "loss_rank_avg": 0.21260038018226624, + "step": 3950, + "valid_targets_mean": 4663.9, + "valid_targets_min": 1153 }, { - "epoch": 4.2025316455696204, - "grad_norm": 0.2380808023314683, - "learning_rate": 3.0537059008444524e-06, - "loss": 0.1131, + "epoch": 4.0030379746835445, + "grad_norm": 0.31704002367241657, + "learning_rate": 4.660958410398808e-06, + "loss": 0.2938, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012161606922745705, - "step": 1038, - "valid_targets_mean": 4509.5, - "valid_targets_min": 1325 + "loss_rank_avg": 0.11520913243293762, + "step": 3955, + "valid_targets_mean": 3230.5, + "valid_targets_min": 1024 }, { - "epoch": 4.2065822784810125, - "grad_norm": 0.2371915388592596, - "learning_rate": 3.0237381923889563e-06, - "loss": 0.1019, + "epoch": 4.008101265822785, + "grad_norm": 0.3107759512067684, + "learning_rate": 4.61571069996176e-06, + "loss": 0.3067, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0148830721154809, - "step": 1039, - "valid_targets_mean": 5838.4, - "valid_targets_min": 1214 + "loss_rank_avg": 0.15815721452236176, + "step": 3960, + "valid_targets_mean": 4538.7, + "valid_targets_min": 763 }, { - "epoch": 4.2106329113924055, - "grad_norm": 0.2459439242197879, - "learning_rate": 2.9939062256704997e-06, - "loss": 0.1104, + "epoch": 4.013164556962026, + "grad_norm": 0.3240449542122713, + "learning_rate": 4.5706550233900825e-06, + "loss": 0.2895, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014685314148664474, - "step": 1040, - "valid_targets_mean": 4390.6, - "valid_targets_min": 1164 + "loss_rank_avg": 0.1527724266052246, + "step": 3965, + "valid_targets_mean": 4152.1, + "valid_targets_min": 1278 }, { - "epoch": 4.2146835443037975, - "grad_norm": 0.23226846355518324, - "learning_rate": 2.964210239224696e-06, - "loss": 0.1131, + "epoch": 4.018227848101266, + "grad_norm": 0.3371371879535488, + "learning_rate": 4.5257919430897305e-06, + "loss": 0.2919, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01576303504407406, - "step": 1041, - "valid_targets_mean": 5250.2, - "valid_targets_min": 1423 + "loss_rank_avg": 0.14754730463027954, + "step": 3970, + "valid_targets_mean": 3785.3, + "valid_targets_min": 1269 }, { - "epoch": 4.21873417721519, - "grad_norm": 0.2426028729765493, - "learning_rate": 2.934650470499869e-06, - "loss": 0.1036, + "epoch": 4.023291139240507, + "grad_norm": 0.28767581366652223, + "learning_rate": 4.481122019062562e-06, + "loss": 0.2976, "loss_nan_ranks": 0, - "loss_rank_avg": 0.02107350155711174, - "step": 1042, - "valid_targets_mean": 6033.2, - "valid_targets_min": 1302 + "loss_rank_avg": 0.14282898604869843, + "step": 3975, + "valid_targets_mean": 4653.0, + "valid_targets_min": 1265 }, { - "epoch": 4.222784810126583, - "grad_norm": 0.2387287478909716, - "learning_rate": 2.905227155855128e-06, - "loss": 0.1163, + "epoch": 4.028354430379747, + "grad_norm": 0.29732809453534903, + "learning_rate": 4.436645808899374e-06, + "loss": 0.3062, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011402295902371407, - "step": 1043, - "valid_targets_mean": 3917.2, - "valid_targets_min": 1283 + "loss_rank_avg": 0.18286307156085968, + "step": 3980, + "valid_targets_mean": 5864.7, + "valid_targets_min": 1531 }, { - "epoch": 4.226835443037975, - "grad_norm": 0.24908362936328057, - "learning_rate": 2.875940530558519e-06, - "loss": 0.112, + "epoch": 4.033417721518988, + "grad_norm": 0.29104777332578374, + "learning_rate": 4.3923638677729506e-06, + "loss": 0.292, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016240276396274567, - "step": 1044, - "valid_targets_mean": 5248.2, - "valid_targets_min": 1146 + "loss_rank_avg": 0.15134663879871368, + "step": 3985, + "valid_targets_mean": 5712.8, + "valid_targets_min": 923 }, { - "epoch": 4.230886075949367, - "grad_norm": 0.23423752746429335, - "learning_rate": 2.8467908287851154e-06, - "loss": 0.1082, + "epoch": 4.038481012658228, + "grad_norm": 0.34370328700294156, + "learning_rate": 4.348276748431095e-06, + "loss": 0.2889, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012546324171125889, - "step": 1045, - "valid_targets_mean": 5506.6, - "valid_targets_min": 1132 + "loss_rank_avg": 0.16227614879608154, + "step": 3990, + "valid_targets_mean": 4682.2, + "valid_targets_min": 1365 }, { - "epoch": 4.23493670886076, - "grad_norm": 0.2520296188756494, - "learning_rate": 2.8177782836151603e-06, - "loss": 0.1073, + "epoch": 4.043544303797469, + "grad_norm": 0.2853726249793705, + "learning_rate": 4.304385001189781e-06, + "loss": 0.292, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014122463762760162, - "step": 1046, - "valid_targets_mean": 4423.8, - "valid_targets_min": 1436 + "loss_rank_avg": 0.15851733088493347, + "step": 3995, + "valid_targets_mean": 5396.8, + "valid_targets_min": 1278 }, { - "epoch": 4.238987341772152, - "grad_norm": 0.23557866922129664, - "learning_rate": 2.7889031270321874e-06, - "loss": 0.1092, + "epoch": 4.048607594936709, + "grad_norm": 0.2664334385740433, + "learning_rate": 4.260689173926238e-06, + "loss": 0.3008, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017405740916728973, - "step": 1047, - "valid_targets_mean": 5047.8, - "valid_targets_min": 1672 + "loss_rank_avg": 0.1690644472837448, + "step": 4000, + "valid_targets_mean": 6774.5, + "valid_targets_min": 1292 }, { - "epoch": 4.243037974683545, - "grad_norm": 0.24293451283336756, - "learning_rate": 2.760165589921184e-06, - "loss": 0.1042, + "epoch": 4.053670886075949, + "grad_norm": 0.27904233780454435, + "learning_rate": 4.217189812072131e-06, + "loss": 0.2918, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01629508100450039, - "step": 1048, - "valid_targets_mean": 5036.8, - "valid_targets_min": 1141 + "loss_rank_avg": 0.13511596620082855, + "step": 4005, + "valid_targets_mean": 4486.5, + "valid_targets_min": 1562 }, { - "epoch": 4.247088607594937, - "grad_norm": 0.23988649700489975, - "learning_rate": 2.7315659020667305e-06, - "loss": 0.1097, + "epoch": 4.0587341772151895, + "grad_norm": 0.3231775999779525, + "learning_rate": 4.173887458606767e-06, + "loss": 0.3074, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010395625606179237, - "step": 1049, - "valid_targets_mean": 4051.7, - "valid_targets_min": 1243 + "loss_rank_avg": 0.11682070791721344, + "step": 4010, + "valid_targets_mean": 3668.7, + "valid_targets_min": 1126 }, { - "epoch": 4.251139240506329, - "grad_norm": 0.24862472182244583, - "learning_rate": 2.703104292151164e-06, - "loss": 0.1106, + "epoch": 4.06379746835443, + "grad_norm": 0.2770166025598625, + "learning_rate": 4.130782654050283e-06, + "loss": 0.3007, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01229317206889391, - "step": 1050, - "valid_targets_mean": 3529.8, - "valid_targets_min": 1096 + "loss_rank_avg": 0.15404465794563293, + "step": 4015, + "valid_targets_mean": 5586.6, + "valid_targets_min": 1272 }, { - "epoch": 4.255189873417722, - "grad_norm": 0.2349536501847763, - "learning_rate": 2.6747809877527607e-06, - "loss": 0.1164, + "epoch": 4.0688607594936705, + "grad_norm": 0.27479033637998745, + "learning_rate": 4.087875936456937e-06, + "loss": 0.2758, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01483272947371006, - "step": 1051, - "valid_targets_mean": 4799.9, - "valid_targets_min": 1106 + "loss_rank_avg": 0.12938356399536133, + "step": 4020, + "valid_targets_mean": 5243.8, + "valid_targets_min": 1267 }, { - "epoch": 4.259240506329114, - "grad_norm": 0.23067383757803828, - "learning_rate": 2.6465962153439082e-06, - "loss": 0.106, + "epoch": 4.073924050632911, + "grad_norm": 0.31378983164450186, + "learning_rate": 4.045167841408359e-06, + "loss": 0.3017, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012178324162960052, - "step": 1052, - "valid_targets_mean": 4971.4, - "valid_targets_min": 1405 + "loss_rank_avg": 0.15723198652267456, + "step": 4025, + "valid_targets_mean": 4392.6, + "valid_targets_min": 1232 }, { - "epoch": 4.263291139240506, - "grad_norm": 0.2658122118801973, - "learning_rate": 2.618550200289303e-06, - "loss": 0.1074, + "epoch": 4.078987341772152, + "grad_norm": 0.29002739575691505, + "learning_rate": 4.002658902006893e-06, + "loss": 0.2859, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012370210140943527, - "step": 1053, - "valid_targets_mean": 4161.8, - "valid_targets_min": 1260 + "loss_rank_avg": 0.15544429421424866, + "step": 4030, + "valid_targets_mean": 5350.4, + "valid_targets_min": 909 }, { - "epoch": 4.267341772151899, - "grad_norm": 0.2385990623874725, - "learning_rate": 2.59064316684412e-06, - "loss": 0.1074, + "epoch": 4.084050632911392, + "grad_norm": 0.3087847959185058, + "learning_rate": 3.9603496488689174e-06, + "loss": 0.2968, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010678352788090706, - "step": 1054, - "valid_targets_mean": 3915.4, - "valid_targets_min": 1049 + "loss_rank_avg": 0.16527938842773438, + "step": 4035, + "valid_targets_mean": 5526.1, + "valid_targets_min": 1129 }, { - "epoch": 4.271392405063291, - "grad_norm": 0.24493657061517968, - "learning_rate": 2.562875338152264e-06, - "loss": 0.1121, + "epoch": 4.089113924050633, + "grad_norm": 0.3085791674780826, + "learning_rate": 3.918240610118234e-06, + "loss": 0.2936, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011886542662978172, - "step": 1055, - "valid_targets_mean": 4704.1, - "valid_targets_min": 1066 + "loss_rank_avg": 0.1917693018913269, + "step": 4040, + "valid_targets_mean": 6275.9, + "valid_targets_min": 1748 }, { - "epoch": 4.275443037974684, - "grad_norm": 0.24932341757544677, - "learning_rate": 2.5352469362445552e-06, - "loss": 0.1162, + "epoch": 4.094177215189873, + "grad_norm": 0.286080067957159, + "learning_rate": 3.876332311379489e-06, + "loss": 0.2979, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014495152980089188, - "step": 1056, - "valid_targets_mean": 4415.4, - "valid_targets_min": 1253 + "loss_rank_avg": 0.1519317775964737, + "step": 4045, + "valid_targets_mean": 5526.4, + "valid_targets_min": 1127 }, { - "epoch": 4.279493670886076, - "grad_norm": 0.23423551396029124, - "learning_rate": 2.5077581820369523e-06, - "loss": 0.1083, + "epoch": 4.099240506329114, + "grad_norm": 0.2837484355272842, + "learning_rate": 3.834625275771582e-06, + "loss": 0.2926, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01119888387620449, - "step": 1057, - "valid_targets_mean": 3970.0, - "valid_targets_min": 1026 + "loss_rank_avg": 0.123206228017807, + "step": 4050, + "valid_targets_mean": 4113.7, + "valid_targets_min": 1252 }, { - "epoch": 4.283544303797468, - "grad_norm": 0.23860357313003533, - "learning_rate": 2.480409295328803e-06, - "loss": 0.1034, + "epoch": 4.104303797468354, + "grad_norm": 0.2999251731420047, + "learning_rate": 3.7931200239011643e-06, + "loss": 0.2945, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010069448500871658, - "step": 1058, - "valid_targets_mean": 3870.6, - "valid_targets_min": 1099 + "loss_rank_avg": 0.13318127393722534, + "step": 4055, + "valid_targets_mean": 4568.8, + "valid_targets_min": 1236 }, { - "epoch": 4.287594936708861, - "grad_norm": 0.24458807065991509, - "learning_rate": 2.45320049480108e-06, - "loss": 0.1099, + "epoch": 4.109367088607595, + "grad_norm": 0.2816382941308749, + "learning_rate": 3.7518170738561168e-06, + "loss": 0.2923, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015278888866305351, - "step": 1059, - "valid_targets_mean": 4747.9, - "valid_targets_min": 1143 + "loss_rank_avg": 0.14610666036605835, + "step": 4060, + "valid_targets_mean": 4928.9, + "valid_targets_min": 826 }, { - "epoch": 4.291645569620253, - "grad_norm": 0.23154765850425468, - "learning_rate": 2.4261319980146293e-06, - "loss": 0.1095, + "epoch": 4.114430379746835, + "grad_norm": 0.32554297974189644, + "learning_rate": 3.7107169411991127e-06, + "loss": 0.3074, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01711427979171276, - "step": 1060, - "valid_targets_mean": 5362.8, - "valid_targets_min": 1402 + "loss_rank_avg": 0.12090884894132614, + "step": 4065, + "valid_targets_mean": 3398.6, + "valid_targets_min": 1385 }, { - "epoch": 4.295696202531645, - "grad_norm": 0.2348947040709094, - "learning_rate": 2.399204021408421e-06, - "loss": 0.1048, + "epoch": 4.119493670886076, + "grad_norm": 0.3170024428197897, + "learning_rate": 3.6698201389611423e-06, + "loss": 0.2839, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017931368201971054, - "step": 1061, - "valid_targets_mean": 5377.5, - "valid_targets_min": 1164 + "loss_rank_avg": 0.13328410685062408, + "step": 4070, + "valid_targets_mean": 4025.0, + "valid_targets_min": 901 }, { - "epoch": 4.299746835443038, - "grad_norm": 0.24187219348589478, - "learning_rate": 2.3724167802978395e-06, - "loss": 0.1144, + "epoch": 4.124556962025316, + "grad_norm": 0.3205485924763668, + "learning_rate": 3.62912717763515e-06, + "loss": 0.3124, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009700765833258629, - "step": 1062, - "valid_targets_mean": 3637.3, - "valid_targets_min": 1719 + "loss_rank_avg": 0.21576707065105438, + "step": 4075, + "valid_targets_mean": 6120.9, + "valid_targets_min": 1421 }, { - "epoch": 4.30379746835443, - "grad_norm": 0.24712355847455172, - "learning_rate": 2.3457704888729625e-06, - "loss": 0.1034, + "epoch": 4.129620253164557, + "grad_norm": 0.38654381765739637, + "learning_rate": 3.5886385651696267e-06, + "loss": 0.287, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011837814934551716, - "step": 1063, - "valid_targets_mean": 4285.8, - "valid_targets_min": 1364 + "loss_rank_avg": 0.14297889173030853, + "step": 4080, + "valid_targets_mean": 4917.6, + "valid_targets_min": 1098 }, { - "epoch": 4.307848101265823, - "grad_norm": 0.25482267306274325, - "learning_rate": 2.3192653601968162e-06, - "loss": 0.1087, + "epoch": 4.1346835443037975, + "grad_norm": 0.3524477589713149, + "learning_rate": 3.5483548069623043e-06, + "loss": 0.3032, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017915062606334686, - "step": 1064, - "valid_targets_mean": 4704.2, - "valid_targets_min": 1391 + "loss_rank_avg": 0.14278902113437653, + "step": 4085, + "valid_targets_mean": 3844.8, + "valid_targets_min": 1260 }, { - "epoch": 4.311898734177215, - "grad_norm": 0.2491622467734568, - "learning_rate": 2.2929016062037057e-06, - "loss": 0.1082, + "epoch": 4.139746835443038, + "grad_norm": 0.3197809913122213, + "learning_rate": 3.5082764058538056e-06, + "loss": 0.2891, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009641005657613277, - "step": 1065, - "valid_targets_mean": 2723.6, - "valid_targets_min": 915 + "loss_rank_avg": 0.12523522973060608, + "step": 4090, + "valid_targets_mean": 3690.5, + "valid_targets_min": 885 }, { - "epoch": 4.315949367088607, - "grad_norm": 0.23291800635203916, - "learning_rate": 2.2666794376975055e-06, - "loss": 0.1099, + "epoch": 4.1448101265822785, + "grad_norm": 0.30886469526940097, + "learning_rate": 3.4684038621214124e-06, + "loss": 0.2829, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010794535279273987, - "step": 1066, - "valid_targets_mean": 4469.6, - "valid_targets_min": 1237 + "loss_rank_avg": 0.16583015024662018, + "step": 4095, + "valid_targets_mean": 5001.4, + "valid_targets_min": 1243 }, { - "epoch": 4.32, - "grad_norm": 0.24684772398855878, - "learning_rate": 2.24059906434998e-06, - "loss": 0.1075, + "epoch": 4.149873417721519, + "grad_norm": 0.28226754473975985, + "learning_rate": 3.4287376734727817e-06, + "loss": 0.2931, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015624083578586578, - "step": 1067, - "valid_targets_mean": 5676.4, - "valid_targets_min": 1377 + "loss_rank_avg": 0.16582466661930084, + "step": 4100, + "valid_targets_mean": 5544.1, + "valid_targets_min": 1022 }, { - "epoch": 4.324050632911392, - "grad_norm": 0.24639223893807155, - "learning_rate": 2.214660694699091e-06, - "loss": 0.1178, + "epoch": 4.15493670886076, + "grad_norm": 0.3051843817153008, + "learning_rate": 3.3892783350397675e-06, + "loss": 0.2876, "loss_nan_ranks": 0, - "loss_rank_avg": 0.021386027336120605, - "step": 1068, - "valid_targets_mean": 6623.7, - "valid_targets_min": 1466 + "loss_rank_avg": 0.11235642433166504, + "step": 4105, + "valid_targets_mean": 3728.7, + "valid_targets_min": 1019 }, { - "epoch": 4.328101265822784, - "grad_norm": 0.25355308895164, - "learning_rate": 2.188864536147355e-06, - "loss": 0.1101, + "epoch": 4.16, + "grad_norm": 0.28561421446674784, + "learning_rate": 3.3500263393722033e-06, + "loss": 0.2821, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012787914834916592, - "step": 1069, - "valid_targets_mean": 3562.9, - "valid_targets_min": 1581 + "loss_rank_avg": 0.14730824530124664, + "step": 4110, + "valid_targets_mean": 5325.3, + "valid_targets_min": 1247 }, { - "epoch": 4.332151898734177, - "grad_norm": 0.2438523661180247, - "learning_rate": 2.1632107949601757e-06, - "loss": 0.1073, + "epoch": 4.165063291139241, + "grad_norm": 0.3396963845435047, + "learning_rate": 3.310982176431785e-06, + "loss": 0.285, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013902321457862854, - "step": 1070, - "valid_targets_mean": 4379.1, - "valid_targets_min": 1419 + "loss_rank_avg": 0.1362554281949997, + "step": 4115, + "valid_targets_mean": 4046.4, + "valid_targets_min": 974 }, { - "epoch": 4.3362025316455695, - "grad_norm": 0.23567280343654579, - "learning_rate": 2.137699676264171e-06, - "loss": 0.1144, + "epoch": 4.170126582278481, + "grad_norm": 0.3604026125994517, + "learning_rate": 3.2721463335859484e-06, + "loss": 0.2991, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012037249282002449, - "step": 1071, - "valid_targets_mean": 4784.9, - "valid_targets_min": 1578 + "loss_rank_avg": 0.12034134566783905, + "step": 4120, + "valid_targets_mean": 2845.9, + "valid_targets_min": 1453 }, { - "epoch": 4.340253164556962, - "grad_norm": 0.2510640878527952, - "learning_rate": 2.1123313840455738e-06, - "loss": 0.1082, + "epoch": 4.175189873417722, + "grad_norm": 0.3047812123169994, + "learning_rate": 3.2335192956017634e-06, + "loss": 0.2972, "loss_nan_ranks": 0, - "loss_rank_avg": 0.008996009826660156, - "step": 1072, - "valid_targets_mean": 3449.2, - "valid_targets_min": 1279 + "loss_rank_avg": 0.1503809243440628, + "step": 4125, + "valid_targets_mean": 4877.8, + "valid_targets_min": 1783 }, { - "epoch": 4.3443037974683545, - "grad_norm": 0.25292754399485057, - "learning_rate": 2.08710612114857e-06, - "loss": 0.1068, + "epoch": 4.180253164556962, + "grad_norm": 0.36835014889244666, + "learning_rate": 3.1951015446399247e-06, + "loss": 0.2942, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010600311681628227, - "step": 1073, - "valid_targets_mean": 3625.1, - "valid_targets_min": 1295 + "loss_rank_avg": 0.11517144739627838, + "step": 4130, + "valid_targets_mean": 2904.6, + "valid_targets_min": 909 }, { - "epoch": 4.348354430379747, - "grad_norm": 0.23233271567825875, - "learning_rate": 2.0620240892736953e-06, - "loss": 0.1151, + "epoch": 4.185316455696203, + "grad_norm": 0.32918669737266704, + "learning_rate": 3.156893560248688e-06, + "loss": 0.3031, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01687842421233654, - "step": 1074, - "valid_targets_mean": 5275.1, - "valid_targets_min": 1080 + "loss_rank_avg": 0.140342578291893, + "step": 4135, + "valid_targets_mean": 4083.8, + "valid_targets_min": 1081 }, { - "epoch": 4.3524050632911395, - "grad_norm": 0.23516768581742103, - "learning_rate": 2.0370854889762e-06, - "loss": 0.1074, + "epoch": 4.190379746835443, + "grad_norm": 0.33957836205604514, + "learning_rate": 3.118895819357908e-06, + "loss": 0.2808, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014397460035979748, - "step": 1075, - "valid_targets_mean": 4645.4, - "valid_targets_min": 1009 + "loss_rank_avg": 0.1507326364517212, + "step": 4140, + "valid_targets_mean": 4652.1, + "valid_targets_min": 1298 }, { - "epoch": 4.356455696202532, - "grad_norm": 0.2407611209148767, - "learning_rate": 2.0122905196644726e-06, - "loss": 0.0982, + "epoch": 4.195443037974684, + "grad_norm": 0.285494983139607, + "learning_rate": 3.081108796273098e-06, + "loss": 0.301, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009776914492249489, - "step": 1076, - "valid_targets_mean": 3878.9, - "valid_targets_min": 1328 + "loss_rank_avg": 0.12487950921058655, + "step": 4145, + "valid_targets_mean": 4737.4, + "valid_targets_min": 1243 }, { - "epoch": 4.360506329113924, - "grad_norm": 0.24693246517008668, - "learning_rate": 1.987639379598427e-06, - "loss": 0.1074, + "epoch": 4.200506329113924, + "grad_norm": 0.32355832904009185, + "learning_rate": 3.0435329626694733e-06, + "loss": 0.3044, "loss_nan_ranks": 0, - "loss_rank_avg": 0.019608918577432632, - "step": 1077, - "valid_targets_mean": 5656.2, - "valid_targets_min": 1097 + "loss_rank_avg": 0.18284101784229279, + "step": 4150, + "valid_targets_mean": 4946.2, + "valid_targets_min": 1066 }, { - "epoch": 4.364556962025317, - "grad_norm": 0.23917342823384077, - "learning_rate": 1.963132265887919e-06, - "loss": 0.1186, + "epoch": 4.205569620253165, + "grad_norm": 0.2901811873935047, + "learning_rate": 3.006168787586097e-06, + "loss": 0.2793, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015440532006323338, - "step": 1078, - "valid_targets_mean": 4235.8, - "valid_targets_min": 1298 + "loss_rank_avg": 0.13859279453754425, + "step": 4155, + "valid_targets_mean": 5316.1, + "valid_targets_min": 1099 }, { - "epoch": 4.368607594936709, - "grad_norm": 0.2476402893022485, - "learning_rate": 1.9387693744911784e-06, - "loss": 0.1148, + "epoch": 4.2106329113924055, + "grad_norm": 0.5190123284320625, + "learning_rate": 2.9690167374200075e-06, + "loss": 0.2972, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0155026875436306, - "step": 1079, - "valid_targets_mean": 4751.9, - "valid_targets_min": 1181 + "loss_rank_avg": 0.1444871723651886, + "step": 4160, + "valid_targets_mean": 4388.6, + "valid_targets_min": 1162 }, { - "epoch": 4.372658227848102, - "grad_norm": 0.24046448814498173, - "learning_rate": 1.9145509002132368e-06, - "loss": 0.1149, + "epoch": 4.215696202531646, + "grad_norm": 0.31722642981901594, + "learning_rate": 2.9320772759203975e-06, + "loss": 0.2988, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0111662857234478, - "step": 1080, - "valid_targets_mean": 3625.0, - "valid_targets_min": 1172 + "loss_rank_avg": 0.13206876814365387, + "step": 4165, + "valid_targets_mean": 4130.3, + "valid_targets_min": 1281 }, { - "epoch": 4.376708860759494, - "grad_norm": 0.2508759488943957, - "learning_rate": 1.8904770367043723e-06, - "loss": 0.1161, + "epoch": 4.220759493670886, + "grad_norm": 0.30916942149206356, + "learning_rate": 2.8953508641828375e-06, + "loss": 0.3024, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015941239893436432, - "step": 1081, - "valid_targets_mean": 4548.8, - "valid_targets_min": 897 + "loss_rank_avg": 0.1427369862794876, + "step": 4170, + "valid_targets_mean": 4538.5, + "valid_targets_min": 1313 }, { - "epoch": 4.380759493670886, - "grad_norm": 0.2417593143201865, - "learning_rate": 1.8665479764585504e-06, - "loss": 0.1192, + "epoch": 4.225822784810126, + "grad_norm": 0.3075492109138346, + "learning_rate": 2.858837960643499e-06, + "loss": 0.3005, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011951446533203125, - "step": 1082, - "valid_targets_mean": 4621.5, - "valid_targets_min": 1320 + "loss_rank_avg": 0.17169958353042603, + "step": 4175, + "valid_targets_mean": 4840.8, + "valid_targets_min": 1296 }, { - "epoch": 4.384810126582279, - "grad_norm": 0.24355761102946463, - "learning_rate": 1.8427639108118977e-06, - "loss": 0.1137, + "epoch": 4.230886075949367, + "grad_norm": 0.29933826061321767, + "learning_rate": 2.822539021073463e-06, + "loss": 0.3067, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014804190024733543, - "step": 1083, - "valid_targets_mean": 4951.3, - "valid_targets_min": 1288 + "loss_rank_avg": 0.16137537360191345, + "step": 4180, + "valid_targets_mean": 5634.3, + "valid_targets_min": 1130 }, { - "epoch": 4.388860759493671, - "grad_norm": 0.24633384198377473, - "learning_rate": 1.8191250299411755e-06, - "loss": 0.1064, + "epoch": 4.235949367088607, + "grad_norm": 0.29690888264127147, + "learning_rate": 2.786454498572997e-06, + "loss": 0.2857, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015205018222332, - "step": 1084, - "valid_targets_mean": 4959.1, - "valid_targets_min": 1053 + "loss_rank_avg": 0.135961651802063, + "step": 4185, + "valid_targets_mean": 4399.6, + "valid_targets_min": 732 }, { - "epoch": 4.392911392405063, - "grad_norm": 0.24678792381766704, - "learning_rate": 1.7956315228622446e-06, - "loss": 0.1035, + "epoch": 4.241012658227848, + "grad_norm": 0.3246955608047686, + "learning_rate": 2.7505848435659156e-06, + "loss": 0.3044, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010629641823470592, - "step": 1085, - "valid_targets_mean": 3940.3, - "valid_targets_min": 1206 + "loss_rank_avg": 0.1551479548215866, + "step": 4190, + "valid_targets_mean": 4534.1, + "valid_targets_min": 1082 }, { - "epoch": 4.396962025316456, - "grad_norm": 0.23023111044490813, - "learning_rate": 1.7722835774285553e-06, - "loss": 0.1138, + "epoch": 4.246075949367088, + "grad_norm": 0.29829478040788515, + "learning_rate": 2.7149305037939687e-06, + "loss": 0.2932, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01395136397331953, - "step": 1086, - "valid_targets_mean": 5731.8, - "valid_targets_min": 1487 + "loss_rank_avg": 0.18227550387382507, + "step": 4195, + "valid_targets_mean": 5650.3, + "valid_targets_min": 1359 }, { - "epoch": 4.401012658227848, - "grad_norm": 0.2254694696708648, - "learning_rate": 1.7490813803296624e-06, - "loss": 0.1074, + "epoch": 4.251139240506329, + "grad_norm": 0.3491396172417171, + "learning_rate": 2.679491924311226e-06, + "loss": 0.2841, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018306240439414978, - "step": 1087, - "valid_targets_mean": 6387.6, - "valid_targets_min": 1149 + "loss_rank_avg": 0.13989317417144775, + "step": 4200, + "valid_targets_mean": 3527.8, + "valid_targets_min": 1094 }, { - "epoch": 4.405063291139241, - "grad_norm": 0.23602745246431867, - "learning_rate": 1.726025117089718e-06, - "loss": 0.1092, + "epoch": 4.256202531645569, + "grad_norm": 0.3062465471975927, + "learning_rate": 2.644269547478555e-06, + "loss": 0.304, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01539086364209652, - "step": 1088, - "valid_targets_mean": 5928.7, - "valid_targets_min": 981 + "loss_rank_avg": 0.16212749481201172, + "step": 4205, + "valid_targets_mean": 4856.2, + "valid_targets_min": 850 }, { - "epoch": 4.409113924050633, - "grad_norm": 0.24677515872589578, - "learning_rate": 1.70311497206598e-06, - "loss": 0.1107, + "epoch": 4.26126582278481, + "grad_norm": 0.31154484975672075, + "learning_rate": 2.609263812958065e-06, + "loss": 0.2934, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01074039563536644, - "step": 1089, - "valid_targets_mean": 3698.3, - "valid_targets_min": 1212 + "loss_rank_avg": 0.10032152384519577, + "step": 4210, + "valid_targets_mean": 3391.6, + "valid_targets_min": 1245 }, { - "epoch": 4.413164556962025, - "grad_norm": 0.24193020498990295, - "learning_rate": 1.6803511284473638e-06, - "loss": 0.1034, + "epoch": 4.2663291139240505, + "grad_norm": 0.3276187265653155, + "learning_rate": 2.5744751577076343e-06, + "loss": 0.2924, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012303371913731098, - "step": 1090, - "valid_targets_mean": 3728.2, - "valid_targets_min": 1434 + "loss_rank_avg": 0.16798563301563263, + "step": 4215, + "valid_targets_mean": 4301.5, + "valid_targets_min": 828 }, { - "epoch": 4.417215189873418, - "grad_norm": 0.25697776287635726, - "learning_rate": 1.6577337682529538e-06, - "loss": 0.1151, + "epoch": 4.271392405063291, + "grad_norm": 0.31771678562930245, + "learning_rate": 2.539904015975476e-06, + "loss": 0.2932, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015065351501107216, - "step": 1091, - "valid_targets_mean": 4298.5, - "valid_targets_min": 1137 + "loss_rank_avg": 0.14239826798439026, + "step": 4220, + "valid_targets_mean": 4702.1, + "valid_targets_min": 1064 }, { - "epoch": 4.42126582278481, - "grad_norm": 0.24763499135416595, - "learning_rate": 1.6352630723305617e-06, - "loss": 0.1077, + "epoch": 4.2764556962025315, + "grad_norm": 0.2979983224769533, + "learning_rate": 2.5055508192946732e-06, + "loss": 0.3026, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017283417284488678, - "step": 1092, - "valid_targets_mean": 4962.7, - "valid_targets_min": 1041 + "loss_rank_avg": 0.15499252080917358, + "step": 4225, + "valid_targets_mean": 4931.4, + "valid_targets_min": 1420 }, { - "epoch": 4.425316455696202, - "grad_norm": 0.23550719317841343, - "learning_rate": 1.612939220355272e-06, - "loss": 0.1128, + "epoch": 4.281518987341772, + "grad_norm": 0.3378145372579719, + "learning_rate": 2.4714159964778394e-06, + "loss": 0.2766, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014940792694687843, - "step": 1093, - "valid_targets_mean": 4944.0, - "valid_targets_min": 1344 + "loss_rank_avg": 0.15219071507453918, + "step": 4230, + "valid_targets_mean": 4131.9, + "valid_targets_min": 1088 }, { - "epoch": 4.429367088607595, - "grad_norm": 0.23589340772446932, - "learning_rate": 1.5907623908280112e-06, - "loss": 0.1061, + "epoch": 4.286582278481013, + "grad_norm": 0.3132928788106069, + "learning_rate": 2.437499973611728e-06, + "loss": 0.2861, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015425451099872589, - "step": 1094, - "valid_targets_mean": 5052.8, - "valid_targets_min": 1142 + "loss_rank_avg": 0.17868193984031677, + "step": 4235, + "valid_targets_mean": 5772.8, + "valid_targets_min": 1773 }, { - "epoch": 4.433417721518987, - "grad_norm": 0.2354380195701554, - "learning_rate": 1.568732761074121e-06, - "loss": 0.1081, + "epoch": 4.291645569620253, + "grad_norm": 0.3049108704834252, + "learning_rate": 2.403803174051933e-06, + "loss": 0.2997, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010928714647889137, - "step": 1095, - "valid_targets_mean": 4296.2, - "valid_targets_min": 1321 + "loss_rank_avg": 0.17676058411598206, + "step": 4240, + "valid_targets_mean": 5360.8, + "valid_targets_min": 1400 }, { - "epoch": 4.43746835443038, - "grad_norm": 0.23904734455838542, - "learning_rate": 1.5468505072419237e-06, - "loss": 0.1122, + "epoch": 4.296708860759494, + "grad_norm": 0.2674881690807918, + "learning_rate": 2.3703260184176103e-06, + "loss": 0.2849, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018300985917448997, - "step": 1096, - "valid_targets_mean": 5111.7, - "valid_targets_min": 1443 + "loss_rank_avg": 0.11438636481761932, + "step": 4245, + "valid_targets_mean": 4663.1, + "valid_targets_min": 991 }, { - "epoch": 4.441518987341772, - "grad_norm": 0.24199872653993765, - "learning_rate": 1.5251158043013448e-06, - "loss": 0.1105, + "epoch": 4.301772151898734, + "grad_norm": 0.3381875004506121, + "learning_rate": 2.337068924586203e-06, + "loss": 0.3034, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01229251828044653, - "step": 1097, - "valid_targets_mean": 5098.9, - "valid_targets_min": 515 + "loss_rank_avg": 0.14016669988632202, + "step": 4250, + "valid_targets_mean": 3257.6, + "valid_targets_min": 973 }, { - "epoch": 4.445569620253164, - "grad_norm": 0.23200296960339453, - "learning_rate": 1.5035288260424885e-06, - "loss": 0.1035, + "epoch": 4.306835443037975, + "grad_norm": 0.33566799216440724, + "learning_rate": 2.3040323076882578e-06, + "loss": 0.2852, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010072460398077965, - "step": 1098, - "valid_targets_mean": 3966.8, - "valid_targets_min": 868 + "loss_rank_avg": 0.1721951961517334, + "step": 4255, + "valid_targets_mean": 4602.8, + "valid_targets_min": 1312 }, { - "epoch": 4.449620253164557, - "grad_norm": 0.2462493968454483, - "learning_rate": 1.4820897450742623e-06, - "loss": 0.108, + "epoch": 4.311898734177215, + "grad_norm": 0.39194447138918637, + "learning_rate": 2.2712165801022046e-06, + "loss": 0.2997, "loss_nan_ranks": 0, - "loss_rank_avg": 0.00930652767419815, - "step": 1099, - "valid_targets_mean": 2841.6, - "valid_targets_min": 1123 + "loss_rank_avg": 0.1366182416677475, + "step": 4260, + "valid_targets_mean": 2721.6, + "valid_targets_min": 913 }, { - "epoch": 4.453670886075949, - "grad_norm": 0.23785633041846221, - "learning_rate": 1.4607987328229767e-06, - "loss": 0.1132, + "epoch": 4.316962025316456, + "grad_norm": 0.3637100172839474, + "learning_rate": 2.2386221514492502e-06, + "loss": 0.2911, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01593669131398201, - "step": 1100, - "valid_targets_mean": 5572.1, - "valid_targets_min": 1144 + "loss_rank_avg": 0.11843443661928177, + "step": 4265, + "valid_targets_mean": 3167.0, + "valid_targets_min": 1022 }, { - "epoch": 4.457721518987341, - "grad_norm": 0.25294580176951953, - "learning_rate": 1.4396559595310056e-06, - "loss": 0.1105, + "epoch": 4.322025316455696, + "grad_norm": 0.32727869850436964, + "learning_rate": 2.2062494285882363e-06, + "loss": 0.3013, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01751577854156494, - "step": 1101, - "valid_targets_mean": 5425.2, - "valid_targets_min": 1279 + "loss_rank_avg": 0.1746322512626648, + "step": 4270, + "valid_targets_mean": 4557.8, + "valid_targets_min": 939 }, { - "epoch": 4.461772151898734, - "grad_norm": 0.2258972905675386, - "learning_rate": 1.4186615942553973e-06, - "loss": 0.1104, + "epoch": 4.327088607594937, + "grad_norm": 0.35219909889872136, + "learning_rate": 2.1740988156105593e-06, + "loss": 0.2937, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01287138182669878, - "step": 1102, - "valid_targets_mean": 4567.1, - "valid_targets_min": 1567 + "loss_rank_avg": 0.08915197849273682, + "step": 4275, + "valid_targets_mean": 2497.8, + "valid_targets_min": 1172 }, { - "epoch": 4.465822784810126, - "grad_norm": 0.26232752630138473, - "learning_rate": 1.3978158048665424e-06, - "loss": 0.1128, + "epoch": 4.332151898734177, + "grad_norm": 0.3312024847124787, + "learning_rate": 2.142170713835161e-06, + "loss": 0.2925, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015547772869467735, - "step": 1103, - "valid_targets_mean": 4495.8, - "valid_targets_min": 1144 + "loss_rank_avg": 0.16628465056419373, + "step": 4280, + "valid_targets_mean": 4481.7, + "valid_targets_min": 1417 }, { - "epoch": 4.469873417721519, - "grad_norm": 0.23988110774615404, - "learning_rate": 1.3771187580468115e-06, - "loss": 0.1108, + "epoch": 4.337215189873418, + "grad_norm": 0.26123810507474954, + "learning_rate": 2.1104655218034685e-06, + "loss": 0.2875, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012048361822962761, - "step": 1104, - "valid_targets_mean": 4554.2, - "valid_targets_min": 1062 + "loss_rank_avg": 0.136556938290596, + "step": 4285, + "valid_targets_mean": 5086.1, + "valid_targets_min": 1214 }, { - "epoch": 4.473924050632911, - "grad_norm": 0.23875743605498156, - "learning_rate": 1.3565706192892392e-06, - "loss": 0.1098, + "epoch": 4.3422784810126585, + "grad_norm": 0.28913080019288895, + "learning_rate": 2.0789836352744653e-06, + "loss": 0.3025, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012100529856979847, - "step": 1105, - "valid_targets_mean": 4961.4, - "valid_targets_min": 1118 + "loss_rank_avg": 0.1929275244474411, + "step": 4290, + "valid_targets_mean": 6718.0, + "valid_targets_min": 666 }, { - "epoch": 4.4779746835443035, - "grad_norm": 0.22977235459445125, - "learning_rate": 1.3361715528961993e-06, - "loss": 0.1057, + "epoch": 4.347341772151899, + "grad_norm": 0.2856092567973034, + "learning_rate": 2.0477254472197237e-06, + "loss": 0.2857, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01663079485297203, - "step": 1106, - "valid_targets_mean": 5048.4, - "valid_targets_min": 1415 + "loss_rank_avg": 0.12290873378515244, + "step": 4295, + "valid_targets_mean": 4296.3, + "valid_targets_min": 828 }, { - "epoch": 4.4820253164556965, - "grad_norm": 0.23543453318246152, - "learning_rate": 1.3159217219780817e-06, - "loss": 0.1098, + "epoch": 4.3524050632911395, + "grad_norm": 0.376756316497093, + "learning_rate": 2.0166913478185004e-06, + "loss": 0.2969, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013302445411682129, - "step": 1107, - "valid_targets_mean": 4229.1, - "valid_targets_min": 1511 + "loss_rank_avg": 0.1333305984735489, + "step": 4300, + "valid_targets_mean": 4643.4, + "valid_targets_min": 1007 }, { - "epoch": 4.4860759493670885, - "grad_norm": 0.25576008740685763, - "learning_rate": 1.2958212884519949e-06, - "loss": 0.1151, + "epoch": 4.35746835443038, + "grad_norm": 0.37269967571373286, + "learning_rate": 1.9858817244528896e-06, + "loss": 0.2771, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015795301645994186, - "step": 1108, - "valid_targets_mean": 5122.8, - "valid_targets_min": 1139 + "loss_rank_avg": 0.16167780756950378, + "step": 4305, + "valid_targets_mean": 3327.0, + "valid_targets_min": 1248 }, { - "epoch": 4.490126582278481, - "grad_norm": 0.23384858874585748, - "learning_rate": 1.2758704130404675e-06, - "loss": 0.1037, + "epoch": 4.362531645569621, + "grad_norm": 0.36507367284654024, + "learning_rate": 1.955296961702955e-06, + "loss": 0.2891, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014064125716686249, - "step": 1109, - "valid_targets_mean": 4712.9, - "valid_targets_min": 741 + "loss_rank_avg": 0.1149420514702797, + "step": 4310, + "valid_targets_mean": 3484.9, + "valid_targets_min": 1218 }, { - "epoch": 4.494177215189874, - "grad_norm": 0.23135518159291688, - "learning_rate": 1.2560692552701738e-06, - "loss": 0.1071, + "epoch": 4.367594936708861, + "grad_norm": 0.3549138995526032, + "learning_rate": 1.9249374413419584e-06, + "loss": 0.3209, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009383934549987316, - "step": 1110, - "valid_targets_mean": 3435.5, - "valid_targets_min": 1257 + "loss_rank_avg": 0.16552500426769257, + "step": 4315, + "valid_targets_mean": 4319.2, + "valid_targets_min": 1166 }, { - "epoch": 4.498227848101266, - "grad_norm": 0.23853269674283112, - "learning_rate": 1.2364179734706382e-06, - "loss": 0.1076, + "epoch": 4.372658227848102, + "grad_norm": 0.30961509363434114, + "learning_rate": 1.894803542331567e-06, + "loss": 0.2936, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013448908925056458, - "step": 1111, - "valid_targets_mean": 4762.3, - "valid_targets_min": 1467 + "loss_rank_avg": 0.12911680340766907, + "step": 4320, + "valid_targets_mean": 3623.0, + "valid_targets_min": 1170 }, { - "epoch": 4.502278481012659, - "grad_norm": 0.2469096524547987, - "learning_rate": 1.2169167247729852e-06, - "loss": 0.1109, + "epoch": 4.377721518987342, + "grad_norm": 0.30219606387390835, + "learning_rate": 1.8648956408171547e-06, + "loss": 0.2973, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011553686112165451, - "step": 1112, - "valid_targets_mean": 3743.1, - "valid_targets_min": 1108 + "loss_rank_avg": 0.1574065387248993, + "step": 4325, + "valid_targets_mean": 4955.6, + "valid_targets_min": 1679 }, { - "epoch": 4.506329113924051, - "grad_norm": 0.2424742627143951, - "learning_rate": 1.1975656651086864e-06, - "loss": 0.1123, + "epoch": 4.382784810126582, + "grad_norm": 0.3279169523694041, + "learning_rate": 1.8352141101230758e-06, + "loss": 0.2983, "loss_nan_ranks": 0, - "loss_rank_avg": 0.019136445596814156, - "step": 1113, - "valid_targets_mean": 5195.6, - "valid_targets_min": 1037 + "loss_rank_avg": 0.1483180820941925, + "step": 4330, + "valid_targets_mean": 3969.1, + "valid_targets_min": 1428 }, { - "epoch": 4.510379746835443, - "grad_norm": 0.25201664133307256, - "learning_rate": 1.1783649492082926e-06, - "loss": 0.1101, + "epoch": 4.387848101265822, + "grad_norm": 0.3105228186529775, + "learning_rate": 1.8057593207480194e-06, + "loss": 0.2979, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020832877606153488, - "step": 1114, - "valid_targets_mean": 5313.9, - "valid_targets_min": 1113 + "loss_rank_avg": 0.11923258006572723, + "step": 4335, + "valid_targets_mean": 3387.1, + "valid_targets_min": 1009 }, { - "epoch": 4.514430379746836, - "grad_norm": 0.23965971627973182, - "learning_rate": 1.1593147306002183e-06, - "loss": 0.1081, + "epoch": 4.392911392405063, + "grad_norm": 0.33881080054396395, + "learning_rate": 1.7765316403603927e-06, + "loss": 0.2916, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015099755488336086, - "step": 1115, - "valid_targets_mean": 4789.8, - "valid_targets_min": 1380 + "loss_rank_avg": 0.13051745295524597, + "step": 4340, + "valid_targets_mean": 3938.3, + "valid_targets_min": 1204 }, { - "epoch": 4.518481012658228, - "grad_norm": 0.24374472934816327, - "learning_rate": 1.140415161609505e-06, - "loss": 0.1093, + "epoch": 4.397974683544303, + "grad_norm": 0.2859920210081916, + "learning_rate": 1.7475314337937099e-06, + "loss": 0.2971, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01824544370174408, - "step": 1116, - "valid_targets_mean": 5898.2, - "valid_targets_min": 1408 + "loss_rank_avg": 0.14054298400878906, + "step": 4345, + "valid_targets_mean": 4863.8, + "valid_targets_min": 1288 }, { - "epoch": 4.52253164556962, - "grad_norm": 0.22621971438759939, - "learning_rate": 1.121666393356604e-06, - "loss": 0.1067, + "epoch": 4.403037974683544, + "grad_norm": 0.3222088763676915, + "learning_rate": 1.7187590630420681e-06, + "loss": 0.2911, "loss_nan_ranks": 0, - "loss_rank_avg": 0.006982675287872553, - "step": 1117, - "valid_targets_mean": 3357.9, - "valid_targets_min": 1230 + "loss_rank_avg": 0.15554268658161163, + "step": 4350, + "valid_targets_mean": 3944.6, + "valid_targets_min": 1256 }, { - "epoch": 4.526582278481013, - "grad_norm": 0.24277785111288838, - "learning_rate": 1.1030685757561632e-06, - "loss": 0.1105, + "epoch": 4.4081012658227845, + "grad_norm": 0.32147293328088156, + "learning_rate": 1.6902148872555924e-06, + "loss": 0.2983, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014613626524806023, - "step": 1118, - "valid_targets_mean": 4052.4, - "valid_targets_min": 1621 + "loss_rank_avg": 0.13429546356201172, + "step": 4355, + "valid_targets_mean": 3537.0, + "valid_targets_min": 1501 }, { - "epoch": 4.530632911392405, - "grad_norm": 0.24715457555021894, - "learning_rate": 1.0846218575158373e-06, - "loss": 0.1173, + "epoch": 4.413164556962025, + "grad_norm": 0.2873112295172894, + "learning_rate": 1.661899262735991e-06, + "loss": 0.2809, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014865981414914131, - "step": 1119, - "valid_targets_mean": 4405.4, - "valid_targets_min": 1484 + "loss_rank_avg": 0.11225029826164246, + "step": 4360, + "valid_targets_mean": 3726.2, + "valid_targets_min": 1432 }, { - "epoch": 4.534683544303798, - "grad_norm": 0.2350923562542679, - "learning_rate": 1.066326386135097e-06, - "loss": 0.1034, + "epoch": 4.418227848101266, + "grad_norm": 0.3137420905534193, + "learning_rate": 1.6338125429320694e-06, + "loss": 0.3025, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013338493183255196, - "step": 1120, - "valid_targets_mean": 5251.7, - "valid_targets_min": 1338 + "loss_rank_avg": 0.1628667116165161, + "step": 4365, + "valid_targets_mean": 4489.4, + "valid_targets_min": 1251 }, { - "epoch": 4.53873417721519, - "grad_norm": 0.23261782507815473, - "learning_rate": 1.0481823079040398e-06, - "loss": 0.107, + "epoch": 4.423291139240506, + "grad_norm": 0.2980697010922971, + "learning_rate": 1.605955078435355e-06, + "loss": 0.2861, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018727660179138184, - "step": 1121, - "valid_targets_mean": 6229.1, - "valid_targets_min": 1371 + "loss_rank_avg": 0.1740245670080185, + "step": 4370, + "valid_targets_mean": 5941.4, + "valid_targets_min": 949 }, { - "epoch": 4.542784810126582, - "grad_norm": 0.2372217839868558, - "learning_rate": 1.0301897679022344e-06, - "loss": 0.1065, + "epoch": 4.428354430379747, + "grad_norm": 0.29655956780164205, + "learning_rate": 1.5783272169756903e-06, + "loss": 0.2888, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015288677997887135, - "step": 1122, - "valid_targets_mean": 4868.1, - "valid_targets_min": 1328 + "loss_rank_avg": 0.12947620451450348, + "step": 4375, + "valid_targets_mean": 4607.8, + "valid_targets_min": 1670 }, { - "epoch": 4.546835443037975, - "grad_norm": 0.22851145118582236, - "learning_rate": 1.0123489099975491e-06, - "loss": 0.106, + "epoch": 4.433417721518987, + "grad_norm": 0.28159919536576594, + "learning_rate": 1.550929303416917e-06, + "loss": 0.2891, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009553518146276474, - "step": 1123, - "valid_targets_mean": 3740.2, - "valid_targets_min": 1374 + "loss_rank_avg": 0.11871195584535599, + "step": 4380, + "valid_targets_mean": 4622.2, + "valid_targets_min": 1319 }, { - "epoch": 4.550886075949367, - "grad_norm": 0.24789698005851352, - "learning_rate": 9.9465987684501e-07, - "loss": 0.1114, + "epoch": 4.438481012658228, + "grad_norm": 0.3159849176088272, + "learning_rate": 1.5237616797525512e-06, + "loss": 0.2964, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014984434470534325, - "step": 1124, - "valid_targets_mean": 4818.4, - "valid_targets_min": 1139 + "loss_rank_avg": 0.13893471658229828, + "step": 4385, + "valid_targets_mean": 4139.4, + "valid_targets_min": 1377 }, { - "epoch": 4.55493670886076, - "grad_norm": 0.2352459318761055, - "learning_rate": 9.771228098856534e-07, - "loss": 0.1102, + "epoch": 4.443544303797468, + "grad_norm": 0.3103371076544768, + "learning_rate": 1.49682468510153e-06, + "loss": 0.292, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012403848581016064, - "step": 1125, - "valid_targets_mean": 3469.1, - "valid_targets_min": 828 + "loss_rank_avg": 0.17314529418945312, + "step": 4390, + "valid_targets_mean": 5825.6, + "valid_targets_min": 1000 }, { - "epoch": 4.558987341772152, - "grad_norm": 0.23193523725933549, - "learning_rate": 9.597378493454013e-07, - "loss": 0.1083, + "epoch": 4.448607594936709, + "grad_norm": 0.28729653443614084, + "learning_rate": 1.4701186557039648e-06, + "loss": 0.2844, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01025470346212387, - "step": 1126, - "valid_targets_mean": 3658.0, - "valid_targets_min": 985 + "loss_rank_avg": 0.13900840282440186, + "step": 4395, + "valid_targets_mean": 4918.3, + "valid_targets_min": 1337 }, { - "epoch": 4.563037974683544, - "grad_norm": 0.24340338611260912, - "learning_rate": 9.425051342339353e-07, - "loss": 0.1157, + "epoch": 4.453670886075949, + "grad_norm": 0.3411738428471954, + "learning_rate": 1.4436439249169554e-06, + "loss": 0.3038, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011842905543744564, - "step": 1127, - "valid_targets_mean": 4074.4, - "valid_targets_min": 1395 + "loss_rank_avg": 0.13948732614517212, + "step": 4400, + "valid_targets_mean": 5576.6, + "valid_targets_min": 1142 }, { - "epoch": 4.567088607594937, - "grad_norm": 0.23813693989298826, - "learning_rate": 9.254248023435864e-07, - "loss": 0.1064, + "epoch": 4.45873417721519, + "grad_norm": 0.3118105881962252, + "learning_rate": 1.4174008232104285e-06, + "loss": 0.2997, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014363069087266922, - "step": 1128, - "valid_targets_mean": 5131.9, - "valid_targets_min": 1460 + "loss_rank_avg": 0.13870777189731598, + "step": 4405, + "valid_targets_mean": 4355.1, + "valid_targets_min": 1522 }, { - "epoch": 4.571139240506329, - "grad_norm": 0.2314982637199714, - "learning_rate": 9.084969902482288e-07, - "loss": 0.113, + "epoch": 4.46379746835443, + "grad_norm": 0.32978343142383926, + "learning_rate": 1.3913896781629954e-06, + "loss": 0.2975, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017669495195150375, - "step": 1129, - "valid_targets_mean": 5292.9, - "valid_targets_min": 1350 + "loss_rank_avg": 0.13087686896324158, + "step": 4410, + "valid_targets_mean": 3731.3, + "valid_targets_min": 540 }, { - "epoch": 4.575189873417721, - "grad_norm": 0.24205208281186263, - "learning_rate": 8.917218333021993e-07, - "loss": 0.1043, + "epoch": 4.468860759493671, + "grad_norm": 0.2618923931315447, + "learning_rate": 1.3656108144578962e-06, + "loss": 0.2879, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015519630163908005, - "step": 1130, - "valid_targets_mean": 4128.9, - "valid_targets_min": 868 + "loss_rank_avg": 0.13182973861694336, + "step": 4415, + "valid_targets_mean": 4776.1, + "valid_targets_min": 889 }, { - "epoch": 4.579240506329114, - "grad_norm": 0.2261304617829508, - "learning_rate": 8.750994656392109e-07, - "loss": 0.111, + "epoch": 4.473924050632911, + "grad_norm": 0.27864898068700833, + "learning_rate": 1.340064553878908e-06, + "loss": 0.2865, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016634684056043625, - "step": 1131, - "valid_targets_mean": 5589.6, - "valid_targets_min": 1178 + "loss_rank_avg": 0.14037862420082092, + "step": 4420, + "valid_targets_mean": 5322.2, + "valid_targets_min": 1116 }, { - "epoch": 4.583291139240506, - "grad_norm": 0.23952329967135425, - "learning_rate": 8.58630020171265e-07, - "loss": 0.1105, + "epoch": 4.478987341772152, + "grad_norm": 0.2935646123466447, + "learning_rate": 1.3147512153063558e-06, + "loss": 0.2848, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014186002314090729, - "step": 1132, - "valid_targets_mean": 4555.3, - "valid_targets_min": 1872 + "loss_rank_avg": 0.18074341118335724, + "step": 4425, + "valid_targets_mean": 6079.2, + "valid_targets_min": 1329 }, { - "epoch": 4.587341772151898, - "grad_norm": 0.2442885441237708, - "learning_rate": 8.423136285876099e-07, - "loss": 0.1121, + "epoch": 4.4840506329113925, + "grad_norm": 0.34486016144946086, + "learning_rate": 1.289671114713129e-06, + "loss": 0.2996, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012748418375849724, - "step": 1133, - "valid_targets_mean": 4353.7, - "valid_targets_min": 1734 + "loss_rank_avg": 0.1330321580171585, + "step": 4430, + "valid_targets_mean": 3942.1, + "valid_targets_min": 906 }, { - "epoch": 4.591392405063291, - "grad_norm": 0.2551715498481697, - "learning_rate": 8.26150421353682e-07, - "loss": 0.1074, + "epoch": 4.489113924050633, + "grad_norm": 0.3344555372523651, + "learning_rate": 1.264824565160716e-06, + "loss": 0.2802, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01069591473788023, - "step": 1134, - "valid_targets_mean": 3888.2, - "valid_targets_min": 1501 + "loss_rank_avg": 0.1601177453994751, + "step": 4435, + "valid_targets_mean": 5832.6, + "valid_targets_min": 1133 }, { - "epoch": 4.595443037974683, - "grad_norm": 0.24467054996033144, - "learning_rate": 8.101405277100549e-07, - "loss": 0.1145, + "epoch": 4.494177215189874, + "grad_norm": 0.31338693723543865, + "learning_rate": 1.2402118767953342e-06, + "loss": 0.2851, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012983351945877075, - "step": 1135, - "valid_targets_mean": 4395.8, - "valid_targets_min": 983 + "loss_rank_avg": 0.11089809238910675, + "step": 4440, + "valid_targets_mean": 3433.5, + "valid_targets_min": 1255 }, { - "epoch": 4.599493670886076, - "grad_norm": 0.23952615604287592, - "learning_rate": 7.942840756714077e-07, - "loss": 0.1089, + "epoch": 4.499240506329114, + "grad_norm": 0.32257737557743127, + "learning_rate": 1.2158333568440183e-06, + "loss": 0.2943, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011914948001503944, - "step": 1136, - "valid_targets_mean": 3921.2, - "valid_targets_min": 1482 + "loss_rank_avg": 0.19272515177726746, + "step": 4445, + "valid_targets_mean": 5173.0, + "valid_targets_min": 1505 }, { - "epoch": 4.603544303797468, - "grad_norm": 0.24743934229288847, - "learning_rate": 7.785811920255093e-07, - "loss": 0.1117, + "epoch": 4.504303797468355, + "grad_norm": 0.31362483355013254, + "learning_rate": 1.1916893096108063e-06, + "loss": 0.2946, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01661062426865101, - "step": 1137, - "valid_targets_mean": 4857.1, - "valid_targets_min": 915 + "loss_rank_avg": 0.1500798463821411, + "step": 4450, + "valid_targets_mean": 4663.4, + "valid_targets_min": 1370 }, { - "epoch": 4.6075949367088604, - "grad_norm": 0.243337849066111, - "learning_rate": 7.630320023322024e-07, - "loss": 0.1168, + "epoch": 4.509367088607595, + "grad_norm": 0.3261715632472016, + "learning_rate": 1.167780036472952e-06, + "loss": 0.2959, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009833608753979206, - "step": 1138, - "valid_targets_mean": 3292.8, - "valid_targets_min": 639 + "loss_rank_avg": 0.13341718912124634, + "step": 4455, + "valid_targets_mean": 3426.2, + "valid_targets_min": 1364 }, { - "epoch": 4.611645569620253, - "grad_norm": 0.24721697537601303, - "learning_rate": 7.476366309223903e-07, - "loss": 0.1118, + "epoch": 4.514430379746836, + "grad_norm": 0.30597584635072983, + "learning_rate": 1.1441058358771317e-06, + "loss": 0.2888, "loss_nan_ranks": 0, - "loss_rank_avg": 0.008912472054362297, - "step": 1139, - "valid_targets_mean": 2869.4, - "valid_targets_min": 1178 + "loss_rank_avg": 0.16293524205684662, + "step": 4460, + "valid_targets_mean": 4787.8, + "valid_targets_min": 1378 }, { - "epoch": 4.6156962025316455, - "grad_norm": 0.24031897463539212, - "learning_rate": 7.323952008970537e-07, - "loss": 0.1111, + "epoch": 4.519493670886076, + "grad_norm": 0.2817037355554941, + "learning_rate": 1.1206670033357537e-06, + "loss": 0.2906, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013830740936100483, - "step": 1140, - "valid_targets_mean": 4143.6, - "valid_targets_min": 1243 + "loss_rank_avg": 0.15130957961082458, + "step": 4465, + "valid_targets_mean": 5285.4, + "valid_targets_min": 1543 }, { - "epoch": 4.619746835443038, - "grad_norm": 0.2470119895734213, - "learning_rate": 7.173078341262618e-07, - "loss": 0.1109, + "epoch": 4.524556962025317, + "grad_norm": 0.31694092622996994, + "learning_rate": 1.0974638314232355e-06, + "loss": 0.2894, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010928130708634853, - "step": 1141, - "valid_targets_mean": 3865.4, - "valid_targets_min": 1214 + "loss_rank_avg": 0.15193693339824677, + "step": 4470, + "valid_targets_mean": 4457.0, + "valid_targets_min": 1065 }, { - "epoch": 4.6237974683544305, - "grad_norm": 0.2409629382375663, - "learning_rate": 7.023746512481989e-07, - "loss": 0.1121, + "epoch": 4.529620253164557, + "grad_norm": 0.3521199068866329, + "learning_rate": 1.074496609772384e-06, + "loss": 0.2957, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011756358668208122, - "step": 1142, - "valid_targets_mean": 3866.8, - "valid_targets_min": 745 + "loss_rank_avg": 0.12703898549079895, + "step": 4475, + "valid_targets_mean": 3699.0, + "valid_targets_min": 1328 }, { - "epoch": 4.627848101265823, - "grad_norm": 0.23363037887291022, - "learning_rate": 6.875957716681902e-07, - "loss": 0.1119, + "epoch": 4.534683544303798, + "grad_norm": 0.33021283919153493, + "learning_rate": 1.0517656250707575e-06, + "loss": 0.2989, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01830505207180977, - "step": 1143, - "valid_targets_mean": 5421.4, - "valid_targets_min": 995 + "loss_rank_avg": 0.17758193612098694, + "step": 4480, + "valid_targets_mean": 5269.1, + "valid_targets_min": 1336 }, { - "epoch": 4.6318987341772155, - "grad_norm": 0.24180508844289644, - "learning_rate": 6.729713135577687e-07, - "loss": 0.1075, + "epoch": 4.539746835443038, + "grad_norm": 0.29305233423571464, + "learning_rate": 1.0292711610570904e-06, + "loss": 0.2799, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009786557406187057, - "step": 1144, - "valid_targets_mean": 3802.4, - "valid_targets_min": 904 + "loss_rank_avg": 0.17428819835186005, + "step": 4485, + "valid_targets_mean": 6173.5, + "valid_targets_min": 1377 }, { - "epoch": 4.635949367088608, - "grad_norm": 0.23052849419457697, - "learning_rate": 6.585013938537143e-07, - "loss": 0.105, + "epoch": 4.544810126582279, + "grad_norm": 0.28819969743794543, + "learning_rate": 1.007013498517766e-06, + "loss": 0.2909, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01294481847435236, - "step": 1145, - "valid_targets_mean": 4412.9, - "valid_targets_min": 1270 + "loss_rank_avg": 0.11772993206977844, + "step": 4490, + "valid_targets_mean": 4622.6, + "valid_targets_min": 1404 }, { - "epoch": 4.64, - "grad_norm": 0.2309435146050416, - "learning_rate": 6.441861282571115e-07, - "loss": 0.1088, + "epoch": 4.549873417721519, + "grad_norm": 0.28436809731542273, + "learning_rate": 9.849929152832937e-07, + "loss": 0.2917, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014942819252610207, - "step": 1146, - "valid_targets_mean": 5691.0, - "valid_targets_min": 1458 + "loss_rank_avg": 0.1377575397491455, + "step": 4495, + "valid_targets_mean": 4425.2, + "valid_targets_min": 1292 }, { - "epoch": 4.644050632911393, - "grad_norm": 0.23113695571145013, - "learning_rate": 6.300256312324471e-07, - "loss": 0.1049, + "epoch": 4.55493670886076, + "grad_norm": 0.3150381879092353, + "learning_rate": 9.63209686224853e-07, + "loss": 0.293, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015250153839588165, - "step": 1147, - "valid_targets_mean": 5221.1, - "valid_targets_min": 1262 + "loss_rank_avg": 0.13312697410583496, + "step": 4500, + "valid_targets_mean": 3467.1, + "valid_targets_min": 826 }, { - "epoch": 4.648101265822785, - "grad_norm": 0.23932963878845193, - "learning_rate": 6.160200160066776e-07, - "loss": 0.1175, + "epoch": 4.5600000000000005, + "grad_norm": 0.2911980506543601, + "learning_rate": 9.416640832508572e-07, + "loss": 0.2899, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013173839077353477, - "step": 1148, - "valid_targets_mean": 4475.6, - "valid_targets_min": 1448 + "loss_rank_avg": 0.16458618640899658, + "step": 4505, + "valid_targets_mean": 5330.3, + "valid_targets_min": 980 }, { - "epoch": 4.652151898734177, - "grad_norm": 0.2604647410402613, - "learning_rate": 6.021693945683327e-07, - "loss": 0.1078, + "epoch": 4.56506329113924, + "grad_norm": 0.32761330476726724, + "learning_rate": 9.203563753035527e-07, + "loss": 0.3005, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012512031942605972, - "step": 1149, - "valid_targets_mean": 3842.2, - "valid_targets_min": 1361 + "loss_rank_avg": 0.15134479105472565, + "step": 4510, + "valid_targets_mean": 4789.5, + "valid_targets_min": 1240 }, { - "epoch": 4.65620253164557, - "grad_norm": 0.2523420469817438, - "learning_rate": 5.884738776666044e-07, - "loss": 0.1102, + "epoch": 4.570126582278481, + "grad_norm": 0.27610230646497064, + "learning_rate": 8.992868283556833e-07, + "loss": 0.3023, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013408460654318333, - "step": 1150, - "valid_targets_mean": 4382.5, - "valid_targets_min": 1186 + "loss_rank_avg": 0.14713215827941895, + "step": 4515, + "valid_targets_mean": 5446.5, + "valid_targets_min": 810 }, { - "epoch": 4.660253164556962, - "grad_norm": 0.2435123644982836, - "learning_rate": 5.749335748104878e-07, - "loss": 0.117, + "epoch": 4.575189873417721, + "grad_norm": 0.33737396503280603, + "learning_rate": 8.78455705407144e-07, + "loss": 0.2892, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013424522243440151, - "step": 1151, - "valid_targets_mean": 4520.6, - "valid_targets_min": 1361 + "loss_rank_avg": 0.15195101499557495, + "step": 4520, + "valid_targets_mean": 4126.9, + "valid_targets_min": 866 }, { - "epoch": 4.664303797468355, - "grad_norm": 0.2381173828939779, - "learning_rate": 5.615485942678778e-07, - "loss": 0.1154, + "epoch": 4.580253164556962, + "grad_norm": 0.421939799277528, + "learning_rate": 8.578632664817177e-07, + "loss": 0.2998, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01639263704419136, - "step": 1152, - "valid_targets_mean": 5444.4, - "valid_targets_min": 1579 + "loss_rank_avg": 0.15855969488620758, + "step": 4525, + "valid_targets_mean": 4059.2, + "valid_targets_min": 1162 }, { - "epoch": 4.668354430379747, - "grad_norm": 0.23270087042794557, - "learning_rate": 5.483190430647289e-07, - "loss": 0.1072, + "epoch": 4.585316455696202, + "grad_norm": 0.30679616072425525, + "learning_rate": 8.375097686238187e-07, + "loss": 0.3003, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01745782047510147, - "step": 1153, - "valid_targets_mean": 6098.2, - "valid_targets_min": 1126 + "loss_rank_avg": 0.15983693301677704, + "step": 4530, + "valid_targets_mean": 4688.2, + "valid_targets_min": 844 }, { - "epoch": 4.672405063291139, - "grad_norm": 0.24265278992980324, - "learning_rate": 5.352450269841747e-07, - "loss": 0.108, + "epoch": 4.590379746835443, + "grad_norm": 0.33783626379593873, + "learning_rate": 8.173954658952854e-07, + "loss": 0.2979, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014374315738677979, - "step": 1154, - "valid_targets_mean": 4428.9, - "valid_targets_min": 1319 + "loss_rank_avg": 0.13029304146766663, + "step": 4535, + "valid_targets_mean": 3584.9, + "valid_targets_min": 1132 }, { - "epoch": 4.676455696202532, - "grad_norm": 0.22989885206535504, - "learning_rate": 5.22326650565701e-07, - "loss": 0.1107, + "epoch": 4.595443037974683, + "grad_norm": 0.409725960177412, + "learning_rate": 7.975206093722176e-07, + "loss": 0.3019, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01609790325164795, - "step": 1155, - "valid_targets_mean": 4971.1, - "valid_targets_min": 1240 + "loss_rank_avg": 0.1639317274093628, + "step": 4540, + "valid_targets_mean": 4393.8, + "valid_targets_min": 981 }, { - "epoch": 4.680506329113924, - "grad_norm": 0.22750941745027642, - "learning_rate": 5.095640171043048e-07, - "loss": 0.1066, + "epoch": 4.600506329113924, + "grad_norm": 0.34883872064169896, + "learning_rate": 7.778854471418306e-07, + "loss": 0.2912, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009911238215863705, - "step": 1156, - "valid_targets_mean": 3801.1, - "valid_targets_min": 1198 + "loss_rank_avg": 0.1438419073820114, + "step": 4545, + "valid_targets_mean": 3598.3, + "valid_targets_min": 1015 }, { - "epoch": 4.684556962025317, - "grad_norm": 0.24098958521920144, - "learning_rate": 4.969572286496594e-07, - "loss": 0.1119, + "epoch": 4.605569620253164, + "grad_norm": 0.3163445155831429, + "learning_rate": 7.584902242993708e-07, + "loss": 0.2991, "loss_nan_ranks": 0, - "loss_rank_avg": 0.022853152826428413, - "step": 1157, - "valid_targets_mean": 5341.6, - "valid_targets_min": 1436 + "loss_rank_avg": 0.19004172086715698, + "step": 4550, + "valid_targets_mean": 5218.9, + "valid_targets_min": 1337 }, { - "epoch": 4.688607594936709, - "grad_norm": 0.2392105757928696, - "learning_rate": 4.845063860053079e-07, - "loss": 0.1129, + "epoch": 4.610632911392405, + "grad_norm": 0.28908020066387696, + "learning_rate": 7.393351829450379e-07, + "loss": 0.3023, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016803249716758728, - "step": 1158, - "valid_targets_mean": 5111.2, - "valid_targets_min": 1302 + "loss_rank_avg": 0.12715786695480347, + "step": 4555, + "valid_targets_mean": 4291.9, + "valid_targets_min": 1068 }, { - "epoch": 4.692658227848101, - "grad_norm": 0.24206555145149455, - "learning_rate": 4.7221158872785997e-07, - "loss": 0.1129, + "epoch": 4.6156962025316455, + "grad_norm": 0.3390758182489428, + "learning_rate": 7.204205621809813e-07, + "loss": 0.3018, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011856446042656898, - "step": 1159, - "valid_targets_mean": 4355.4, - "valid_targets_min": 1073 + "loss_rank_avg": 0.16513592004776, + "step": 4560, + "valid_targets_mean": 4141.6, + "valid_targets_min": 1241 }, { - "epoch": 4.696708860759494, - "grad_norm": 0.24045095578781692, - "learning_rate": 4.600729351261857e-07, - "loss": 0.1108, + "epoch": 4.620759493670886, + "grad_norm": 0.3707332505274512, + "learning_rate": 7.017465981083127e-07, + "loss": 0.3019, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017727192491292953, - "step": 1160, - "valid_targets_mean": 5086.9, - "valid_targets_min": 1256 + "loss_rank_avg": 0.18795087933540344, + "step": 4565, + "valid_targets_mean": 6154.2, + "valid_targets_min": 1174 }, { - "epoch": 4.700759493670886, - "grad_norm": 0.23286053027211254, - "learning_rate": 4.4809052226063355e-07, - "loss": 0.1101, + "epoch": 4.6258227848101265, + "grad_norm": 0.3431279755640469, + "learning_rate": 6.833135238241473e-07, + "loss": 0.2955, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009751537814736366, - "step": 1161, - "valid_targets_mean": 3784.0, - "valid_targets_min": 1066 + "loss_rank_avg": 0.1402522623538971, + "step": 4570, + "valid_targets_mean": 3705.2, + "valid_targets_min": 1346 }, { - "epoch": 4.704810126582278, - "grad_norm": 0.25441081024171247, - "learning_rate": 4.3626444594226046e-07, - "loss": 0.1066, + "epoch": 4.630886075949367, + "grad_norm": 0.3201150073208208, + "learning_rate": 6.651215694187074e-07, + "loss": 0.2956, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015385766513645649, - "step": 1162, - "valid_targets_mean": 4484.6, - "valid_targets_min": 1134 + "loss_rank_avg": 0.13963478803634644, + "step": 4575, + "valid_targets_mean": 4570.6, + "valid_targets_min": 1027 }, { - "epoch": 4.708860759493671, - "grad_norm": 0.24446375599635572, - "learning_rate": 4.245948007320588e-07, - "loss": 0.1175, + "epoch": 4.635949367088608, + "grad_norm": 0.32083737479519586, + "learning_rate": 6.471709619724386e-07, + "loss": 0.2811, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012111935764551163, - "step": 1163, - "valid_targets_mean": 3709.1, - "valid_targets_min": 1099 + "loss_rank_avg": 0.1721346378326416, + "step": 4580, + "valid_targets_mean": 4410.9, + "valid_targets_min": 1268 }, { - "epoch": 4.712911392405063, - "grad_norm": 0.23130476730298755, - "learning_rate": 4.130816799401971e-07, - "loss": 0.1058, + "epoch": 4.641012658227848, + "grad_norm": 0.3413968428656355, + "learning_rate": 6.294619255531853e-07, + "loss": 0.2984, "loss_nan_ranks": 0, - "loss_rank_avg": 0.008287204429507256, - "step": 1164, - "valid_targets_mean": 3054.6, - "valid_targets_min": 1392 + "loss_rank_avg": 0.1746809184551239, + "step": 4585, + "valid_targets_mean": 4543.4, + "valid_targets_min": 1503 }, { - "epoch": 4.716962025316455, - "grad_norm": 0.24435314164443975, - "learning_rate": 4.017251756252827e-07, - "loss": 0.1164, + "epoch": 4.646075949367089, + "grad_norm": 0.2984905883347286, + "learning_rate": 6.119946812133926e-07, + "loss": 0.2957, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013173110783100128, - "step": 1165, - "valid_targets_mean": 3855.7, - "valid_targets_min": 1333 + "loss_rank_avg": 0.2020062506198883, + "step": 4590, + "valid_targets_mean": 7061.8, + "valid_targets_min": 1615 }, { - "epoch": 4.721012658227848, - "grad_norm": 0.223425818597529, - "learning_rate": 3.9052537859362473e-07, - "loss": 0.1154, + "epoch": 4.651139240506329, + "grad_norm": 0.2957015403668056, + "learning_rate": 5.947694469873377e-07, + "loss": 0.3024, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012818815186619759, - "step": 1166, - "valid_targets_mean": 4592.1, - "valid_targets_min": 1370 + "loss_rank_avg": 0.1912672221660614, + "step": 4595, + "valid_targets_mean": 6352.6, + "valid_targets_min": 1115 }, { - "epoch": 4.72506329113924, - "grad_norm": 0.2345797216984834, - "learning_rate": 3.7948237839849687e-07, - "loss": 0.1077, + "epoch": 4.65620253164557, + "grad_norm": 0.3177142897029759, + "learning_rate": 5.777864378884212e-07, + "loss": 0.2905, "loss_nan_ranks": 0, - "loss_rank_avg": 0.020206928253173828, - "step": 1167, - "valid_targets_mean": 5562.1, - "valid_targets_min": 1174 + "loss_rank_avg": 0.1385997235774994, + "step": 4600, + "valid_targets_mean": 4380.5, + "valid_targets_min": 1184 }, { - "epoch": 4.729113924050633, - "grad_norm": 0.24694843961663482, - "learning_rate": 3.6859626333943797e-07, - "loss": 0.1094, + "epoch": 4.66126582278481, + "grad_norm": 0.30671392692985, + "learning_rate": 5.610458659064688e-07, + "loss": 0.3067, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011120930314064026, - "step": 1168, - "valid_targets_mean": 4122.5, - "valid_targets_min": 1409 + "loss_rank_avg": 0.17495407164096832, + "step": 4605, + "valid_targets_mean": 5155.4, + "valid_targets_min": 1128 }, { - "epoch": 4.733164556962025, - "grad_norm": 0.23916710669800414, - "learning_rate": 3.578671204615347e-07, - "loss": 0.1155, + "epoch": 4.666329113924051, + "grad_norm": 0.32460623503028924, + "learning_rate": 5.445479400051046e-07, + "loss": 0.2997, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010612422600388527, - "step": 1169, - "valid_targets_mean": 3807.1, - "valid_targets_min": 1177 + "loss_rank_avg": 0.1435496062040329, + "step": 4610, + "valid_targets_mean": 4002.2, + "valid_targets_min": 1103 }, { - "epoch": 4.737215189873417, - "grad_norm": 0.23534285201891775, - "learning_rate": 3.472950355547267e-07, - "loss": 0.117, + "epoch": 4.671392405063291, + "grad_norm": 0.3077758754022733, + "learning_rate": 5.282928661191178e-07, + "loss": 0.283, "loss_nan_ranks": 0, - "loss_rank_avg": 0.017033474519848824, - "step": 1170, - "valid_targets_mean": 5603.9, - "valid_targets_min": 1779 + "loss_rank_avg": 0.11725576967000961, + "step": 4615, + "valid_targets_mean": 3599.3, + "valid_targets_min": 1270 }, { - "epoch": 4.74126582278481, - "grad_norm": 0.21909731195941826, - "learning_rate": 3.3688009315312244e-07, - "loss": 0.1056, + "epoch": 4.676455696202532, + "grad_norm": 0.2920943225927082, + "learning_rate": 5.12280847151918e-07, + "loss": 0.2933, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01086464338004589, - "step": 1171, - "valid_targets_mean": 4267.8, - "valid_targets_min": 1056 + "loss_rank_avg": 0.15268352627754211, + "step": 4620, + "valid_targets_mean": 4974.8, + "valid_targets_min": 1238 }, { - "epoch": 4.745316455696202, - "grad_norm": 0.24388805731338395, - "learning_rate": 3.266223765343224e-07, - "loss": 0.11, + "epoch": 4.681518987341772, + "grad_norm": 0.3310902814879114, + "learning_rate": 4.965120829729819e-07, + "loss": 0.2944, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015588557347655296, - "step": 1172, - "valid_targets_mean": 4333.6, - "valid_targets_min": 1172 + "loss_rank_avg": 0.1577180027961731, + "step": 4625, + "valid_targets_mean": 4892.1, + "valid_targets_min": 1480 }, { - "epoch": 4.749367088607595, - "grad_norm": 0.22955891283346133, - "learning_rate": 3.16521967718757e-07, - "loss": 0.1115, + "epoch": 4.686582278481013, + "grad_norm": 0.3364738036743787, + "learning_rate": 4.809867704153726e-07, + "loss": 0.2962, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016997043043375015, - "step": 1173, - "valid_targets_mean": 5823.1, - "valid_targets_min": 1068 + "loss_rank_avg": 0.11956533789634705, + "step": 4630, + "valid_targets_mean": 3546.2, + "valid_targets_min": 914 }, { - "epoch": 4.7534177215189874, - "grad_norm": 0.23474462024092194, - "learning_rate": 3.0657894746902726e-07, - "loss": 0.098, + "epoch": 4.6916455696202535, + "grad_norm": 0.34774374236202665, + "learning_rate": 4.657051032732707e-07, + "loss": 0.3098, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0072157541289925575, - "step": 1174, - "valid_targets_mean": 2820.7, - "valid_targets_min": 986 + "loss_rank_avg": 0.14325565099716187, + "step": 4635, + "valid_targets_mean": 3353.1, + "valid_targets_min": 908 }, { - "epoch": 4.7574683544303795, - "grad_norm": 0.23196623024831636, - "learning_rate": 2.9679339528925654e-07, - "loss": 0.1042, + "epoch": 4.696708860759494, + "grad_norm": 0.32445866672285933, + "learning_rate": 4.506672722995609e-07, + "loss": 0.2953, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01756441406905651, - "step": 1175, - "valid_targets_mean": 5168.0, - "valid_targets_min": 956 + "loss_rank_avg": 0.1779376119375229, + "step": 4640, + "valid_targets_mean": 5084.9, + "valid_targets_min": 1254 }, { - "epoch": 4.7615189873417725, - "grad_norm": 0.2459616528038731, - "learning_rate": 2.871653894244597e-07, - "loss": 0.108, + "epoch": 4.7017721518987345, + "grad_norm": 0.3370837185705519, + "learning_rate": 4.358734652034624e-07, + "loss": 0.3029, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010087177157402039, - "step": 1176, - "valid_targets_mean": 3164.2, - "valid_targets_min": 1003 + "loss_rank_avg": 0.12637917697429657, + "step": 4645, + "valid_targets_mean": 3544.5, + "valid_targets_min": 1283 }, { - "epoch": 4.7655696202531646, - "grad_norm": 0.23554609620846492, - "learning_rate": 2.7769500685991266e-07, - "loss": 0.1085, + "epoch": 4.706835443037974, + "grad_norm": 0.3141292209016846, + "learning_rate": 4.2132386664815783e-07, + "loss": 0.3063, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013887660577893257, - "step": 1177, - "valid_targets_mean": 4648.5, - "valid_targets_min": 876 + "loss_rank_avg": 0.13325795531272888, + "step": 4650, + "valid_targets_mean": 4623.6, + "valid_targets_min": 1232 }, { - "epoch": 4.769620253164557, - "grad_norm": 0.23193207006204014, - "learning_rate": 2.683823233205485e-07, - "loss": 0.1046, + "epoch": 4.711898734177215, + "grad_norm": 0.3052040124075767, + "learning_rate": 4.070186582485214e-07, + "loss": 0.2944, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012032478116452694, - "step": 1178, - "valid_targets_mean": 3907.8, - "valid_targets_min": 1086 + "loss_rank_avg": 0.14848539233207703, + "step": 4655, + "valid_targets_mean": 3974.2, + "valid_targets_min": 1752 }, { - "epoch": 4.77367088607595, - "grad_norm": 0.2510058757439523, - "learning_rate": 2.592274132703287e-07, - "loss": 0.1052, + "epoch": 4.716962025316455, + "grad_norm": 0.3485398015143201, + "learning_rate": 3.9295801856882307e-07, + "loss": 0.3033, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014164448715746403, - "step": 1179, - "valid_targets_mean": 4645.2, - "valid_targets_min": 998 + "loss_rank_avg": 0.15560302138328552, + "step": 4660, + "valid_targets_mean": 3853.7, + "valid_targets_min": 1331 }, { - "epoch": 4.777721518987342, - "grad_norm": 0.23175267801304952, - "learning_rate": 2.502303499116754e-07, - "loss": 0.1069, + "epoch": 4.722025316455696, + "grad_norm": 0.30603263342163534, + "learning_rate": 3.791421231205217e-07, + "loss": 0.2941, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009768212214112282, - "step": 1180, - "valid_targets_mean": 3876.7, - "valid_targets_min": 967 + "loss_rank_avg": 0.1302764117717743, + "step": 4665, + "valid_targets_mean": 4759.7, + "valid_targets_min": 980 }, { - "epoch": 4.781772151898734, - "grad_norm": 0.24370100149211857, - "learning_rate": 2.413912051848666e-07, - "loss": 0.0988, + "epoch": 4.727088607594936, + "grad_norm": 0.36206047089808735, + "learning_rate": 3.6557114436005516e-07, + "loss": 0.2953, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009045436978340149, - "step": 1181, - "valid_targets_mean": 3097.1, - "valid_targets_min": 1008 + "loss_rank_avg": 0.11317434906959534, + "step": 4670, + "valid_targets_mean": 2750.8, + "valid_targets_min": 1281 }, { - "epoch": 4.785822784810127, - "grad_norm": 0.2370454588564179, - "learning_rate": 2.3271004976746613e-07, - "loss": 0.1159, + "epoch": 4.732151898734177, + "grad_norm": 0.31924377751088473, + "learning_rate": 3.522452516867048e-07, + "loss": 0.3074, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011587612330913544, - "step": 1182, - "valid_targets_mean": 3934.2, - "valid_targets_min": 1371 + "loss_rank_avg": 0.19149929285049438, + "step": 4675, + "valid_targets_mean": 5680.5, + "valid_targets_min": 1070 }, { - "epoch": 4.789873417721519, - "grad_norm": 0.23815121483817714, - "learning_rate": 2.2418695307376393e-07, - "loss": 0.1128, + "epoch": 4.737215189873417, + "grad_norm": 0.40142669703450146, + "learning_rate": 3.391646114404701e-07, + "loss": 0.3025, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014157026074826717, - "step": 1183, - "valid_targets_mean": 4409.0, - "valid_targets_min": 1453 + "loss_rank_avg": 0.1676904857158661, + "step": 4680, + "valid_targets_mean": 5601.9, + "valid_targets_min": 1777 }, { - "epoch": 4.793924050632912, - "grad_norm": 0.23783821363379468, - "learning_rate": 2.1582198325420968e-07, - "loss": 0.1133, + "epoch": 4.742278481012658, + "grad_norm": 0.33298593772917773, + "learning_rate": 3.263293868999928e-07, + "loss": 0.2974, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010271416045725346, - "step": 1184, - "valid_targets_mean": 3274.3, - "valid_targets_min": 1086 + "loss_rank_avg": 0.1185358390212059, + "step": 4685, + "valid_targets_mean": 2993.5, + "valid_targets_min": 1244 }, { - "epoch": 4.797974683544304, - "grad_norm": 0.23229256280874674, - "learning_rate": 2.0761520719488447e-07, - "loss": 0.0997, + "epoch": 4.7473417721518985, + "grad_norm": 0.28590139808721005, + "learning_rate": 3.1373973828052964e-07, + "loss": 0.2946, "loss_nan_ranks": 0, - "loss_rank_avg": 0.006490450352430344, - "step": 1185, - "valid_targets_mean": 2632.8, - "valid_targets_min": 1157 + "loss_rank_avg": 0.1383609175682068, + "step": 4690, + "valid_targets_mean": 4685.9, + "valid_targets_min": 1897 }, { - "epoch": 4.802025316455696, - "grad_norm": 0.24419609664439176, - "learning_rate": 1.9956669051694798e-07, - "loss": 0.1104, + "epoch": 4.752405063291139, + "grad_norm": 0.33045699934667616, + "learning_rate": 3.013958227319358e-07, + "loss": 0.2952, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012339601293206215, - "step": 1186, - "valid_targets_mean": 3850.8, - "valid_targets_min": 1617 + "loss_rank_avg": 0.128380686044693, + "step": 4695, + "valid_targets_mean": 3661.8, + "valid_targets_min": 1102 }, { - "epoch": 4.806075949367089, - "grad_norm": 0.24897996532180455, - "learning_rate": 1.916764975761254e-07, - "loss": 0.1164, + "epoch": 4.7574683544303795, + "grad_norm": 0.3175505591380564, + "learning_rate": 2.892977943367159e-07, + "loss": 0.2774, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012995798140764236, - "step": 1187, - "valid_targets_mean": 3230.2, - "valid_targets_min": 688 + "loss_rank_avg": 0.19775532186031342, + "step": 4700, + "valid_targets_mean": 5166.0, + "valid_targets_min": 954 }, { - "epoch": 4.810126582278481, - "grad_norm": 0.24236818776265567, - "learning_rate": 1.839446914621923e-07, - "loss": 0.1144, + "epoch": 4.76253164556962, + "grad_norm": 0.2773796894575858, + "learning_rate": 2.774458041080963e-07, + "loss": 0.2875, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01723683439195156, - "step": 1188, - "valid_targets_mean": 5158.0, - "valid_targets_min": 1305 + "loss_rank_avg": 0.12263042479753494, + "step": 4705, + "valid_targets_mean": 4538.4, + "valid_targets_min": 1559 }, { - "epoch": 4.814177215189874, - "grad_norm": 0.23727219276751094, - "learning_rate": 1.7637133399845962e-07, - "loss": 0.1071, + "epoch": 4.767594936708861, + "grad_norm": 0.3292357564608315, + "learning_rate": 2.6583999998814e-07, + "loss": 0.2953, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012703627347946167, - "step": 1189, - "valid_targets_mean": 4795.9, - "valid_targets_min": 1235 + "loss_rank_avg": 0.14571279287338257, + "step": 4710, + "valid_targets_mean": 3984.7, + "valid_targets_min": 1195 }, { - "epoch": 4.818227848101266, - "grad_norm": 0.24391400967421373, - "learning_rate": 1.6895648574129397e-07, - "loss": 0.1121, + "epoch": 4.772658227848101, + "grad_norm": 0.3368831623026044, + "learning_rate": 2.544805268459016e-07, + "loss": 0.2867, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01454693078994751, - "step": 1190, - "valid_targets_mean": 4082.4, - "valid_targets_min": 1370 + "loss_rank_avg": 0.21459759771823883, + "step": 4715, + "valid_targets_mean": 4702.3, + "valid_targets_min": 1266 }, { - "epoch": 4.822278481012658, - "grad_norm": 0.24677221947667136, - "learning_rate": 1.6170020597962465e-07, - "loss": 0.1078, + "epoch": 4.777721518987342, + "grad_norm": 0.335601220973268, + "learning_rate": 2.4336752647561304e-07, + "loss": 0.2845, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01703988015651703, - "step": 1191, - "valid_targets_mean": 4989.6, - "valid_targets_min": 1047 + "loss_rank_avg": 0.14298905432224274, + "step": 4720, + "valid_targets_mean": 3874.7, + "valid_targets_min": 965 }, { - "epoch": 4.826329113924051, - "grad_norm": 0.23896025862435885, - "learning_rate": 1.5460255273447078e-07, - "loss": 0.1125, + "epoch": 4.782784810126582, + "grad_norm": 0.28413880966924987, + "learning_rate": 2.3250113759492266e-07, + "loss": 0.2862, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015895096585154533, - "step": 1192, - "valid_targets_mean": 5156.8, - "valid_targets_min": 957 + "loss_rank_avg": 0.1618804931640625, + "step": 4725, + "valid_targets_mean": 5582.7, + "valid_targets_min": 868 }, { - "epoch": 4.830379746835443, - "grad_norm": 0.23823056699462464, - "learning_rate": 1.4766358275847715e-07, - "loss": 0.1047, + "epoch": 4.787848101265823, + "grad_norm": 0.3021482607745952, + "learning_rate": 2.2188149584315922e-07, + "loss": 0.3055, "loss_nan_ranks": 0, - "loss_rank_avg": 0.007428309414535761, - "step": 1193, - "valid_targets_mean": 2911.4, - "valid_targets_min": 1249 + "loss_rank_avg": 0.15998998284339905, + "step": 4730, + "valid_targets_mean": 4565.1, + "valid_targets_min": 1149 }, { - "epoch": 4.834430379746835, - "grad_norm": 0.2247744160174395, - "learning_rate": 1.4088335153546573e-07, - "loss": 0.1202, + "epoch": 4.792911392405063, + "grad_norm": 0.3011882542828061, + "learning_rate": 2.1150873377963954e-07, + "loss": 0.3021, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01684490591287613, - "step": 1194, - "valid_targets_mean": 5795.6, - "valid_targets_min": 1354 + "loss_rank_avg": 0.1401754915714264, + "step": 4735, + "valid_targets_mean": 5010.8, + "valid_targets_min": 998 }, { - "epoch": 4.838481012658228, - "grad_norm": 0.2272848805922521, - "learning_rate": 1.3426191327998496e-07, - "loss": 0.1122, + "epoch": 4.797974683544304, + "grad_norm": 0.3507567997328214, + "learning_rate": 2.0138298088201004e-07, + "loss": 0.2854, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015583060681819916, - "step": 1195, - "valid_targets_mean": 5315.5, - "valid_targets_min": 1446 + "loss_rank_avg": 0.0675700232386589, + "step": 4740, + "valid_targets_mean": 2630.8, + "valid_targets_min": 1155 }, { - "epoch": 4.84253164556962, - "grad_norm": 0.24493188674129462, - "learning_rate": 1.277993209368744e-07, - "loss": 0.106, + "epoch": 4.803037974683544, + "grad_norm": 0.3187910514753291, + "learning_rate": 1.915043635446323e-07, + "loss": 0.2916, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011442995630204678, - "step": 1196, - "valid_targets_mean": 3736.6, - "valid_targets_min": 1506 + "loss_rank_avg": 0.12245633453130722, + "step": 4745, + "valid_targets_mean": 3394.0, + "valid_targets_min": 1403 }, { - "epoch": 4.846582278481012, - "grad_norm": 0.2382142438109627, - "learning_rate": 1.214956261808542e-07, - "loss": 0.1069, + "epoch": 4.808101265822785, + "grad_norm": 0.2967133708531598, + "learning_rate": 1.8187300507701345e-07, + "loss": 0.3035, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013297231867909431, - "step": 1197, - "valid_targets_mean": 4526.4, - "valid_targets_min": 1470 + "loss_rank_avg": 0.1302807629108429, + "step": 4750, + "valid_targets_mean": 3881.6, + "valid_targets_min": 1033 }, { - "epoch": 4.850632911392405, - "grad_norm": 0.2264295628229462, - "learning_rate": 1.1535087941609846e-07, - "loss": 0.1076, + "epoch": 4.813164556962025, + "grad_norm": 0.30044714660435673, + "learning_rate": 1.7248902570225378e-07, + "loss": 0.2941, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012269329279661179, - "step": 1198, - "valid_targets_mean": 4186.8, - "valid_targets_min": 1153 + "loss_rank_avg": 0.13116371631622314, + "step": 4755, + "valid_targets_mean": 4054.7, + "valid_targets_min": 886 }, { - "epoch": 4.854683544303797, - "grad_norm": 0.2307526058124495, - "learning_rate": 1.0936512977583802e-07, - "loss": 0.1121, + "epoch": 4.818227848101266, + "grad_norm": 0.3161774057323668, + "learning_rate": 1.6335254255555933e-07, + "loss": 0.2925, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010853808373212814, - "step": 1199, - "valid_targets_mean": 3549.2, - "valid_targets_min": 1597 + "loss_rank_avg": 0.14581556618213654, + "step": 4760, + "valid_targets_mean": 4080.4, + "valid_targets_min": 1368 }, { - "epoch": 4.85873417721519, - "grad_norm": 0.23868588844023084, - "learning_rate": 1.0353842512196732e-07, - "loss": 0.113, + "epoch": 4.8232911392405065, + "grad_norm": 0.30034489171760914, + "learning_rate": 1.5446366968276283e-07, + "loss": 0.2973, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009980385191738605, - "step": 1200, - "valid_targets_mean": 3069.8, - "valid_targets_min": 1044 + "loss_rank_avg": 0.1464047133922577, + "step": 4765, + "valid_targets_mean": 4359.5, + "valid_targets_min": 1093 }, { - "epoch": 4.862784810126582, - "grad_norm": 0.23795196443442018, - "learning_rate": 9.787081204466253e-08, - "loss": 0.1183, + "epoch": 4.828354430379747, + "grad_norm": 0.30396263421785596, + "learning_rate": 1.4582251803892055e-07, + "loss": 0.2898, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015351405367255211, - "step": 1201, - "valid_targets_mean": 4512.4, - "valid_targets_min": 1229 + "loss_rank_avg": 0.14488472044467926, + "step": 4770, + "valid_targets_mean": 4854.4, + "valid_targets_min": 956 }, { - "epoch": 4.866835443037974, - "grad_norm": 0.25403745028440267, - "learning_rate": 9.236233586200627e-08, - "loss": 0.1181, + "epoch": 4.8334177215189875, + "grad_norm": 0.25886849889886787, + "learning_rate": 1.3742919548691114e-07, + "loss": 0.2973, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01124050747603178, - "step": 1202, - "valid_targets_mean": 3405.4, - "valid_targets_min": 812 + "loss_rank_avg": 0.13551469147205353, + "step": 4775, + "valid_targets_mean": 5292.5, + "valid_targets_min": 1085 }, { - "epoch": 4.870886075949367, - "grad_norm": 0.23091009126628026, - "learning_rate": 8.70130406196279e-08, - "loss": 0.1053, + "epoch": 4.838481012658228, + "grad_norm": 0.3369536866060149, + "learning_rate": 1.2928380679609442e-07, + "loss": 0.2957, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015791581943631172, - "step": 1203, - "valid_targets_mean": 5422.2, - "valid_targets_min": 1240 + "loss_rank_avg": 0.20701755583286285, + "step": 4780, + "valid_targets_mean": 5313.5, + "valid_targets_min": 1444 }, { - "epoch": 4.874936708860759, - "grad_norm": 0.24714675801931965, - "learning_rate": 8.182296909035047e-08, - "loss": 0.1097, + "epoch": 4.843544303797469, + "grad_norm": 0.2981209270612857, + "learning_rate": 1.2138645364101032e-07, + "loss": 0.2933, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013445362448692322, - "step": 1204, - "valid_targets_mean": 3916.1, - "valid_targets_min": 1263 + "loss_rank_avg": 0.11797007918357849, + "step": 4785, + "valid_targets_mean": 4077.7, + "valid_targets_min": 1058 }, { - "epoch": 4.878987341772152, - "grad_norm": 0.23238400031620596, - "learning_rate": 7.679216277384439e-08, - "loss": 0.1089, + "epoch": 4.848607594936709, + "grad_norm": 0.2923056085103413, + "learning_rate": 1.1373723460009756e-07, + "loss": 0.2866, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013845035806298256, - "step": 1205, - "valid_targets_mean": 4798.1, - "valid_targets_min": 1292 + "loss_rank_avg": 0.18645049631595612, + "step": 4790, + "valid_targets_mean": 6275.9, + "valid_targets_min": 1356 }, { - "epoch": 4.883037974683544, - "grad_norm": 0.2433693133823994, - "learning_rate": 7.19206618963053e-08, - "loss": 0.1141, + "epoch": 4.85367088607595, + "grad_norm": 0.31520564975094073, + "learning_rate": 1.0633624515447027e-07, + "loss": 0.3001, "loss_nan_ranks": 0, - "loss_rank_avg": 0.008713947609066963, - "step": 1206, - "valid_targets_mean": 3317.1, - "valid_targets_min": 1387 + "loss_rank_avg": 0.1347823590040207, + "step": 4795, + "valid_targets_mean": 3709.9, + "valid_targets_min": 1143 }, { - "epoch": 4.8870886075949365, - "grad_norm": 0.24241122111292934, - "learning_rate": 6.720850541012347e-08, - "loss": 0.1089, + "epoch": 4.85873417721519, + "grad_norm": 0.330282900217264, + "learning_rate": 9.918357768673004e-08, + "loss": 0.3003, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013348542153835297, - "step": 1207, - "valid_targets_mean": 4618.2, - "valid_targets_min": 1733 + "loss_rank_avg": 0.10554465651512146, + "step": 4800, + "valid_targets_mean": 3067.8, + "valid_targets_min": 1042 }, { - "epoch": 4.891139240506329, - "grad_norm": 0.2304352158805518, - "learning_rate": 6.265573099357714e-08, - "loss": 0.1072, + "epoch": 4.863797468354431, + "grad_norm": 0.34394007944621313, + "learning_rate": 9.22793214797979e-08, + "loss": 0.3043, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011430079117417336, - "step": 1208, - "valid_targets_mean": 4107.3, - "valid_targets_min": 1089 + "loss_rank_avg": 0.1265249103307724, + "step": 4805, + "valid_targets_mean": 3206.9, + "valid_targets_min": 972 }, { - "epoch": 4.8951898734177215, - "grad_norm": 0.24145238610331443, - "learning_rate": 5.826237505053067e-08, - "loss": 0.108, + "epoch": 4.868860759493671, + "grad_norm": 0.3066301892656282, + "learning_rate": 8.562356271582194e-08, + "loss": 0.3032, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011784245260059834, - "step": 1209, - "valid_targets_mean": 4086.8, - "valid_targets_min": 1417 + "loss_rank_avg": 0.11144056916236877, + "step": 4810, + "valid_targets_mean": 3448.4, + "valid_targets_min": 1079 }, { - "epoch": 4.899240506329114, - "grad_norm": 0.2450341203448637, - "learning_rate": 5.4028472710137e-08, - "loss": 0.1136, + "epoch": 4.873924050632912, + "grad_norm": 0.31898550298505707, + "learning_rate": 7.92163844750804e-08, + "loss": 0.2921, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014564326032996178, - "step": 1210, - "valid_targets_mean": 4164.2, - "valid_targets_min": 1538 + "loss_rank_avg": 0.17169725894927979, + "step": 4815, + "valid_targets_mean": 4453.8, + "valid_targets_min": 1086 }, { - "epoch": 4.9032911392405065, - "grad_norm": 0.24354061349344405, - "learning_rate": 4.9954057826571146e-08, - "loss": 0.1142, + "epoch": 4.878987341772152, + "grad_norm": 0.35072783756221276, + "learning_rate": 7.305786673495796e-08, + "loss": 0.298, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011334458366036415, - "step": 1211, - "valid_targets_mean": 4018.5, - "valid_targets_min": 1525 + "loss_rank_avg": 0.1834912747144699, + "step": 4820, + "valid_targets_mean": 4796.1, + "valid_targets_min": 1290 }, { - "epoch": 4.907341772151899, - "grad_norm": 0.2315542455299904, - "learning_rate": 4.6039162978743776e-08, - "loss": 0.1104, + "epoch": 4.884050632911393, + "grad_norm": 0.30086828019346806, + "learning_rate": 6.714808636893999e-08, + "loss": 0.2984, "loss_nan_ranks": 0, - "loss_rank_avg": 0.0195978581905365, - "step": 1212, - "valid_targets_mean": 6231.7, - "valid_targets_min": 1774 + "loss_rank_avg": 0.19445079565048218, + "step": 4825, + "valid_targets_mean": 5488.9, + "valid_targets_min": 1013 }, { - "epoch": 4.911392405063291, - "grad_norm": 0.23313479210470323, - "learning_rate": 4.228381947004812e-08, - "loss": 0.1116, + "epoch": 4.889113924050633, + "grad_norm": 0.29066248451169346, + "learning_rate": 6.148711714565992e-08, + "loss": 0.2906, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014019984751939774, - "step": 1213, - "valid_targets_mean": 4620.9, - "valid_targets_min": 1882 + "loss_rank_avg": 0.14921297132968903, + "step": 4830, + "valid_targets_mean": 5289.1, + "valid_targets_min": 1279 }, { - "epoch": 4.915443037974684, - "grad_norm": 0.22855727314511912, - "learning_rate": 3.868805732811343e-08, - "loss": 0.1112, + "epoch": 4.894177215189873, + "grad_norm": 0.3206808977051651, + "learning_rate": 5.6075029727971075e-08, + "loss": 0.294, "loss_nan_ranks": 0, - "loss_rank_avg": 0.007369753904640675, - "step": 1214, - "valid_targets_mean": 3527.6, - "valid_targets_min": 1183 + "loss_rank_avg": 0.15516558289527893, + "step": 4835, + "valid_targets_mean": 4284.6, + "valid_targets_min": 1465 }, { - "epoch": 4.919493670886076, - "grad_norm": 0.25236996871942824, - "learning_rate": 3.525190530455636e-08, - "loss": 0.1108, + "epoch": 4.899240506329114, + "grad_norm": 0.34136663563954944, + "learning_rate": 5.091189167207189e-08, + "loss": 0.3011, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01704399660229683, - "step": 1215, - "valid_targets_mean": 5038.8, - "valid_targets_min": 1361 + "loss_rank_avg": 0.16188789904117584, + "step": 4840, + "valid_targets_mean": 4162.2, + "valid_targets_min": 1536 }, { - "epoch": 4.923544303797469, - "grad_norm": 0.2339802304650147, - "learning_rate": 3.197539087475887e-08, - "loss": 0.1088, + "epoch": 4.904303797468354, + "grad_norm": 0.2892291641651244, + "learning_rate": 4.5997767426653183e-08, + "loss": 0.2969, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014040106907486916, - "step": 1216, - "valid_targets_mean": 5526.5, - "valid_targets_min": 1119 + "loss_rank_avg": 0.11494030058383942, + "step": 4845, + "valid_targets_mean": 3805.9, + "valid_targets_min": 950 }, { - "epoch": 4.927594936708861, - "grad_norm": 0.243821254252586, - "learning_rate": 2.8858540237641786e-08, - "loss": 0.1023, + "epoch": 4.909367088607595, + "grad_norm": 0.3310728562870818, + "learning_rate": 4.133271833210772e-08, + "loss": 0.3063, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01630534417927265, - "step": 1217, - "valid_targets_mean": 4622.2, - "valid_targets_min": 1320 + "loss_rank_avg": 0.15285596251487732, + "step": 4850, + "valid_targets_mean": 4150.3, + "valid_targets_min": 1408 }, { - "epoch": 4.931645569620253, - "grad_norm": 0.23514977499476192, - "learning_rate": 2.5901378315458247e-08, - "loss": 0.1164, + "epoch": 4.914430379746835, + "grad_norm": 0.29329622685622975, + "learning_rate": 3.6916802619746395e-08, + "loss": 0.2958, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015973731875419617, - "step": 1218, - "valid_targets_mean": 5489.3, - "valid_targets_min": 941 + "loss_rank_avg": 0.16643556952476501, + "step": 4855, + "valid_targets_mean": 5756.4, + "valid_targets_min": 1791 }, { - "epoch": 4.935696202531646, - "grad_norm": 0.23066847016906347, - "learning_rate": 2.3103928753598347e-08, - "loss": 0.1082, + "epoch": 4.919493670886076, + "grad_norm": 0.3795378130128926, + "learning_rate": 3.2750075411087654e-08, + "loss": 0.2983, "loss_nan_ranks": 0, - "loss_rank_avg": 0.014494219794869423, - "step": 1219, - "valid_targets_mean": 4435.1, - "valid_targets_min": 1143 + "loss_rank_avg": 0.20965784788131714, + "step": 4860, + "valid_targets_mean": 5036.8, + "valid_targets_min": 1359 }, { - "epoch": 4.939746835443038, - "grad_norm": 0.23599418339991424, - "learning_rate": 2.0466213920393718e-08, - "loss": 0.1134, + "epoch": 4.924556962025316, + "grad_norm": 0.3227285897594828, + "learning_rate": 2.8832588717164766e-08, + "loss": 0.2875, "loss_nan_ranks": 0, - "loss_rank_avg": 0.009911555796861649, - "step": 1220, - "valid_targets_mean": 4003.4, - "valid_targets_min": 1057 + "loss_rank_avg": 0.13055765628814697, + "step": 4865, + "valid_targets_mean": 3481.6, + "valid_targets_min": 1608 }, { - "epoch": 4.943797468354431, - "grad_norm": 0.23839899590308894, - "learning_rate": 1.7988254906942115e-08, - "loss": 0.1136, + "epoch": 4.929620253164557, + "grad_norm": 0.30074655411206147, + "learning_rate": 2.516439143786631e-08, + "loss": 0.3002, "loss_nan_ranks": 0, - "loss_rank_avg": 0.016164466738700867, - "step": 1221, - "valid_targets_mean": 5077.0, - "valid_targets_min": 1211 + "loss_rank_avg": 0.17739662528038025, + "step": 4870, + "valid_targets_mean": 5336.9, + "valid_targets_min": 786 }, { - "epoch": 4.947848101265823, - "grad_norm": 0.23916470486920938, - "learning_rate": 1.5670071526936447e-08, - "loss": 0.1126, + "epoch": 4.934683544303797, + "grad_norm": 0.37988249749008485, + "learning_rate": 2.1745529361343333e-08, + "loss": 0.2951, "loss_nan_ranks": 0, - "loss_rank_avg": 0.01405949704349041, - "step": 1222, - "valid_targets_mean": 4898.8, - "valid_targets_min": 1266 + "loss_rank_avg": 0.18349409103393555, + "step": 4875, + "valid_targets_mean": 5208.7, + "valid_targets_min": 1284 }, { - "epoch": 4.951898734177215, - "grad_norm": 0.21741608069325183, - "learning_rate": 1.3511682316509344e-08, - "loss": 0.1014, + "epoch": 4.939746835443038, + "grad_norm": 0.3022387792298933, + "learning_rate": 1.857604516342315e-08, + "loss": 0.2971, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011473024263978004, - "step": 1223, - "valid_targets_mean": 4689.2, - "valid_targets_min": 1033 + "loss_rank_avg": 0.1349787563085556, + "step": 4880, + "valid_targets_mean": 4001.4, + "valid_targets_min": 1055 }, { - "epoch": 4.955949367088608, - "grad_norm": 0.2292728694270739, - "learning_rate": 1.1513104534086605e-08, - "loss": 0.1031, + "epoch": 4.944810126582278, + "grad_norm": 0.3239022765036362, + "learning_rate": 1.5655978407085326e-08, + "loss": 0.3022, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012841494753956795, - "step": 1224, - "valid_targets_mean": 4275.0, - "valid_targets_min": 1314 + "loss_rank_avg": 0.15572325885295868, + "step": 4885, + "valid_targets_mean": 4256.0, + "valid_targets_min": 1282 }, { - "epoch": 4.96, - "grad_norm": 0.25041866378749583, - "learning_rate": 9.67435416023843e-09, - "loss": 0.1033, + "epoch": 4.949873417721519, + "grad_norm": 0.3484022919664308, + "learning_rate": 1.2985365541959838e-08, + "loss": 0.2871, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018508005887269974, - "step": 1225, - "valid_targets_mean": 4550.4, - "valid_targets_min": 1057 + "loss_rank_avg": 0.1123068630695343, + "step": 4890, + "valid_targets_mean": 3383.1, + "valid_targets_min": 1353 }, { - "epoch": 4.964050632911392, - "grad_norm": 0.22277492122517895, - "learning_rate": 7.995445897566178e-09, - "loss": 0.1046, + "epoch": 4.954936708860759, + "grad_norm": 0.2766076122909068, + "learning_rate": 1.0564239903885221e-08, + "loss": 0.298, "loss_nan_ranks": 0, - "loss_rank_avg": 0.010599160566926003, - "step": 1226, - "valid_targets_mean": 4327.2, - "valid_targets_min": 1313 + "loss_rank_avg": 0.13588979840278625, + "step": 4895, + "valid_targets_mean": 5022.2, + "valid_targets_min": 1687 }, { - "epoch": 4.968101265822785, - "grad_norm": 0.2367509043949318, - "learning_rate": 6.476393170578021e-09, - "loss": 0.1001, + "epoch": 4.96, + "grad_norm": 0.3637031857594984, + "learning_rate": 8.392631714477794e-09, + "loss": 0.2958, "loss_nan_ranks": 0, - "loss_rank_avg": 0.011691408231854439, - "step": 1227, - "valid_targets_mean": 4577.2, - "valid_targets_min": 936 + "loss_rank_avg": 0.22151219844818115, + "step": 4900, + "valid_targets_mean": 4548.4, + "valid_targets_min": 1055 }, { - "epoch": 4.972151898734177, - "grad_norm": 0.22551297933814682, - "learning_rate": 5.11720812557348e-09, - "loss": 0.1036, + "epoch": 4.9650632911392405, + "grad_norm": 0.3070617643850501, + "learning_rate": 6.470568080760853e-09, + "loss": 0.2957, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013229941949248314, - "step": 1228, - "valid_targets_mean": 5726.6, - "valid_targets_min": 869 + "loss_rank_avg": 0.1599656343460083, + "step": 4905, + "valid_targets_mean": 5380.0, + "valid_targets_min": 696 }, { - "epoch": 4.976202531645569, - "grad_norm": 0.23734494557382904, - "learning_rate": 3.917901630563492e-09, - "loss": 0.1112, + "epoch": 4.970126582278481, + "grad_norm": 0.2982099706698035, + "learning_rate": 4.798072994827152e-09, + "loss": 0.2819, "loss_nan_ranks": 0, - "loss_rank_avg": 0.022276364266872406, - "step": 1229, - "valid_targets_mean": 6153.0, - "valid_targets_min": 1226 + "loss_rank_avg": 0.13907426595687866, + "step": 4910, + "valid_targets_mean": 4511.9, + "valid_targets_min": 1282 }, { - "epoch": 4.980253164556962, - "grad_norm": 0.23363612704383607, - "learning_rate": 2.878483275170485e-09, - "loss": 0.1163, + "epoch": 4.975189873417722, + "grad_norm": 0.34819560371210506, + "learning_rate": 3.3751673335458147e-09, + "loss": 0.2899, "loss_nan_ranks": 0, - "loss_rank_avg": 0.015504706650972366, - "step": 1230, - "valid_targets_mean": 5061.0, - "valid_targets_min": 1139 + "loss_rank_avg": 0.14076785743236542, + "step": 4915, + "valid_targets_mean": 3422.9, + "valid_targets_min": 1383 }, { - "epoch": 4.984303797468354, - "grad_norm": 0.2305284497602762, - "learning_rate": 1.998961370555108e-09, - "loss": 0.1054, + "epoch": 4.980253164556962, + "grad_norm": 0.3120620502259134, + "learning_rate": 2.2018688582803315e-09, + "loss": 0.2976, "loss_nan_ranks": 0, - "loss_rank_avg": 0.007713980041444302, - "step": 1231, - "valid_targets_mean": 3291.6, - "valid_targets_min": 930 + "loss_rank_avg": 0.17987266182899475, + "step": 4920, + "valid_targets_mean": 5285.9, + "valid_targets_min": 1137 }, { - "epoch": 4.988354430379747, - "grad_norm": 0.22672850532808966, - "learning_rate": 1.2793429493518361e-09, - "loss": 0.1142, + "epoch": 4.985316455696203, + "grad_norm": 0.28626460706538365, + "learning_rate": 1.2781922146931635e-09, + "loss": 0.3007, "loss_nan_ranks": 0, - "loss_rank_avg": 0.013615965843200684, - "step": 1232, - "valid_targets_mean": 5076.8, - "valid_targets_min": 1208 + "loss_rank_avg": 0.15345998108386993, + "step": 4925, + "valid_targets_mean": 5331.3, + "valid_targets_min": 781 }, { - "epoch": 4.992405063291139, - "grad_norm": 0.2394791229613049, - "learning_rate": 7.196337656112384e-10, - "loss": 0.103, + "epoch": 4.990379746835443, + "grad_norm": 0.30214318111472815, + "learning_rate": 6.041489325459004e-10, + "loss": 0.2843, "loss_nan_ranks": 0, - "loss_rank_avg": 0.012944923713803291, - "step": 1233, - "valid_targets_mean": 3943.1, - "valid_targets_min": 1136 + "loss_rank_avg": 0.14609292149543762, + "step": 4930, + "valid_targets_mean": 4398.6, + "valid_targets_min": 1460 }, { - "epoch": 4.996455696202531, - "grad_norm": 0.24313900796577423, - "learning_rate": 3.198382947511292e-10, - "loss": 0.1081, + "epoch": 4.995443037974684, + "grad_norm": 0.36710406768986703, + "learning_rate": 1.7974742556159386e-10, + "loss": 0.2945, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018745126202702522, - "step": 1234, - "valid_targets_mean": 5020.2, - "valid_targets_min": 1195 + "loss_rank_avg": 0.17454470694065094, + "step": 4935, + "valid_targets_mean": 4667.4, + "valid_targets_min": 949 }, { "epoch": 5.0, - "grad_norm": 0.2701508029879809, - "learning_rate": 7.995973352770137e-11, - "loss": 0.1094, + "grad_norm": 0.42711913533675244, + "learning_rate": 4.992991313734763e-12, + "loss": 0.2954, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018037570640444756, - "step": 1235, - "valid_targets_mean": 4350.2, - "valid_targets_min": 878 + "loss_rank_avg": 0.29739144444465637, + "step": 4940, + "valid_targets_mean": 4348.2, + "valid_targets_min": 876 }, { "epoch": 5.0, "loss_nan_ranks": 0, - "loss_rank_avg": 0.018037570640444756, - "step": 1235, - "total_flos": 2.10289015818027e+18, - "train_loss": 0.2609484052790804, - "train_runtime": 28006.4544, - "train_samples_per_second": 5.642, - "train_steps_per_second": 0.044, - "valid_targets_mean": 4350.2, - "valid_targets_min": 878 + "loss_rank_avg": 0.29739144444465637, + "step": 4940, + "total_flos": 1.3775029894633226e+18, + "train_loss": 0.31707094306887884, + "train_runtime": 34760.1149, + "train_samples_per_second": 4.545, + "train_steps_per_second": 0.142, + "valid_targets_mean": 4348.2, + "valid_targets_min": 876 } ], - "logging_steps": 1, - "max_steps": 1235, + "logging_steps": 5, + "max_steps": 4940, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, @@ -13619,13 +10902,13 @@ "should_epoch_stop": false, "should_evaluate": false, "should_log": false, - "should_save": true, - "should_training_stop": true + "should_save": false, + "should_training_stop": false }, "attributes": {} } }, - "total_flos": 2.10289015818027e+18, + "total_flos": 1.3775029894633226e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null